2024-01-15 00:08:07 +01:00
|
|
|
|
|
|
|
package eu.dnetlib.dhp.oozie;
|
|
|
|
|
2024-01-23 08:47:12 +01:00
|
|
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
2024-01-15 00:08:07 +01:00
|
|
|
|
|
|
|
import java.net.URL;
|
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Optional;
|
|
|
|
|
2024-01-23 08:47:12 +01:00
|
|
|
import org.apache.commons.lang3.time.DurationFormatUtils;
|
|
|
|
import org.apache.commons.text.StringSubstitutor;
|
|
|
|
import org.apache.spark.SparkConf;
|
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
|
|
|
import com.google.common.io.Resources;
|
|
|
|
|
|
|
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
2024-01-15 00:08:07 +01:00
|
|
|
|
|
|
|
public class RunSQLSparkJob {
|
|
|
|
private static final Logger log = LoggerFactory.getLogger(RunSQLSparkJob.class);
|
|
|
|
|
|
|
|
private final ArgumentApplicationParser parser;
|
|
|
|
|
|
|
|
public RunSQLSparkJob(ArgumentApplicationParser parser) {
|
|
|
|
this.parser = parser;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static void main(String[] args) throws Exception {
|
|
|
|
|
|
|
|
Map<String, String> params = new HashMap<>();
|
|
|
|
for (int i = 0; i < args.length - 1; i++) {
|
|
|
|
if (args[i].startsWith("--")) {
|
|
|
|
params.put(args[i].substring(2), args[++i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* String jsonConfiguration = IOUtils .toString( Objects .requireNonNull( RunSQLSparkJob.class
|
|
|
|
* .getResourceAsStream( "/eu/dnetlib/dhp/oozie/run_sql_parameters.json"))); final ArgumentApplicationParser
|
|
|
|
* parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args);
|
|
|
|
*/
|
|
|
|
|
|
|
|
Boolean isSparkSessionManaged = Optional
|
|
|
|
.ofNullable(params.get("isSparkSessionManaged"))
|
|
|
|
.map(Boolean::valueOf)
|
|
|
|
.orElse(Boolean.TRUE);
|
|
|
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
|
|
|
|
|
|
|
URL url = com.google.common.io.Resources.getResource(params.get("sql"));
|
|
|
|
String raw_sql = Resources.toString(url, StandardCharsets.UTF_8);
|
|
|
|
|
|
|
|
String sql = StringSubstitutor.replace(raw_sql, params);
|
|
|
|
log.info("sql: {}", sql);
|
|
|
|
|
|
|
|
SparkConf conf = new SparkConf();
|
|
|
|
conf.set("hive.metastore.uris", params.get("hiveMetastoreUris"));
|
|
|
|
|
|
|
|
runWithSparkHiveSession(
|
|
|
|
conf,
|
|
|
|
isSparkSessionManaged,
|
|
|
|
spark -> {
|
|
|
|
for (String statement : sql.split(";\\s*/\\*\\s*EOS\\s*\\*/\\s*")) {
|
|
|
|
log.info("executing: {}", statement);
|
|
|
|
long startTime = System.currentTimeMillis();
|
|
|
|
spark.sql(statement).show();
|
|
|
|
log
|
|
|
|
.info(
|
|
|
|
"executed in {}",
|
|
|
|
DurationFormatUtils.formatDuration(System.currentTimeMillis() - startTime, "HH:mm:ss.S"));
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|