From 30ea1bda8852bc001a77f7615d1b6d3fdf380d6f Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 12 Jun 2020 10:42:35 +0200 Subject: [PATCH] oozie workflow --- .../broker/oa/GenerateEventsApplication.java | 5 +- .../generate_all/oozie_app/config-default.xml | 18 ++++ .../oa/generate_all/oozie_app/workflow.xml | 99 +++++++++++++++++++ .../dhp/broker/oa/generate_broker_events.json | 26 +++++ 4 files changed, 146 insertions(+), 2 deletions(-) create mode 100644 dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_broker_events.json diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index a09767192..4d40ba80d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -51,7 +51,7 @@ public class GenerateEventsApplication { IOUtils .toString( GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/generate_broker_events.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -149,7 +149,8 @@ public class GenerateEventsApplication { return r4; } - private static Dataset relatedEntities(final Dataset targets, final Dataset rels, + private static Dataset relatedEntities(final Dataset targets, + final Dataset rels, final Class clazz) { return rels .joinWith(targets, 
targets.col("id").equalTo(rels.col("target")), "inner") diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/config-default.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/config-default.xml new file mode 100644 index 000000000..2e0ed9aee --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/config-default.xml @@ -0,0 +1,18 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml new file mode 100644 index 000000000..da573ae9c --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml @@ -0,0 +1,99 @@ + + + + + graphInputPath + the path where the graph is stored + + + eventsOutputPath + the path where the events will be stored + + + isLookupUrl + the address of the lookUp service + + + dedupConfProfId + the id of a valid Dedup Configuration Profile + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + 
spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + eu.dnetlib.dhp.broker.oa.GenerateEventsApplication + --graphPath${graphInputPath} + --eventsPath${eventsOutputPath} + --isLookupUrl${isLookupUrl} + --dedupConfProfile${dedupConfProfId} + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_broker_events.json b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_broker_events.json new file mode 100644 index 000000000..6ab6d9a2d --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_broker_events.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "g", + "paramLongName": "graphPath", + "paramDescription": "the path where the graph is stored", + "paramRequired": true + }, + { + "paramName": "o", + "paramLongName": "eventsPath", + "paramDescription": "the path where the generated events will be stored", + "paramRequired": true + }, + { + "paramName": "lu", + "paramLongName": "isLookupUrl", + "paramDescription": "the address of the ISLookUpService", + "paramRequired": true + }, + { + "paramName": "d", + "paramLongName": "dedupConfProfile", + "paramDescription": "the id of a valid Dedup Configuration Profile", + "paramRequired": true + } +]