forked from D-Net/dnet-hadoop
oozie workflow
This commit is contained in:
parent
c22cb5a3c6
commit
30ea1bda88
|
@ -51,7 +51,7 @@ public class GenerateEventsApplication {
|
|||
IOUtils
|
||||
.toString(
|
||||
GenerateEventsApplication.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/generate_broker_events.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
final Boolean isSparkSessionManaged = Optional
|
||||
|
@ -149,7 +149,8 @@ public class GenerateEventsApplication {
|
|||
return r4;
|
||||
}
|
||||
|
||||
private static <T, RT> Dataset<RT> relatedEntities(final Dataset<T> targets, final Dataset<Relation> rels,
|
||||
private static <T, RT> Dataset<RT> relatedEntities(final Dataset<T> targets,
|
||||
final Dataset<Relation> rels,
|
||||
final Class<RT> clazz) {
|
||||
return rels
|
||||
.joinWith(targets, targets.col("id").equalTo(rels.col("target")), "inner")
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,99 @@
|
|||
<workflow-app name="create broker events" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<parameters>
|
||||
<property>
|
||||
<name>graphInputPath</name>
|
||||
<description>the path where the graph is stored</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>eventsOutputPath</name>
|
||||
<description>the path where the the events will be stored</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookupUrl</name>
|
||||
<description>the address of the lookUp service</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dedupConfProfId</name>
|
||||
<description>the id of a valid Dedup Configuration Profile</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="generate_events"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="generate_events">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${eventsOutputPath}"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.broker.oa.GenerateEventsApplication</main-class>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--eventsPath</arg><arg>${eventsOutputPath}</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--dedupConfProfile</arg><arg>${dedupConfProfId}</arg>
|
||||
</java>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -0,0 +1,26 @@
|
|||
[
|
||||
{
|
||||
"paramName": "g",
|
||||
"paramLongName": "graphPath",
|
||||
"paramDescription": "the path where there the graph is stored",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "o",
|
||||
"paramLongName": "eventsPath",
|
||||
"paramDescription": "the path where the generated events will be stored",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "lu",
|
||||
"paramLongName": "isLookupUrl",
|
||||
"paramDescription": "the address of the ISLookUpService",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "d",
|
||||
"paramLongName": "dedupConfProfile",
|
||||
"paramDescription": "the id of a valid Dedup Configuration Profile",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
Loading…
Reference in New Issue