Extended the workflow to add the dump of the relations between organizations and projects

This commit is contained in:
Miriam Baglioni 2023-10-25 11:48:42 +02:00
parent a821371af2
commit 25267c1689
1 changed file with 25 additions and 1 deletion

View File

@@ -603,7 +603,31 @@
</action>
<join name="join_extend_relation" to="make_archive"/>
<join name="join_extend_relation" to="dump_organization_project_relations"/>
<action name="dump_organization_project_relations">
    <!-- Spark step submitted to YARN in cluster mode. It launches the
         SparkDumpOrganizationProject class from the dump jar and hands it
         ${sourcePath} as the source path and ${outputPath}/dump/ as the
         output path. -->
    <spark xmlns="uri:oozie:spark-action:0.2">
        <master>yarn</master>
        <mode>cluster</mode>
        <name>Dump for the relations between organization and projects in the subset of entities relevant for EOSC</name>
        <class>eu.dnetlib.dhp.oa.graph.dump.eosc.SparkDumpOrganizationProject</class>
        <jar>dump-${projectVersion}.jar</jar>
        <!-- Executor and driver sizing, listeners, history server, event log
             and SQL warehouse locations are all taken from ${...} properties;
             nothing is hard-coded in this step. -->
        <spark-opts>
            --executor-memory=${sparkExecutorMemory}
            --executor-cores=${sparkExecutorCores}
            --driver-memory=${sparkDriverMemory}
            --conf spark.extraListeners=${spark2ExtraListeners}
            --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
            --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
            --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
        </spark-opts>
        <arg>--sourcePath</arg>
        <arg>${sourcePath}</arg>
        <arg>--outputPath</arg>
        <arg>${outputPath}/dump/</arg>
    </spark>
    <!-- Success continues with make_archive; any failure is routed to the
         node named Kill. -->
    <ok to="make_archive"/>
    <error to="Kill"/>
</action>
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>