[EOSC DUMP ] modified workflow to add the indicators taken from the action set
This commit is contained in:
parent
5742f63f39
commit
ff366dd5b4
|
@ -153,13 +153,38 @@
|
|||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--resultPath</arg><arg>${workingDir}/dump/publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/publicationextendedaffiliation</arg>
|
||||
</spark>
|
||||
<ok to="extend_publication_with_indicators"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<!-- Spark step for the publication dump: launches ExtendWithUsageCounts on YARN (cluster mode). -->
<!-- Its resultPath is the outputPath written by the preceding affiliation-extension step. -->
<action name="extend_publication_with_indicators">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend Dump Publication with indicators </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.ExtendWithUsageCounts</class>
|
||||
<jar>dump-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
<!-- Job arguments: action set location, input dump (publicationextendedaffiliation), output dump (publicationextended). -->
<!-- NOTE(review): how the class combines the action set with the input is not visible here; confirm in ExtendWithUsageCounts. -->
|
||||
<arg>--actionSetPath</arg><arg>${actionSetPath}</arg>
|
||||
<arg>--resultPath</arg><arg>${workingDir}/dump/publicationextendedaffiliation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/publicationextended</arg>
|
||||
</spark>
<!-- Success continues to the wait_eosc_dump join; any failure goes to the Kill node. -->
|
||||
<ok to="wait_eosc_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="dump_eosc_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
|
@ -203,13 +228,38 @@
|
|||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--resultPath</arg><arg>${workingDir}/dump/dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/datasetextendedaffiliation</arg>
|
||||
</spark>
|
||||
<ok to="extend_dataset_with_indicators"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<!-- Spark step for the dataset dump: launches ExtendWithUsageCounts on YARN (cluster mode). -->
<!-- Its resultPath is the outputPath written by the preceding affiliation-extension step. -->
<action name="extend_dataset_with_indicators">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend Dump Dataset with indicators </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.ExtendWithUsageCounts</class>
|
||||
<jar>dump-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
<!-- Job arguments: action set location, input dump (datasetextendedaffiliation), output dump (datasetextended). -->
<!-- NOTE(review): how the class combines the action set with the input is not visible here; confirm in ExtendWithUsageCounts. -->
|
||||
<arg>--actionSetPath</arg><arg>${actionSetPath}</arg>
|
||||
<arg>--resultPath</arg><arg>${workingDir}/dump/datasetextendedaffiliation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/datasetextended</arg>
|
||||
</spark>
<!-- Success continues to the wait_eosc_dump join; any failure goes to the Kill node. -->
|
||||
<ok to="wait_eosc_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="dump_eosc_orp">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
|
@ -254,12 +304,37 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--resultPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproductextendedaffiliation</arg>
|
||||
</spark>
|
||||
<ok to="extend_orp_with_indicators"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<!-- Spark step for the other-research-product (ORP) dump: launches ExtendWithUsageCounts on YARN (cluster mode). -->
<!-- Its resultPath is the outputPath written by the preceding affiliation-extension step. -->
<action name="extend_orp_with_indicators">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extend Dump ORP with indicators </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.ExtendWithUsageCounts</class>
|
||||
<jar>dump-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
<!-- Job arguments: action set location, input dump (otherresearchproductextendedaffiliation), output dump (otherresearchproductextended). -->
<!-- NOTE(review): how the class combines the action set with the input is not visible here; confirm in ExtendWithUsageCounts. -->
|
||||
<arg>--actionSetPath</arg><arg>${actionSetPath}</arg>
|
||||
<arg>--resultPath</arg><arg>${workingDir}/dump/otherresearchproductextendedaffiliation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproductextended</arg>
|
||||
</spark>
<!-- Success continues to the wait_eosc_dump join; any failure goes to the Kill node. -->
|
||||
<ok to="wait_eosc_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="dump_eosc_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
|
@ -304,12 +379,37 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--resultPath</arg><arg>${workingDir}/dump/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/softwareextendedaffiliation</arg>
|
||||
</spark>
|
||||
<ok to="extend_software_with_indicators"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<!-- Spark step for the software dump: launches ExtendWithUsageCounts on YARN (cluster mode). -->
<!-- Its resultPath is the outputPath written by the preceding affiliation-extension step. -->
<action name="extend_software_with_indicators">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
<!-- The job name identifies the software dump; it previously duplicated the ORP job name, making the two runs indistinguishable by name. -->
|
||||
<name>Extend Dump Software with indicators </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.ExtendWithUsageCounts</class>
|
||||
<jar>dump-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
<!-- Job arguments: action set location, input dump (softwareextendedaffiliation), output dump (softwareextended). -->
<!-- NOTE(review): how the class combines the action set with the input is not visible here; confirm in ExtendWithUsageCounts. -->
|
||||
<arg>--actionSetPath</arg><arg>${actionSetPath}</arg>
|
||||
<arg>--resultPath</arg><arg>${workingDir}/dump/softwareextendedaffiliation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dump/softwareextended</arg>
|
||||
</spark>
<!-- Success continues to the wait_eosc_dump join; any failure goes to the Kill node. -->
|
||||
<ok to="wait_eosc_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<!-- Join node targeted by the "ok" transition of each extend_*_with_indicators action; once joined, control passes to prepareResultProject. -->
<join name="wait_eosc_dump" to="prepareResultProject"/>
|
||||
|
||||
<action name="prepareResultProject">
|
||||
|
|
Loading…
Reference in New Issue