dnet-hadoop/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml

<workflow-app name="dump_whole_graph" xmlns="uri:oozie:workflow:0.5">

    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>isLookUpUrl</name>
            <description>the isLookup service endpoint</description>
        </property>
        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
        <property>
            <name>resultAggregation</name>
            <description>true if all the result type have to be dumped under result. false otherwise</description>
        </property>
        <property>
            <name>accessToken</name>
            <description>the access token used for the deposition in Zenodo</description>
        </property>
        <property>
            <name>connectionUrl</name>
            <description>the connection url for Zenodo</description>
        </property>
        <property>
            <name>metadata</name>
            <description> the metadata associated to the deposition</description>
        </property>
        <property>
            <name>depositionType</name>
            <description>the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided)</description>
        </property>
        <property>
            <name>conceptRecordId</name>
            <description>for new version, the id of the record for the old deposition</description>
        </property>
        <property>
            <name>depositionId</name>
            <description>the depositionId of a deposition open that has to be added content</description>
        </property>
        <property>
            <name>organizationCommunityMap</name>
            <description>the organization community map</description>
        </property>

        <property>
            <name>hiveDbName</name>
            <description>the target hive database name</description>
        </property>
        <property>
            <name>hiveJdbcUrl</name>
            <description>hive server jdbc url</description>
        </property>
        <property>
            <name>hiveMetastoreUris</name>
            <description>hive server metastore URIs</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
        <property>
            <name>oozieActionShareLibForSpark2</name>
            <description>oozie action sharelib for spark 2.*</description>
        </property>
        <property>
            <name>spark2ExtraListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
            <description>spark 2.* extra listeners classname</description>
        </property>
        <property>
            <name>spark2SqlQueryExecutionListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
            <description>spark 2.* sql query execution listeners classname</description>
        </property>
        <property>
            <name>spark2YarnHistoryServerAddress</name>
            <description>spark 2.* yarn history server address</description>
        </property>
        <property>
            <name>spark2EventLogDir</name>
            <description>spark 2.* event log dir location</description>
        </property>
    </parameters>

    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapreduce.job.queuename</name>
                <value>${queueName}</value>
            </property>
            <property>
                <name>oozie.launcher.mapred.job.queue.name</name>
                <value>${oozieLauncherQueueName}</value>
            </property>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>

        </configuration>
    </global>

    <start to="reset_outputpath"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <action name="reset_outputpath">
        <fs>
            <delete path="${outputPath}"/>
            <mkdir path="${outputPath}"/>
        </fs>
        <ok to="save_community_map"/>
        <error to="Kill"/>
    </action>

    <action name="save_community_map">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
            <arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
            <arg>--nameNode</arg><arg>${nameNode}</arg>
            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
        </java>
        <ok to="fork_dump"/>
        <error to="Kill"/>
    </action>

    <fork name="fork_dump">
        <path start="dump_publication"/>
        <path start="dump_dataset"/>
        <path start="dump_orp"/>
        <path start="dump_software"/>
        <path start="dump_organization"/>
        <path start="dump_project"/>
        <path start="dump_datasource"/>
        <path start="dump_relation"/>
    </fork>

    <action name="dump_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table publication </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/publication</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table dataset </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/dataset</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_orp">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table ORP </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table software </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/software</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_organization">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table organization </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/organization</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
            <arg>--outputPath</arg><arg>${workingDir}/collect/organization</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_project">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table project </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
            <arg>--outputPath</arg><arg>${workingDir}/collect/project</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_datasource">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table datasource </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/datasource</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
            <arg>--outputPath</arg><arg>${workingDir}/collect/datasource</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <action name="dump_relation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table relation </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/relation</arg>
        </spark>
        <ok to="join_dump"/>
        <error to="Kill"/>
    </action>

    <join name="join_dump" to="fork_context"/>

    <fork name="fork_context">
        <path start="create_entities_fromcontext"/>
        <path start="create_relation_fromcontext"/>
        <path start="create_relation_fromorgs"/>
    </fork>

    <action name="create_entities_fromcontext">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
            <arg>--hdfsPath</arg><arg>${workingDir}/collect/communities_infrastructures</arg>
            <arg>--nameNode</arg><arg>${nameNode}</arg>
            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
        </java>
        <ok to="join_context"/>
        <error to="Kill"/>
    </action>

    <action name="create_relation_fromcontext">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation</main-class>
            <arg>--hdfsPath</arg><arg>${workingDir}/relation/context</arg>
            <arg>--nameNode</arg><arg>${nameNode}</arg>
            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
        </java>
        <ok to="join_context"/>
        <error to="Kill"/>
    </action>

    <action name="create_relation_fromorgs">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table relation </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/contextOrg</arg>
            <arg>--organizationCommunityMap</arg><arg>${organizationCommunityMap}</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_context"/>
        <error to="Kill"/>
    </action>

    <join name="join_context" to="fork_extract_relations"/>

    <fork name="fork_extract_relations">
        <path start="rels_from_pubs"/>
        <path start="rels_from_dats"/>
        <path start="rels_from_orp"/>
        <path start="rels_from_sw"/>
    </fork>

    <action name="rels_from_pubs">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Extract Relations from publication </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/publication</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_extract_relations"/>
        <error to="Kill"/>
    </action>

    <action name="rels_from_dats">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table dataset </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/dataset</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_extract_relations"/>
        <error to="Kill"/>
    </action>

    <action name="rels_from_orp">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table ORP </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/orp</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_extract_relations"/>
        <error to="Kill"/>
    </action>

    <action name="rels_from_sw">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table software </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/relation/software</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
        </spark>
        <ok to="join_extract_relations"/>
        <error to="Kill"/>
    </action>

    <join name="join_extract_relations" to="collect_and_save"/>

    <action name="collect_and_save">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Collect Results and Relations and put them in the right path </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkCollectAndSave</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingDir}</arg>
            <arg>--outputPath</arg><arg>${workingDir}/collect</arg>
            <arg>--resultAggregation</arg><arg>${resultAggregation}</arg>
        </spark>
        <ok to="make_archive"/>
        <error to="Kill"/>
    </action>

    <action name="make_archive">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
            <arg>--hdfsPath</arg><arg>${outputPath}</arg>
            <arg>--nameNode</arg><arg>${nameNode}</arg>
            <arg>--sourcePath</arg><arg>${workingDir}/collect</arg>
        </java>
        <ok to="send_zenodo"/>
        <error to="Kill"/>
    </action>

    <action name="send_zenodo">
        <java>
            <main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
            <arg>--hdfsPath</arg><arg>${outputPath}</arg>
            <arg>--nameNode</arg><arg>${nameNode}</arg>
            <arg>--accessToken</arg><arg>${accessToken}</arg>
            <arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
            <arg>--metadata</arg><arg>${metadata}</arg>
            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
            <arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
            <arg>--depositionType</arg><arg>${depositionType}</arg>
            <arg>--depositionId</arg><arg>${depositionId}</arg>
        </java>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>

</workflow-app>