<!-- dnet-hadoop/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml -->

<workflow-app name="import MAG into HDFS" xmlns="uri:oozie:workflow:0.5">
    <!--
        Formal parameters of the workflow. None of them declares a default value,
        so every one of them has to be supplied by the job configuration when the
        workflow is submitted.
    -->
    <parameters>
        <!-- Input locations forwarded to the Spark job -->
        <property>
            <name>crossrefPublicationPath</name>
            <description>the Crossref Publication Path</description>
        </property>
        <property>
            <name>crossrefDatasetPath</name>
            <description>the Crossref Dataset Path</description>
        </property>
        <property>
            <name>uwPublicationPath</name>
            <description>the UnpayWall Publication Path</description>
        </property>
        <property>
            <name>magPublicationPath</name>
            <description>the MAG Publication Path</description>
        </property>
        <property>
            <name>orcidPublicationPath</name>
            <description>the ORCID Publication Path</description>
        </property>
        <property>
            <name>workingDirPath</name>
            <description>the Working Path</description>
        </property>

        <!-- Spark resource settings -->
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
        <!--
            Referenced inside the spark-opts of the CreateDOIBoost action; declared
            here so that a missing value is reported at submission time instead of
            when the action starts.
        -->
        <property>
            <name>sparkExtraOPT</name>
            <description>extra options appended to the spark-opts of the Spark action</description>
        </property>
    </parameters>


    <!--
        Entry point: execution begins directly at the CreateDOIBoost action.
        NOTE(review): the ResetWorkingPath action defined in this file is not the
        target of any transition, so the working directory is never deleted and
        recreated by this workflow. Confirm that bypassing it is intentional.
    -->
    <start to="CreateDOIBoost"/>


    <!--
        Failure sink: both actions in this file route their error transition here.
        The kill message embeds the error message of the last node that failed.
    -->
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!--
        Filesystem action that recreates the working directory from scratch: it
        deletes the path given by the workingDirPath parameter, creates it again,
        then hands over to CreateDOIBoost (or to Kill on error).
        NOTE(review): no transition in this file points at this action (the start
        node goes straight to CreateDOIBoost), so it does not appear to run.
    -->
    <action name="ResetWorkingPath">
        <fs>
            <delete path="${workingDirPath}"/>
            <mkdir path="${workingDirPath}"/>
        </fs>
        <ok to="CreateDOIBoost"/>
        <error to="Kill"/>
    </action>

    <!--
        Spark action that runs eu.dnetlib.doiboost.SparkGenerateDoiBoost on YARN in
        cluster mode.
        Inputs: the five source paths (Crossref publications, Crossref datasets,
        Unpaywall, MAG, ORCID) and the working directory are forwarded to the job as
        name/value argument pairs, followed by the master argument (yarn-cluster).
        Resources: executor memory, executor cores and driver memory come from the
        corresponding workflow parameters; the number of Spark SQL shuffle
        partitions is fixed at 3840; sparkExtraOPT is appended to the options as is.
        Transitions: End on success, Kill on failure.
        NOTE(review): projectVersion is used to build the jar name but is not
        declared in the parameters section; presumably it is provided by the
        submitting job configuration. Confirm.
        NOTE(review): the enclosing workflow-app is named "import MAG into HDFS",
        which does not describe this action. Consider renaming the workflow.
    -->
    <action name="CreateDOIBoost">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>Create DOIBoost Infospace</name>
            <class>eu.dnetlib.doiboost.SparkGenerateDoiBoost</class>
            <jar>dhp-doiboost-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.sql.shuffle.partitions=3840
                ${sparkExtraOPT}
            </spark-opts>
            <arg>--crossrefPublicationPath</arg><arg>${crossrefPublicationPath}</arg>
            <arg>--crossrefDatasetPath</arg><arg>${crossrefDatasetPath}</arg>
            <arg>--uwPublicationPath</arg><arg>${uwPublicationPath}</arg>
            <arg>--magPublicationPath</arg><arg>${magPublicationPath}</arg>
            <arg>--orcidPublicationPath</arg><arg>${orcidPublicationPath}</arg>
            <arg>--workingDirPath</arg><arg>${workingDirPath}</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>


<!--    <action name="PreprocessMag">-->
<!--        <spark xmlns="uri:oozie:spark-action:0.2">-->
<!--            <master>yarn-cluster</master>-->
<!--            <mode>cluster</mode>-->
<!--            <name>Convert Mag to Dataset</name>-->
<!--            <class>eu.dnetlib.doiboost.mag.SparkPreProcessMAG</class>-->
<!--            <jar>dhp-doiboost-${projectVersion}.jar</jar>-->
<!--            <spark-opts>-->
<!--                &#45;&#45;executor-memory=${sparkExecutorMemory}-->
<!--                &#45;&#45;executor-cores=${sparkExecutorCores}-->
<!--                &#45;&#45;driver-memory=${sparkDriverMemory}-->
<!--                &#45;&#45;conf spark.sql.shuffle.partitions=3840-->
<!--                ${sparkExtraOPT}-->
<!--            </spark-opts>-->
<!--            <arg>&#45;&#45;sourcePath</arg><arg>${sourcePath}</arg>-->
<!--            <arg>&#45;&#45;targetPath</arg><arg>${targetPath}</arg>-->
<!--            <arg>&#45;&#45;master</arg><arg>yarn-cluster</arg>-->
<!--        </spark>-->
<!--        <ok to="End"/>-->
<!--        <error to="Kill"/>-->
<!--    </action>-->

    <!-- Terminal node, reached through the ok transition of CreateDOIBoost. -->
    <end name="End"/>
</workflow-app>
completed mapping of UnpayWall and ORCID 2020-05-22 15:15:09 +02:00			`<workflow-app name="import MAG into HDFS" xmlns="uri:oozie:workflow:0.5">`
			`<parameters>`
			`<property>`
			`<name>crossrefPublicationPath</name>`
			`<description>the Crossref Publication Path</description>`
			`</property>`
			`<property>`
			`<name>crossrefDatasetPath</name>`
			`<description>the Crossref Dataset Path</description>`
			`</property>`
			`<property>`
			`<name>uwPublicationPath</name>`
			`<description>the UnpayWall Publication Path</description>`
			`</property>`
			`<property>`
			`<name>magPublicationPath</name>`
			`<description>the MAG Publication Path</description>`
			`</property>`
			`<property>`
			`<name>orcidPublicationPath</name>`
			`<description>the ORCID Publication Path</description>`
			`</property>`
			`<property>`
			`<name>workingDirPath</name>`
			`<description>the Working Path</description>`
			`</property>`
			`<property>`
			`<name>sparkDriverMemory</name>`
			`<description>memory for driver process</description>`
			`</property>`
			`<property>`
			`<name>sparkExecutorMemory</name>`
			`<description>memory for individual executor</description>`
			`</property>`
			`<property>`
			`<name>sparkExecutorCores</name>`
			`<description>number of cores used by single executor</description>`
			`</property>`
			`</parameters>`



minor fix 2020-05-22 20:51:42 +02:00			`<start to="CreateDOIBoost"/>`
completed mapping of UnpayWall and ORCID 2020-05-22 15:15:09 +02:00

			`<kill name="Kill">`
			`<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>`
			`</kill>`

			`<action name="ResetWorkingPath">`
			`<fs>`
			`<delete path='${workingDirPath}'/>`
			`<mkdir path='${workingDirPath}'/>`
			`</fs>`
			`<ok to="CreateDOIBoost"/>`
			`<error to="Kill"/>`
			`</action>`

			`<action name="CreateDOIBoost">`
			`<spark xmlns="uri:oozie:spark-action:0.2">`
			`<master>yarn-cluster</master>`
			`<mode>cluster</mode>`
			`<name>Create DOIBoost Infospace</name>`
			`<class>eu.dnetlib.doiboost.SparkGenerateDoiBoost</class>`
			`<jar>dhp-doiboost-${projectVersion}.jar</jar>`
			`<spark-opts>`
			`--executor-memory=${sparkExecutorMemory}`
			`--executor-cores=${sparkExecutorCores}`
			`--driver-memory=${sparkDriverMemory}`
			`--conf spark.sql.shuffle.partitions=3840`
			`${sparkExtraOPT}`
			`</spark-opts>`
			`<arg>--crossrefPublicationPath</arg><arg>${crossrefPublicationPath}</arg>`
			`<arg>--crossrefDatasetPath</arg><arg>${crossrefDatasetPath}</arg>`
			`<arg>--uwPublicationPath</arg><arg>${uwPublicationPath}</arg>`
			`<arg>--magPublicationPath</arg><arg>${magPublicationPath}</arg>`
			`<arg>--orcidPublicationPath</arg><arg>${orcidPublicationPath}</arg>`
			`<arg>--workingDirPath</arg><arg>${workingDirPath}</arg>`
			`<arg>--master</arg><arg>yarn-cluster</arg>`
			`</spark>`
			`<ok to="End"/>`
			`<error to="Kill"/>`
			`</action>`



			`<!-- <action name="PreprocessMag">-->`
			`<!-- <spark xmlns="uri:oozie:spark-action:0.2">-->`
			`<!-- <master>yarn-cluster</master>-->`
			`<!-- <mode>cluster</mode>-->`
			`<!-- <name>Convert Mag to Dataset</name>-->`
			`<!-- <class>eu.dnetlib.doiboost.mag.SparkPreProcessMAG</class>-->`
			`<!-- <jar>dhp-doiboost-${projectVersion}.jar</jar>-->`
			`<!-- <spark-opts>-->`
			`<!-- --executor-memory=${sparkExecutorMemory}-->`
			`<!-- --executor-cores=${sparkExecutorCores}-->`
			`<!-- --driver-memory=${sparkDriverMemory}-->`
			`<!-- --conf spark.sql.shuffle.partitions=3840-->`
			`<!-- ${sparkExtraOPT}-->`
			`<!-- </spark-opts>-->`
			`<!-- <arg>--sourcePath</arg><arg>${sourcePath}</arg>-->`
			`<!-- <arg>--targetPath</arg><arg>${targetPath}</arg>-->`
			`<!-- <arg>--master</arg><arg>yarn-cluster</arg>-->`
			`<!-- </spark>-->`
			`<!-- <ok to="End"/>-->`
			`<!-- <error to="Kill"/>-->`
			`<!-- </action>-->`

			`<end name="End"/>`
			`</workflow-app>`