forked from D-Net/dnet-hadoop
Added steps to reset the output directory and to copy the information that is not changed by the propagation step.
This commit is contained in:
parent
19cd5b85c0
commit
6f35f5ca42
|
@ -39,12 +39,30 @@
|
||||||
|
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
<start to="prepare_datasource_country_association"/>
|
<start to="reset-outputpath"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
</kill>
|
</kill>
|
||||||
|
<action name="reset-outputpath">
|
||||||
|
<fs>
|
||||||
|
<delete path='${workingDir}/country_propagation'/>
|
||||||
|
<delete path='${workingDir}/preparedInfo'/>
|
||||||
|
</fs>
|
||||||
|
<ok to="copy_relation"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="copy_relation">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||||
|
<arg>${nameNode}/${workingDir}/country_propagation/relation</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="prepare_datasource_country_association"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
<action name="prepare_datasource_country_association">
|
<action name="prepare_datasource_country_association">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
|
@ -65,7 +83,7 @@
|
||||||
<arg>--whitelist</arg><arg>${whitelist}</arg>
|
<arg>--whitelist</arg><arg>${whitelist}</arg>
|
||||||
<arg>--allowedtypes</arg><arg>${allowedtypes}</arg>
|
<arg>--allowedtypes</arg><arg>${allowedtypes}</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="fork_join_apply_country_propagation"/>
|
<ok to="fork_join_apply_country_propagation"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -101,7 +119,7 @@
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/publication</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/publication</arg>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait"/>
|
<ok to="wait"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -131,7 +149,7 @@
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/dataset</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/dataset</arg>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait"/>
|
<ok to="wait"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -161,7 +179,7 @@
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/otherresearchproduct</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/otherresearchproduct</arg>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait"/>
|
<ok to="wait"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -191,7 +209,7 @@
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/software</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/software</arg>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait"/>
|
<ok to="wait"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -27,11 +27,25 @@
|
||||||
</kill>
|
</kill>
|
||||||
<action name="reset-outputpath">
|
<action name="reset-outputpath">
|
||||||
<fs>
|
<fs>
|
||||||
<delete path='${workingDir}/projecttoresult_propagation/relation'/>
|
<delete path='${workingDir}/orcid_propagation'/>
|
||||||
|
<delete path='${workingDir}/preparedInfo'/>
|
||||||
</fs>
|
</fs>
|
||||||
|
<ok to="copy_relation"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="copy_relation">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||||
|
<arg>${nameNode}/${workingDir}/orcid_propagation/relation</arg>
|
||||||
|
</distcp>
|
||||||
<ok to="fork_prepare_assoc_step1"/>
|
<ok to="fork_prepare_assoc_step1"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
||||||
<fork name="fork_prepare_assoc_step1">
|
<fork name="fork_prepare_assoc_step1">
|
||||||
<path start="join_prepare_publication"/>
|
<path start="join_prepare_publication"/>
|
||||||
<path start="join_prepare_dataset"/>
|
<path start="join_prepare_dataset"/>
|
||||||
|
@ -208,7 +222,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/publication</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -235,7 +249,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/dataset</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -262,7 +276,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/otherresearchproduct</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -289,7 +303,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/software</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -37,11 +37,19 @@
|
||||||
|
|
||||||
<action name="reset-outputpath">
|
<action name="reset-outputpath">
|
||||||
<fs>
|
<fs>
|
||||||
<delete path='${workingDir}/projecttoresult_propagation/relation'/>
|
<delete path='${workingDir}/projecttoresult_propagation'/>
|
||||||
</fs>
|
</fs>
|
||||||
<ok to="copy_relation"/>
|
<ok to="copy_relations"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
<fork name="copy_relations">
|
||||||
|
<path start="copy_relation"/>
|
||||||
|
<path start="copy_publication"/>
|
||||||
|
<path start="copy_dataset"/>
|
||||||
|
<path start="copy_orp"/>
|
||||||
|
<path start="copy_software"/>
|
||||||
|
</fork>
|
||||||
|
|
||||||
<action name="copy_relation">
|
<action name="copy_relation">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
@ -49,10 +57,57 @@
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||||
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/relation</arg>
|
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/relation</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
<ok to="prepare_project_results_association"/>
|
<ok to="wait"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
<action name="copy_publication">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<arg>${nameNode}/${sourcePath}/publication</arg>
|
||||||
|
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/publication</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="wait"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="copy_dataset">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
||||||
|
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/dataset</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="wait"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="copy_orp">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
||||||
|
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/otherresearchproduct</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="wait"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="copy_software">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<arg>${nameNode}/${sourcePath}/software</arg>
|
||||||
|
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/software</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="wait"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<join name="wait" to="prepare_project_results_association"/>
|
||||||
|
|
||||||
|
|
||||||
<action name="prepare_project_results_association">
|
<action name="prepare_project_results_association">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
|
|
|
@ -22,11 +22,20 @@
|
||||||
|
|
||||||
<action name="reset-outputpath">
|
<action name="reset-outputpath">
|
||||||
<fs>
|
<fs>
|
||||||
<delete path='${workingDir}/resulttoorganization_propagation/relation'/>
|
<delete path='${workingDir}/resulttoorganization_propagation'/>
|
||||||
</fs>
|
</fs>
|
||||||
<ok to="copy_relation"/>
|
<ok to="copy_relations"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
<fork name="copy_relations">
|
||||||
|
<path start="copy_relation"/>
|
||||||
|
<path start="copy_publication"/>
|
||||||
|
<path start="copy_dataset"/>
|
||||||
|
<path start="copy_orp"/>
|
||||||
|
<path start="copy_software"/>
|
||||||
|
</fork>
|
||||||
|
|
||||||
<action name="copy_relation">
|
<action name="copy_relation">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
@ -34,10 +43,53 @@
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||||
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/relation</arg>
|
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/relation</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
<ok to="prepare_result_organization_association"/>
|
<ok to="wait"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
<action name="copy_publication">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<arg>${nameNode}/${sourcePath}/publication</arg>
|
||||||
|
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/publication</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="wait"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="copy_dataset">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
||||||
|
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/dataset</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="wait"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="copy_orp">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
||||||
|
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/otherresearchproduct</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="wait"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<action name="copy_software">
|
||||||
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<arg>${nameNode}/${sourcePath}/software</arg>
|
||||||
|
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/software</arg>
|
||||||
|
</distcp>
|
||||||
|
<ok to="wait"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
<join name="wait" to="prepare_result_organization_association"/>
|
||||||
<action name="prepare_result_organization_association">
|
<action name="prepare_result_organization_association">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
|
|
Loading…
Reference in New Issue