Added the steps that reset the output directory and copy the information that is not changed by the propagation step

This commit is contained in:
Miriam Baglioni 2020-04-23 12:12:07 +02:00
parent 19cd5b85c0
commit 6f35f5ca42
4 changed files with 156 additions and 17 deletions

View File

@ -39,12 +39,30 @@
</parameters>
<start to="prepare_datasource_country_association"/>
<start to="reset-outputpath"/>
<!-- Kill node targeted by the error transitions of the actions in this workflow.
     Its message embeds the error message of the last node that failed. -->
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<!-- Filesystem action: deletes ${workingDir}/country_propagation and
     ${workingDir}/preparedInfo.
     Success leads to copy_relation, failure to the Kill node. -->
<action name="reset-outputpath">
<fs>
<delete path='${workingDir}/country_propagation'/>
<delete path='${workingDir}/preparedInfo'/>
</fs>
<ok to="copy_relation"/>
<error to="Kill"/>
</action>
<!-- DistCp action: copies ${sourcePath}/relation to
     ${workingDir}/country_propagation/relation, both addressed through ${nameNode}.
     Success leads to prepare_datasource_country_association, failure to the Kill node. -->
<action name="copy_relation">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<!-- first arg: copy source, second arg: copy destination -->
<arg>${nameNode}/${sourcePath}/relation</arg>
<arg>${nameNode}/${workingDir}/country_propagation/relation</arg>
</distcp>
<ok to="prepare_datasource_country_association"/>
<error to="Kill"/>
</action>
<action name="prepare_datasource_country_association">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
@ -65,7 +83,7 @@
<arg>--whitelist</arg><arg>${whitelist}</arg>
<arg>--allowedtypes</arg><arg>${allowedtypes}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="fork_join_apply_country_propagation"/>
<error to="Kill"/>
@ -101,7 +119,7 @@
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/publication</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="wait"/>
<error to="Kill"/>
@ -131,7 +149,7 @@
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/dataset</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="wait"/>
<error to="Kill"/>
@ -161,7 +179,7 @@
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/otherresearchproduct</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="wait"/>
<error to="Kill"/>
@ -191,7 +209,7 @@
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="wait"/>
<error to="Kill"/>

View File

@ -27,11 +27,25 @@
</kill>
<!-- Filesystem action: deletes the listed paths under ${workingDir}.
     Success leads to copy_relation, failure to the Kill node.
     NOTE(review): the first delete targets projecttoresult_propagation/relation, while the
     other nodes shown for this workflow write under orcid_propagation. This listing is a
     rendered diff without add/remove markers, so that line is presumably the removed
     pre-change version; confirm against the actual workflow file. -->
<action name="reset-outputpath">
<fs>
<delete path='${workingDir}/projecttoresult_propagation/relation'/>
<delete path='${workingDir}/orcid_propagation'/>
<delete path='${workingDir}/preparedInfo'/>
</fs>
<ok to="copy_relation"/>
<error to="Kill"/>
</action>
<!-- DistCp action: copies ${sourcePath}/relation to
     ${workingDir}/orcid_propagation/relation, both addressed through ${nameNode}.
     Success leads to the fork_prepare_assoc_step1 fork, failure to the Kill node. -->
<action name="copy_relation">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<!-- first arg: copy source, second arg: copy destination -->
<arg>${nameNode}/${sourcePath}/relation</arg>
<arg>${nameNode}/${workingDir}/orcid_propagation/relation</arg>
</distcp>
<ok to="fork_prepare_assoc_step1"/>
<error to="Kill"/>
</action>
<fork name="fork_prepare_assoc_step1">
<path start="join_prepare_publication"/>
<path start="join_prepare_dataset"/>
@ -208,7 +222,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/publication</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
@ -235,7 +249,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/dataset</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
@ -262,7 +276,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/otherresearchproduct</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>
@ -289,7 +303,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
<arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/software</arg>
</spark>
<ok to="wait2"/>
<error to="Kill"/>

View File

@ -37,11 +37,19 @@
<!-- Filesystem action: deletes output under ${workingDir}/projecttoresult_propagation.
     Failure leads to the Kill node.
     NOTE(review): two delete lines and two ok transitions appear below. This listing is a
     rendered diff without add/remove markers, so each pair is presumably the old line
     followed by its replacement (whole-directory delete, transition to the copy_relations
     fork); confirm against the actual workflow file. -->
<action name="reset-outputpath">
<fs>
<delete path='${workingDir}/projecttoresult_propagation/relation'/>
<delete path='${workingDir}/projecttoresult_propagation'/>
</fs>
<ok to="copy_relation"/>
<ok to="copy_relations"/>
<error to="Kill"/>
</action>
<!-- Fork: starts the five copy actions (relation, publication, dataset,
     otherresearchproduct, software) as parallel branches. -->
<fork name="copy_relations">
<path start="copy_relation"/>
<path start="copy_publication"/>
<path start="copy_dataset"/>
<path start="copy_orp"/>
<path start="copy_software"/>
</fork>
<action name="copy_relation">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
@ -49,10 +57,57 @@
<arg>${nameNode}/${sourcePath}/relation</arg>
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/relation</arg>
</distcp>
<ok to="prepare_project_results_association"/>
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp action, one branch of the copy_relations fork: copies ${sourcePath}/publication
     to ${workingDir}/projecttoresult_propagation/publication.
     Success leads to the wait join, failure to the Kill node. -->
<action name="copy_publication">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<!-- first arg: copy source, second arg: copy destination -->
<arg>${nameNode}/${sourcePath}/publication</arg>
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/publication</arg>
</distcp>
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp action, one branch of the copy_relations fork: copies ${sourcePath}/dataset
     to ${workingDir}/projecttoresult_propagation/dataset.
     Success leads to the wait join, failure to the Kill node. -->
<action name="copy_dataset">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<!-- first arg: copy source, second arg: copy destination -->
<arg>${nameNode}/${sourcePath}/dataset</arg>
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/dataset</arg>
</distcp>
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp action, one branch of the copy_relations fork: copies
     ${sourcePath}/otherresearchproduct to
     ${workingDir}/projecttoresult_propagation/otherresearchproduct.
     Success leads to the wait join, failure to the Kill node. -->
<action name="copy_orp">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<!-- first arg: copy source, second arg: copy destination -->
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/otherresearchproduct</arg>
</distcp>
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp action, one branch of the copy_relations fork: copies ${sourcePath}/software
     to ${workingDir}/projecttoresult_propagation/software.
     Success leads to the wait join, failure to the Kill node. -->
<action name="copy_software">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<!-- first arg: copy source, second arg: copy destination -->
<arg>${nameNode}/${sourcePath}/software</arg>
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/software</arg>
</distcp>
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- Join for the branches started by the copy_relations fork; control then passes to
     prepare_project_results_association.
     NOTE(review): node names must be unique within an Oozie workflow; confirm that no
     other node further down this file is also named "wait". -->
<join name="wait" to="prepare_project_results_association"/>
<action name="prepare_project_results_association">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>

View File

@ -22,11 +22,20 @@
<!-- Filesystem action: deletes output under ${workingDir}/resulttoorganization_propagation.
     Failure leads to the Kill node.
     NOTE(review): two delete lines and two ok transitions appear below. This listing is a
     rendered diff without add/remove markers, so each pair is presumably the old line
     followed by its replacement (whole-directory delete, transition to the copy_relations
     fork); confirm against the actual workflow file. -->
<action name="reset-outputpath">
<fs>
<delete path='${workingDir}/resulttoorganization_propagation/relation'/>
<delete path='${workingDir}/resulttoorganization_propagation'/>
</fs>
<ok to="copy_relation"/>
<ok to="copy_relations"/>
<error to="Kill"/>
</action>
<!-- Fork: starts the five copy actions (relation, publication, dataset,
     otherresearchproduct, software) as parallel branches. -->
<fork name="copy_relations">
<path start="copy_relation"/>
<path start="copy_publication"/>
<path start="copy_dataset"/>
<path start="copy_orp"/>
<path start="copy_software"/>
</fork>
<action name="copy_relation">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
@ -34,10 +43,53 @@
<arg>${nameNode}/${sourcePath}/relation</arg>
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/relation</arg>
</distcp>
<ok to="prepare_result_organization_association"/>
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp action, one branch of the copy_relations fork: copies ${sourcePath}/publication
     to ${workingDir}/resulttoorganization_propagation/publication.
     Success leads to the wait join, failure to the Kill node. -->
<action name="copy_publication">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<!-- first arg: copy source, second arg: copy destination -->
<arg>${nameNode}/${sourcePath}/publication</arg>
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/publication</arg>
</distcp>
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp action, one branch of the copy_relations fork: copies ${sourcePath}/dataset
     to ${workingDir}/resulttoorganization_propagation/dataset.
     Success leads to the wait join, failure to the Kill node. -->
<action name="copy_dataset">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<!-- first arg: copy source, second arg: copy destination -->
<arg>${nameNode}/${sourcePath}/dataset</arg>
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/dataset</arg>
</distcp>
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp action, one branch of the copy_relations fork: copies
     ${sourcePath}/otherresearchproduct to
     ${workingDir}/resulttoorganization_propagation/otherresearchproduct.
     Success leads to the wait join, failure to the Kill node. -->
<action name="copy_orp">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<!-- first arg: copy source, second arg: copy destination -->
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/otherresearchproduct</arg>
</distcp>
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp action, one branch of the copy_relations fork: copies ${sourcePath}/software
     to ${workingDir}/resulttoorganization_propagation/software.
     Success leads to the wait join, failure to the Kill node. -->
<action name="copy_software">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<!-- first arg: copy source, second arg: copy destination -->
<arg>${nameNode}/${sourcePath}/software</arg>
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/software</arg>
</distcp>
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- Join for the branches started by the copy_relations fork; control then passes to
     prepare_result_organization_association.
     NOTE(review): node names must be unique within an Oozie workflow; confirm that no
     other node further down this file is also named "wait". -->
<join name="wait" to="prepare_result_organization_association"/>
<action name="prepare_result_organization_association">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>