Added steps to reset the output directory and to copy the information that is not changed by the propagation step

This commit is contained in:
Miriam Baglioni 2020-04-23 12:12:07 +02:00
parent 19cd5b85c0
commit 6f35f5ca42
4 changed files with 156 additions and 17 deletions

View File

@ -39,12 +39,30 @@
</parameters> </parameters>
<start to="prepare_datasource_country_association"/> <start to="reset-outputpath"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill> </kill>
<!-- Cleanup step: deletes the country_propagation and preparedInfo directories
     under the workflow's workingDir. -->
<action name="reset-outputpath">
<fs>
<delete path='${workingDir}/country_propagation'/>
<delete path='${workingDir}/preparedInfo'/>
</fs>
<!-- Success continues with copy_relation; a failure routes to the Kill node. -->
<ok to="copy_relation"/>
<error to="Kill"/>
</action>
<!-- DistCp copy of the relation directory from sourcePath to
     country_propagation/relation under workingDir. Both arguments are prefixed
     with the nameNode value.
     NOTE(review): per the commit message this is data the propagation step
     does not change; confirm against the Spark jobs. -->
<action name="copy_relation">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<arg>${nameNode}/${sourcePath}/relation</arg>
<arg>${nameNode}/${workingDir}/country_propagation/relation</arg>
</distcp>
<!-- Success continues with prepare_datasource_country_association; a failure routes to Kill. -->
<ok to="prepare_datasource_country_association"/>
<error to="Kill"/>
</action>
<action name="prepare_datasource_country_association"> <action name="prepare_datasource_country_association">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
@ -65,7 +83,7 @@
<arg>--whitelist</arg><arg>${whitelist}</arg> <arg>--whitelist</arg><arg>${whitelist}</arg>
<arg>--allowedtypes</arg><arg>${allowedtypes}</arg> <arg>--allowedtypes</arg><arg>${allowedtypes}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg> <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg> <arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
<ok to="fork_join_apply_country_propagation"/> <ok to="fork_join_apply_country_propagation"/>
<error to="Kill"/> <error to="Kill"/>
@ -101,7 +119,7 @@
<arg>--saveGraph</arg><arg>${saveGraph}</arg> <arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/publication</arg> <arg>--outputPath</arg><arg>${workingDir}/country_propagation/publication</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
<ok to="wait"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>
@ -131,7 +149,7 @@
<arg>--saveGraph</arg><arg>${saveGraph}</arg> <arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/dataset</arg> <arg>--outputPath</arg><arg>${workingDir}/country_propagation/dataset</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
<ok to="wait"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>
@ -161,7 +179,7 @@
<arg>--saveGraph</arg><arg>${saveGraph}</arg> <arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/otherresearchproduct</arg> <arg>--outputPath</arg><arg>${workingDir}/country_propagation/otherresearchproduct</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
<ok to="wait"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>
@ -191,7 +209,7 @@
<arg>--saveGraph</arg><arg>${saveGraph}</arg> <arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/country_propagation/software</arg> <arg>--outputPath</arg><arg>${workingDir}/country_propagation/software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/country_propagation/preparedInfo</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark> </spark>
<ok to="wait"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>

View File

@ -27,11 +27,25 @@
</kill> </kill>
<action name="reset-outputpath"> <action name="reset-outputpath">
<fs> <fs>
<delete path='${workingDir}/projecttoresult_propagation/relation'/> <delete path='${workingDir}/orcid_propagation'/>
<delete path='${workingDir}/preparedInfo'/>
</fs> </fs>
<ok to="copy_relation"/>
<error to="Kill"/>
</action>
<action name="copy_relation">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<arg>${nameNode}/${sourcePath}/relation</arg>
<arg>${nameNode}/${workingDir}/orcid_propagation/relation</arg>
</distcp>
<ok to="fork_prepare_assoc_step1"/> <ok to="fork_prepare_assoc_step1"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<fork name="fork_prepare_assoc_step1"> <fork name="fork_prepare_assoc_step1">
<path start="join_prepare_publication"/> <path start="join_prepare_publication"/>
<path start="join_prepare_dataset"/> <path start="join_prepare_dataset"/>
@ -208,7 +222,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg> <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg> <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/publication</arg> <arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/publication</arg>
</spark> </spark>
<ok to="wait2"/> <ok to="wait2"/>
<error to="Kill"/> <error to="Kill"/>
@ -235,7 +249,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg> <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg> <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg> <arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/dataset</arg>
</spark> </spark>
<ok to="wait2"/> <ok to="wait2"/>
<error to="Kill"/> <error to="Kill"/>
@ -262,7 +276,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg> <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg> <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg> <arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/otherresearchproduct</arg>
</spark> </spark>
<ok to="wait2"/> <ok to="wait2"/>
<error to="Kill"/> <error to="Kill"/>
@ -289,7 +303,7 @@
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg> <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg> <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/software</arg> <arg>--outputPath</arg><arg>${workingDir}/orcid_propagation/software</arg>
</spark> </spark>
<ok to="wait2"/> <ok to="wait2"/>
<error to="Kill"/> <error to="Kill"/>

View File

@ -37,11 +37,19 @@
<action name="reset-outputpath"> <action name="reset-outputpath">
<fs> <fs>
<delete path='${workingDir}/projecttoresult_propagation/relation'/> <delete path='${workingDir}/projecttoresult_propagation'/>
</fs> </fs>
<ok to="copy_relation"/> <ok to="copy_relations"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<!-- Fork that starts five paths: copy_relation, copy_publication, copy_dataset,
     copy_orp and copy_software. -->
<fork name="copy_relations">
<path start="copy_relation"/>
<path start="copy_publication"/>
<path start="copy_dataset"/>
<path start="copy_orp"/>
<path start="copy_software"/>
</fork>
<action name="copy_relation"> <action name="copy_relation">
<distcp xmlns="uri:oozie:distcp-action:0.2"> <distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker> <job-tracker>${jobTracker}</job-tracker>
@ -49,10 +57,57 @@
<arg>${nameNode}/${sourcePath}/relation</arg> <arg>${nameNode}/${sourcePath}/relation</arg>
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/relation</arg> <arg>${nameNode}/${workingDir}/projecttoresult_propagation/relation</arg>
</distcp> </distcp>
<ok to="prepare_project_results_association"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<!-- DistCp copy of the publication directory from sourcePath to
     projecttoresult_propagation/publication under workingDir. -->
<action name="copy_publication">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<arg>${nameNode}/${sourcePath}/publication</arg>
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/publication</arg>
</distcp>
<!-- Success transitions to the "wait" node; a failure routes to Kill. -->
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp copy of the dataset directory from sourcePath to
     projecttoresult_propagation/dataset under workingDir. -->
<action name="copy_dataset">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<arg>${nameNode}/${sourcePath}/dataset</arg>
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/dataset</arg>
</distcp>
<!-- Success transitions to the "wait" node; a failure routes to Kill. -->
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp copy of the otherresearchproduct directory ("orp" in the action name)
     from sourcePath to projecttoresult_propagation/otherresearchproduct under
     workingDir. -->
<action name="copy_orp">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/otherresearchproduct</arg>
</distcp>
<!-- Success transitions to the "wait" node; a failure routes to Kill. -->
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp copy of the software directory from sourcePath to
     projecttoresult_propagation/software under workingDir. -->
<action name="copy_software">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<arg>${nameNode}/${sourcePath}/software</arg>
<arg>${nameNode}/${workingDir}/projecttoresult_propagation/software</arg>
</distcp>
<!-- Success transitions to the "wait" node; a failure routes to Kill. -->
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- Join for the paths that transition to "wait"; the workflow then continues
     with prepare_project_results_association. -->
<join name="wait" to="prepare_project_results_association"/>
<action name="prepare_project_results_association"> <action name="prepare_project_results_association">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>

View File

@ -22,11 +22,20 @@
<action name="reset-outputpath"> <action name="reset-outputpath">
<fs> <fs>
<delete path='${workingDir}/resulttoorganization_propagation/relation'/> <delete path='${workingDir}/resulttoorganization_propagation'/>
</fs> </fs>
<ok to="copy_relation"/> <ok to="copy_relations"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<!-- Fork that starts five paths: copy_relation, copy_publication, copy_dataset,
     copy_orp and copy_software. -->
<fork name="copy_relations">
<path start="copy_relation"/>
<path start="copy_publication"/>
<path start="copy_dataset"/>
<path start="copy_orp"/>
<path start="copy_software"/>
</fork>
<action name="copy_relation"> <action name="copy_relation">
<distcp xmlns="uri:oozie:distcp-action:0.2"> <distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker> <job-tracker>${jobTracker}</job-tracker>
@ -34,10 +43,53 @@
<arg>${nameNode}/${sourcePath}/relation</arg> <arg>${nameNode}/${sourcePath}/relation</arg>
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/relation</arg> <arg>${nameNode}/${workingDir}/resulttoorganization_propagation/relation</arg>
</distcp> </distcp>
<ok to="prepare_result_organization_association"/> <ok to="wait"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<!-- DistCp copy of the publication directory from sourcePath to
     resulttoorganization_propagation/publication under workingDir. -->
<action name="copy_publication">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<arg>${nameNode}/${sourcePath}/publication</arg>
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/publication</arg>
</distcp>
<!-- Success transitions to the "wait" node; a failure routes to Kill. -->
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp copy of the dataset directory from sourcePath to
     resulttoorganization_propagation/dataset under workingDir. -->
<action name="copy_dataset">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<arg>${nameNode}/${sourcePath}/dataset</arg>
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/dataset</arg>
</distcp>
<!-- Success transitions to the "wait" node; a failure routes to Kill. -->
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp copy of the otherresearchproduct directory ("orp" in the action name)
     from sourcePath to resulttoorganization_propagation/otherresearchproduct
     under workingDir. -->
<action name="copy_orp">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/otherresearchproduct</arg>
</distcp>
<!-- Success transitions to the "wait" node; a failure routes to Kill. -->
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- DistCp copy of the software directory from sourcePath to
     resulttoorganization_propagation/software under workingDir. -->
<action name="copy_software">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<arg>${nameNode}/${sourcePath}/software</arg>
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/software</arg>
</distcp>
<!-- Success transitions to the "wait" node; a failure routes to Kill. -->
<ok to="wait"/>
<error to="Kill"/>
</action>
<!-- Join for the paths that transition to "wait"; the workflow then continues
     with prepare_result_organization_association. -->
<join name="wait" to="prepare_result_organization_association"/>
<action name="prepare_result_organization_association"> <action name="prepare_result_organization_association">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>