Added the MakeTar and send-to-Zenodo steps; adjusted the workflow parameters.

Miriam Baglioni 2020-08-11 15:38:20 +02:00
parent ddc19de2e9
commit 6f43acda5e
1 changed file with 75 additions and 33 deletions
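The deposition parameters introduced by this commit (accessToken, connectionUrl, metadata, newDeposition, conceptRecordId) are ordinary workflow properties, so they would normally be supplied at submission time. A minimal sketch of a job.properties, assuming the standard Oozie CLI; all values below are placeholders, not the project's actual configuration:

    # Hypothetical submission properties for the new Zenodo deposition steps
    accessToken=REPLACE_WITH_ZENODO_TOKEN
    connectionUrl=https://zenodo.org/api/deposit/depositions
    # JSON metadata for the deposition; the exact shape expected by SendToZenodoHDFS is not shown in this diff
    metadata={"title":"Graph dump","upload_type":"dataset"}
    newDeposition=true
    # conceptRecordId is only relevant when newDeposition=false (new version of an existing deposition)
    conceptRecordId=REPLACE_WITH_CONCEPT_RECORD_ID

Submission would then be the usual oozie job -config job.properties -run.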

@@ -13,6 +13,26 @@
             <name>outputPath</name>
             <description>the output path</description>
         </property>
+        <property>
+            <name>accessToken</name>
+            <description>the access token used for the deposition in Zenodo</description>
+        </property>
+        <property>
+            <name>connectionUrl</name>
+            <description>the connection url for Zenodo</description>
+        </property>
+        <property>
+            <name>metadata</name>
+            <description>the metadata associated with the deposition</description>
+        </property>
+        <property>
+            <name>newDeposition</name>
+            <description>true if it is a brand new deposition, false for a new version of an old deposition</description>
+        </property>
+        <property>
+            <name>conceptRecordId</name>
+            <description>for a new version, the id of the record of the old deposition</description>
+        </property>
         <property>
             <name>organizationCommunityMap</name>
             <description>the organization community map</description>
@@ -97,13 +117,13 @@
             <delete path="${outputPath}"/>
             <mkdir path="${outputPath}"/>
         </fs>
-        <ok to="fork_dump"/>
+        <ok to="save_community_map"/>
         <error to="Kill"/>
     </action>
     <action name="save_community_map">
         <java>
-            <main-class>eu.dnetlib.dhp.oa.graph.dump.community.SaveCommunityMap</main-class>
+            <main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
             <arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
             <arg>--nameNode</arg><arg>${nameNode}</arg>
             <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
@@ -143,8 +163,7 @@
             <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
             <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
             <arg>--outputPath</arg><arg>${workingDir}/result/publication</arg>
-<!--        <arg>&#45;&#45;communityMapPath</arg><arg>${workingDir}/communityMap</arg>-->
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
         </spark>
         <ok to="join_dump"/>
         <error to="Kill"/>
@@ -170,8 +189,7 @@
             <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
             <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
             <arg>--outputPath</arg><arg>${workingDir}/result/dataset</arg>
-<!--        <arg>&#45;&#45;communityMapPath</arg><arg>${workingDir}/communityMap</arg>-->
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
         </spark>
         <ok to="join_dump"/>
         <error to="Kill"/>
@@ -197,8 +215,7 @@
             <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
             <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
             <arg>--outputPath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
-<!--        <arg>&#45;&#45;communityMapPath</arg><arg>${workingDir}/communityMap</arg>-->
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
         </spark>
         <ok to="join_dump"/>
         <error to="Kill"/>
@@ -224,8 +241,7 @@
             <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
             <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
             <arg>--outputPath</arg><arg>${workingDir}/result/software</arg>
-<!--        <arg>&#45;&#45;communityMapPath</arg><arg>${workingDir}/communityMap</arg>-->
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
         </spark>
         <ok to="join_dump"/>
         <error to="Kill"/>
@@ -250,9 +266,9 @@
             </spark-opts>
             <arg>--sourcePath</arg><arg>${sourcePath}/organization</arg>
             <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/organization</arg>
-<!--        <arg>&#45;&#45;communityMapPath</arg><arg>${workingDir}/communityMap</arg>-->
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/collect/organization</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
+<!--        <arg>&#45;&#45;isLookUpUrl</arg><arg>${isLookUpUrl}</arg>-->
         </spark>
         <ok to="join_dump"/>
         <error to="Kill"/>
@@ -277,9 +293,9 @@
             </spark-opts>
             <arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
             <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/project</arg>
-<!--        <arg>&#45;&#45;communityMapPath</arg><arg>${workingDir}/communityMap</arg>-->
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/collect/project</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
+<!--        <arg>&#45;&#45;isLookUpUrl</arg><arg>${isLookUpUrl}</arg>-->
         </spark>
         <ok to="join_dump"/>
         <error to="Kill"/>
@@ -304,9 +320,9 @@
             </spark-opts>
             <arg>--sourcePath</arg><arg>${sourcePath}/datasource</arg>
             <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/datasource</arg>
-<!--        <arg>&#45;&#45;communityMapPath</arg><arg>${workingDir}/communityMap</arg>-->
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/collect/datasource</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
+<!--        <arg>&#45;&#45;isLookUpUrl</arg><arg>${isLookUpUrl}</arg>-->
         </spark>
         <ok to="join_dump"/>
         <error to="Kill"/>
@@ -330,7 +346,7 @@
                 --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
             </spark-opts>
             <arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/relation</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/relation/relation</arg>
         </spark>
         <ok to="join_dump"/>
         <error to="Kill"/>
@@ -349,8 +365,8 @@
     <action name="create_entities_fromcontext">
         <java>
             <main-class>eu.dnetlib.dhp.oa.graph.dump.graph.CreateContextEntities</main-class>
-            <arg>--hdfsPath</arg><arg>${outputPath}/context</arg>
-            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
+            <arg>--hdfsPath</arg><arg>${workingDir}/collect/context</arg>
+            <arg>--nameNode</arg><arg>${nameNode}</arg>
             <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
         </java>
         <ok to="join_context"/>
@@ -361,7 +377,7 @@
         <java>
             <main-class>eu.dnetlib.dhp.oa.graph.dump.graph.CreateContextRelation</main-class>
             <arg>--hdfsPath</arg><arg>${workingDir}/relation/context</arg>
-            <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
+            <arg>--nameNode</arg><arg>${nameNode}</arg>
             <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
         </java>
         <ok to="join_context"/>
@@ -424,8 +440,8 @@
             <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
             <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
             <arg>--outputPath</arg><arg>${workingDir}/relation/publication</arg>
-<!--        <arg>&#45;&#45;communityMapPath</arg><arg>${workingDir}/communityMap</arg>-->
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
+<!--        <arg>&#45;&#45;isLookUpUrl</arg><arg>${isLookUpUrl}</arg>-->
         </spark>
         <ok to="join_extract_relations"/>
         <error to="Kill"/>
@@ -451,8 +467,8 @@
             <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
             <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
             <arg>--outputPath</arg><arg>${workingDir}/relation/dataset</arg>
-<!--        <arg>&#45;&#45;communityMapPath</arg><arg>${workingDir}/communityMap</arg>-->
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
+<!--        <arg>&#45;&#45;isLookUpUrl</arg><arg>${isLookUpUrl}</arg>-->
         </spark>
         <ok to="join_extract_relations"/>
         <error to="Kill"/>
@@ -478,8 +494,8 @@
             <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
             <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
             <arg>--outputPath</arg><arg>${workingDir}/relation/orp</arg>
-<!--        <arg>&#45;&#45;communityMapPath</arg><arg>${workingDir}/communityMap</arg>-->
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
+<!--        <arg>&#45;&#45;isLookUpUrl</arg><arg>${isLookUpUrl}</arg>-->
         </spark>
         <ok to="join_extract_relations"/>
         <error to="Kill"/>
@@ -505,8 +521,8 @@
             <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
             <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
             <arg>--outputPath</arg><arg>${workingDir}/relation/software</arg>
-<!--        <arg>&#45;&#45;communityMapPath</arg><arg>${workingDir}/communityMap</arg>-->
-            <arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
+<!--        <arg>&#45;&#45;isLookUpUrl</arg><arg>${isLookUpUrl}</arg>-->
         </spark>
         <ok to="join_extract_relations"/>
         <error to="Kill"/>
@@ -533,13 +549,39 @@
                 --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
             </spark-opts>
             <arg>--sourcePath</arg><arg>${workingDir}</arg>
-            <arg>--outputPath</arg><arg>${outputPath}</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/collect</arg>
         </spark>
-        <ok to="End"/>
+        <ok to="make_archive"/>
+        <error to="Kill"/>
+    </action>
+    <action name="make_archive">
+        <java>
+            <main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
+            <arg>--hdfsPath</arg><arg>${outputPath}</arg>
+            <arg>--nameNode</arg><arg>${nameNode}</arg>
+            <arg>--sourcePath</arg><arg>${workingDir}/collect</arg>
+        </java>
+        <ok to="send_zenodo"/>
         <error to="Kill"/>
     </action>
+    <action name="send_zenodo">
+        <java>
+            <main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
+            <arg>--hdfsPath</arg><arg>${outputPath}</arg>
+            <arg>--nameNode</arg><arg>${nameNode}</arg>
+            <arg>--accessToken</arg><arg>${accessToken}</arg>
+            <arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
+            <arg>--metadata</arg><arg>${metadata}</arg>
+            <arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
+            <arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
+            <arg>--newDeposition</arg><arg>${newDeposition}</arg>
+        </java>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
     <end name="End"/>