forked from D-Net/dnet-hadoop
new input parameters and workflow definition to be used in the provision pipeline
This commit is contained in:
parent
026f297e49
commit
951517f9ec
|
@ -11,17 +11,11 @@
|
|||
"paramDescription": "the hive metastore uris",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"wu",
|
||||
"paramLongName":"writeUpdate",
|
||||
"paramDescription": "true if the update must be writte. No double check if information is already present",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"sg",
|
||||
"paramLongName":"saveGraph",
|
||||
"paramDescription": "true if the new version of the graph must be saved",
|
||||
"paramRequired": true
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName":"dop",
|
||||
|
|
|
@ -5,12 +5,8 @@
|
|||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>writeUpdate</name>
|
||||
<description>writes the information found for the update. No double check done if the information is already present</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>saveGraph</name>
|
||||
<description>writes new version of the graph after the propagation step</description>
|
||||
<name>outputPath</name>
|
||||
<description>sets the outputPath</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
|
@ -22,18 +18,29 @@
|
|||
|
||||
<action name="reset-outputpath">
|
||||
<fs>
|
||||
<delete path='${workingDir}/resulttoorganization_propagation'/>
|
||||
<delete path='${outputPath}/relation'/>
|
||||
<delete path='${outputPath}/datasource'/>
|
||||
<delete path='${outputPath}/organization'/>
|
||||
<delete path='${outputPath}/project'/>
|
||||
<delete path='${outputPath}/publication'/>
|
||||
<delete path='${outputPath}/dataset'/>
|
||||
<delete path='${outputPath}/software'/>
|
||||
<delete path='${outputPath}/otherresearchproduct'/>
|
||||
<delete path='${workingDir}/preparedInfo'/>
|
||||
</fs>
|
||||
<ok to="copy_relations"/>
|
||||
<ok to="copy_entities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="copy_relations">
|
||||
<fork name="copy_entities">
|
||||
<path start="copy_relation"/>
|
||||
<path start="copy_publication"/>
|
||||
<path start="copy_dataset"/>
|
||||
<path start="copy_orp"/>
|
||||
<path start="copy_software"/>
|
||||
<path start="copy_organization"/>
|
||||
<path start="copy_projects"/>
|
||||
<path start="copy_datasources"/>
|
||||
</fork>
|
||||
|
||||
<action name="copy_relation">
|
||||
|
@ -41,18 +48,17 @@
|
|||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/relation</arg>
|
||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_publication">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/publication</arg>
|
||||
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/publication</arg>
|
||||
<arg>${nameNode}/${outputPath}/publication</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -63,7 +69,7 @@
|
|||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
||||
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/dataset</arg>
|
||||
<arg>${nameNode}/${outputPath}/dataset</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -74,7 +80,7 @@
|
|||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/otherresearchproduct</arg>
|
||||
<arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -84,11 +90,44 @@
|
|||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/software</arg>
|
||||
<arg>${nameNode}/${workingDir}/resulttoorganization_propagation/software</arg>
|
||||
<arg>${nameNode}/${outputPath}/software</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<action name="copy_organization">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_projects">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||
<arg>${nameNode}/${outputPath}/project</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_datasources">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait" to="prepare_result_organization_association"/>
|
||||
<action name="prepare_result_organization_association">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
|
@ -108,8 +147,8 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/resulttoorganization_propagation/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/resulttoorganization_propagation/preparedInfo/alreadyLinked</arg>
|
||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
</spark>
|
||||
<ok to="fork_join_apply_resulttoorganization_propagation"/>
|
||||
<!-- <ok to="End"/>-->
|
||||
|
@ -142,12 +181,11 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--writeUpdate</arg><arg>${writeUpdate}</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/resulttoorganization_propagation/relation</arg>
|
||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/resulttoorganization_propagation/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/resulttoorganization_propagation/preparedInfo/alreadyLinked</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -173,17 +211,15 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--writeUpdate</arg><arg>${writeUpdate}</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/resulttoorganization_propagation/relation</arg>
|
||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/resulttoorganization_propagation/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/resulttoorganization_propagation/preparedInfo/alreadyLinked</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="join_propagation_otherresearchproduct">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
|
@ -204,12 +240,11 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--writeUpdate</arg><arg>${writeUpdate}</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/resulttoorganization_propagation/relation</arg>
|
||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/resulttoorganization_propagation/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/resulttoorganization_propagation/preparedInfo/alreadyLinked</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -235,12 +270,30 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--writeUpdate</arg><arg>${writeUpdate}</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/resulttoorganization_propagation/relation</arg>
|
||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/resulttoorganization_propagation/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/resulttoorganization_propagation/preparedInfo/alreadyLinked</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
<jar>dhp-propagation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
|
Loading…
Reference in New Issue