[resolution wf] added optional parameter to skip the entity resolution

This commit is contained in:
Claudio Atzori 2021-11-26 15:38:56 +01:00
parent 5c6d328537
commit 014e872ae1
1 changed files with 61 additions and 3 deletions

View File

@ -12,6 +12,11 @@
<name>targetPath</name>
<description>the target path after resolution</description>
</property>
<property>
<name>shouldResolveEntities</name>
<value>true</value>
<description>allows to activate/deactivate the resolution process over the entities</description>
</property>
</parameters>
<start to="ResolveRelations"/>
@ -42,10 +47,18 @@
<arg>--workingPath</arg><arg>${workingDir}</arg>
<arg>--targetPath</arg><arg>${targetPath}</arg>
</spark>
<ok to="ResolveEntities"/>
<ok to="decision_resolveEntities"/>
<error to="Kill"/>
</action>
<decision name="decision_resolveEntities">
<switch>
<case to="copy_result">${wf:conf('shouldResolveEntities') eq false}</case>
<case to="ResolveEntities">${wf:conf('shouldResolveEntities') eq true}</case>
<default to="ResolveEntities"/>
</switch>
</decision>
<action name="ResolveEntities">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
@ -73,10 +86,55 @@
<error to="Kill"/>
</action>
<fork name="copy_result">
<path start="copy_publication"/>
<path start="copy_dataset"/>
<path start="copy_otherresearchproduct"/>
<path start="copy_software"/>
</fork>
<action name="copy_publication">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${graphBasePath}/publication</arg>
<arg>${nameNode}/${targetPath}/publication</arg>
</distcp>
<ok to="copy_wait_result"/>
<error to="Kill"/>
</action>
<action name="copy_dataset">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${graphBasePath}/dataset</arg>
<arg>${nameNode}/${targetPath}/dataset</arg>
</distcp>
<ok to="copy_wait_result"/>
<error to="Kill"/>
</action>
<action name="copy_otherresearchproduct">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${graphBasePath}/otherresearchproduct</arg>
<arg>${nameNode}/${targetPath}/otherresearchproduct</arg>
</distcp>
<ok to="copy_wait_result"/>
<error to="Kill"/>
</action>
<action name="copy_software">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${graphBasePath}/software</arg>
<arg>${nameNode}/${targetPath}/software</arg>
</distcp>
<ok to="copy_wait_result"/>
<error to="Kill"/>
</action>
<join name="copy_wait_result" to="copy_entities"/>
<fork name="copy_entities">
<path start="copy_organization"/>
<path start="copy_projects"/>
<path start="copy_datasources"/>
<path start="copy_datasource"/>
</fork>
<action name="copy_organization">
@ -97,7 +155,7 @@
<error to="Kill"/>
</action>
<action name="copy_datasources">
<action name="copy_datasource">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<arg>${nameNode}/${graphBasePath}/datasource</arg>
<arg>${nameNode}/${targetPath}/datasource</arg>