<!-- 2021-08-09 11:53:58 +02:00 -->
<RESOURCE_PROFILE >
<!-- Profile header: resource identifier, resource type/kind, an empty resource URI and the creation timestamp. -->
<HEADER>
  <RESOURCE_IDENTIFIER value="74d90d54-bea4-4a79-82d9-adddcc89e661_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
  <RESOURCE_TYPE value="WorkflowDSResourceType"/>
  <RESOURCE_KIND value="WorkflowDSResources"/>
  <RESOURCE_URI value=""/>
  <DATE_OF_CREATION value="2021-08-06T09:18:40+00:00"/>
</HEADER>
<BODY >
<!-- Workflow metadata: name, type and priority of this profile. -->
<WORKFLOW_NAME> Graph construction [PROD NEW]</WORKFLOW_NAME>
<WORKFLOW_TYPE> Data Provision</WORKFLOW_TYPE>
<WORKFLOW_PRIORITY> 30</WORKFLOW_PRIORITY>
<CONFIGURATION start= "manual" >
<!-- 2022-01-21 13:59:46 +01:00 -->
<!-- Start node (SetEnvParameter): declares "unresolvedEntityPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setUnresolvedEntityPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> set the path of unresolved entities</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> unresolvedEntityPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /data/unresolved_PROD/content</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- 2021-08-09 11:53:58 +02:00 -->
<!-- Start node (SetEnvParameter): declares "nsPrefixBlacklist" as a comma-separated, user-managed list of funder nsPrefixes, then arcs to waitConfig. -->
<NODE name="setNsPrefixBlacklist" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> set blacklist of funder nsPrefixes</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> nsPrefixBlacklist</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "aggregatorGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="aggregatorGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the path containing the PROD AGGREGATOR graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> aggregatorGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/00_prod_graph_aggregator</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "rawGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setRawGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the RAW graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> rawGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/01_graph_raw</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "cleanedFirstGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setCleanedFirstGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the consistent graph cleaned</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> cleanedFirstGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/02_graph_cleaned</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- 2022-01-21 13:59:46 +01:00 -->
<!-- Start node (SetEnvParameter): declares "resolvedGraphPath" with a user-managed value, then arcs to waitConfig.
     The value sits under /tmp/prod_provision/graph like every other stage path in this profile (03 follows 02_graph_cleaned and precedes 04_graph_dedup). -->
<NODE name="setResolvedGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the RESOLVED graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> resolvedGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/03_graph_resolved</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- 2021-08-09 11:53:58 +02:00 -->
<!-- Start node (SetEnvParameter): declares "dedupGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setDedupGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the DEDUPED graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> dedupGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/04_graph_dedup</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "inferredGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setInferredGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the INFERRED graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> inferredGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/05_graph_inferred</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "consistentGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setConsistentGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the CONSISTENCY graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> consistentGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/06_graph_consistent</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "orcidGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setOrcidGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the ORCID enriched graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> orcidGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/07_graph_orcid</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "bulkTaggingGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setBulkTaggingGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the BULK TAGGED graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> bulkTaggingGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/08_graph_bulktagging</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "affiliationGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setAffiliationGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> affiliationGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/09_graph_affiliation</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "communityOrganizationGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setCommunityOrganizationGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the COMMUNITY from SELECTED SOURCES graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> communityOrganizationGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/10_graph_comunity_organization</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "fundingGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setFundingGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the FUNDING from SEMANTIC RELATION graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> fundingGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/11_graph_funding</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "communitySemRelGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setCommunitySemRelGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the COMMUNITY from SEMANTIC RELATION graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> communitySemRelGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/12_graph_comunity_sem_rel</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "countryGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setCountryGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the COUNTRY enriched graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> countryGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/13_graph_country</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "cleanedGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setCleanedGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the CLEANED graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> cleanedGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/14_graph_cleaned</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "blacklistedGraphPath" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setBlacklistedGraphPath" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the target path to store the blacklisted graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> blacklistedGraphPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_provision/graph/15_graph_blacklisted</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "bulkTaggingPathMap". The system-managed value is a JSON object whose keys
     (author, title, orcid, contributor, description) map to JSONPath-style expressions. Arcs to waitConfig.
     NOTE(review): the expressions are presumably evaluated against result records by the bulk tagging job; confirm against that job. -->
<NODE isStart= "true" name= "setBulkTaggingPathMap" type= "SetEnvParameter" >
<DESCRIPTION > Set the map of paths for the Bulk Tagging</DESCRIPTION>
<PARAMETERS >
<PARAM managedBy= "system" name= "parameterName" required= "true" type= "string" > bulkTaggingPathMap</PARAM>
<PARAM managedBy= "system" name= "parameterValue" required= "true" type= "string" > {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"}</PARAM>
</PARAMETERS>
<ARCS >
<ARC to= "waitConfig" />
</ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "propagationOrganizationCommunityMap". The system-managed parameterValue that follows
     is a JSON object mapping each organization identifier to a list of community ids; the node then arcs to waitConfig. -->
<NODE isStart= "true" name= "setPropagationOrganizationCommunityMap" type= "SetEnvParameter" >
<DESCRIPTION > Set the map of associations organization, community list for the propagation of community to result through organization</DESCRIPTION>
<PARAMETERS >
<PARAM managedBy= "system" name= "parameterName" required= "true" type= "string" > propagationOrganizationCommunityMap</PARAM>
<PARAM managedBy= "system" name= "parameterValue" required= "true" type= "string" > {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], 
"20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"],
"20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]}
</PARAM>
</PARAMETERS>
<ARCS >
<ARC to= "waitConfig" />
</ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "dedupConfig" with a user-managed value, then arcs to waitConfig. -->
<NODE name="setDedupConfig" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the dedup orchestrator name</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> dedupConfig</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> dedup-similarity-result-decisiontree-v2</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "actionSetIdsRawGraph" as a comma-separated, user-managed list of ActionSet ids, then arcs to waitConfig. -->
<NODE name="actionSetsRaw" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> declares the ActionSet ids to promote in the RAW graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> actionSetIdsRawGraph</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "actionSetIdsIISGraph" as a comma-separated, user-managed list of ActionSet ids, then arcs to waitConfig. -->
<NODE name="actionSetsIIS" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> declares the ActionSet ids to promote in the INFERRED graph</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> actionSetIdsIISGraph</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Start node (SetEnvParameter): declares "isLookUpUrl" with a system-managed service URL, then arcs to waitConfig. -->
<NODE name="setIsLookUpUrl" type="SetEnvParameter" isStart="true">
  <DESCRIPTION> Set the IS lookup service address</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> isLookUpUrl</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="system"> http://services.openaire.eu:8280/is/services/isLookUp?wsdl</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig"/>
  </ARCS>
</NODE>
<!-- Join node (isJoin="true", no type attribute, empty PARAMETERS): target of the start nodes' arcs; arcs out to the nine nodes listed below. -->
<NODE name="waitConfig" isJoin="true">
  <DESCRIPTION> wait configurations</DESCRIPTION>
  <PARAMETERS/>
  <ARCS>
    <ARC to="reuseODFClaims"/>
    <ARC to="reuseOAFClaims"/>
    <ARC to="reuseODF_hdfs"/>
    <ARC to="reuseOAF_hdfs"/>
    <ARC to="reuseODF"/>
    <ARC to="reuseOAF"/>
    <ARC to="reuseDB"/>
    <ARC to="reuseDBOpenorgs"/>
    <ARC to="contentPath"/>
  </ARCS>
</NODE>
<!-- SetEnvParameter node: declares "reuseODFClaims"; the user-managed value is restricted to 'true'/'false' by validValues. Arcs to waitConfig2. -->
<NODE name="reuseODFClaims" type="SetEnvParameter">
  <DESCRIPTION> reuse cached ODF claims from the PROD aggregation system</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> reuseODFClaims</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user" function="validValues(['true', 'false'])"> true</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig2"/>
  </ARCS>
</NODE>
<!-- SetEnvParameter node: declares "reuseODFhdfs" (the node name is reuseODF_hdfs, the parameter name has no underscore); value restricted to 'true'/'false'. Arcs to waitConfig2. -->
<NODE name="reuseODF_hdfs" type="SetEnvParameter">
  <DESCRIPTION> reuse cached ODF records on HDFS from the PROD aggregation system</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> reuseODFhdfs</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user" function="validValues(['true', 'false'])"> true</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig2"/>
  </ARCS>
</NODE>
<!-- SetEnvParameter node: declares "reuseOAFClaims"; the user-managed value is restricted to 'true'/'false' by validValues. Arcs to waitConfig2. -->
<NODE name="reuseOAFClaims" type="SetEnvParameter">
  <DESCRIPTION> reuse cached OAF claims from the PROD aggregation system</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> reuseOAFClaims</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user" function="validValues(['true', 'false'])"> true</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig2"/>
  </ARCS>
</NODE>
<!-- SetEnvParameter node: declares "reuseOAFhdfs" (the node name is reuseOAF_hdfs, the parameter name has no underscore); value restricted to 'true'/'false'. Arcs to waitConfig2. -->
<NODE name="reuseOAF_hdfs" type="SetEnvParameter">
  <DESCRIPTION> reuse cached OAF records on HDFS from the PROD aggregation system</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> reuseOAFhdfs</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user" function="validValues(['true', 'false'])"> true</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig2"/>
  </ARCS>
</NODE>
<!-- SetEnvParameter node: declares "reuseDB"; the user-managed value is restricted to 'true'/'false' by validValues. Arcs to waitConfig2. -->
<NODE name="reuseDB" type="SetEnvParameter">
  <DESCRIPTION> reuse cached DB content from the PROD aggregation system</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> reuseDB</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user" function="validValues(['true', 'false'])"> true</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig2"/>
  </ARCS>
</NODE>
<!-- SetEnvParameter node: declares "reuseDBOpenorgs"; the user-managed value is restricted to 'true'/'false' by validValues. Arcs to waitConfig2. -->
<NODE name="reuseDBOpenorgs" type="SetEnvParameter">
  <DESCRIPTION> reuse cached OpenOrgs content from the PROD aggregation system</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> reuseDBOpenorgs</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user" function="validValues(['true', 'false'])"> true</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig2"/>
  </ARCS>
</NODE>
<!-- SetEnvParameter node: declares "reuseODF"; the user-managed value is restricted to 'true'/'false' by validValues. Arcs to waitConfig2. -->
<NODE name="reuseODF" type="SetEnvParameter">
  <DESCRIPTION> reuse cached ODF content from the PROD aggregation system</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> reuseODF</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user" function="validValues(['true', 'false'])"> true</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig2"/>
  </ARCS>
</NODE>
<!-- SetEnvParameter node: declares "reuseOAF"; the user-managed value is restricted to 'true'/'false' by validValues. Arcs to waitConfig2. -->
<NODE name="reuseOAF" type="SetEnvParameter">
  <DESCRIPTION> reuse cached OAF content from the PROD aggregation system</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> reuseOAF</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user" function="validValues(['true', 'false'])"> true</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig2"/>
  </ARCS>
</NODE>
<!-- SetEnvParameter node: declares "contentPath" with a user-managed value, then arcs to waitConfig2. -->
<NODE name="contentPath" type="SetEnvParameter">
  <DESCRIPTION> set the PROD aggregator content path</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="parameterName" type="string" required="true" managedBy="system"> contentPath</PARAM>
    <PARAM name="parameterValue" type="string" required="true" managedBy="user"> /tmp/prod_aggregator</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="waitConfig2"/>
  </ARCS>
</NODE>
<!-- Join node (isJoin="true", empty PARAMETERS): target of the reuse* and contentPath nodes; arcs to aggregatorGraph. -->
<NODE name="waitConfig2" isJoin="true">
  <DESCRIPTION> wait configurations</DESCRIPTION>
  <PARAMETERS/>
  <ARCS>
    <ARC to="aggregatorGraph"/>
  </ARCS>
</NODE>
<!-- SubmitHadoopJob node: hadoopJob "executeOozieJob" on cluster "IIS", Oozie application
     /lib/dnet/PROD/oa/graph/raw_all/oozie_app. The envParams values are names declared by SetEnvParameter nodes
     in this profile (e.g. aggregatorGraphPath, isLookUpUrl, reuseODFhdfs, nsPrefixBlacklist).
     NOTE(review): presumably each is resolved at run time and passed to the job under the key on its left; confirm against the workflow engine.
     The mongo*, mdstoreManagerUrl and postgres* entries in params are empty strings in this profile. Arcs to promoteActionsRaw. -->
<NODE name= "aggregatorGraph" type= "SubmitHadoopJob" >
<DESCRIPTION > create the PROD AGGREGATOR graph</DESCRIPTION>
<PARAMETERS >
<PARAM managedBy= "system" name= "hadoopJob" required= "true" type= "string" > executeOozieJob</PARAM>
<PARAM managedBy= "system" name= "cluster" required= "true" type= "string" > IIS</PARAM>
<PARAM managedBy= "system" name= "envParams" required= "true" type= "string" >
{
'graphOutputPath' : 'aggregatorGraphPath',
'isLookupUrl' : 'isLookUpUrl',
'reuseODFClaims' : 'reuseODFClaims',
'reuseOAFClaims' : 'reuseOAFClaims',
'reuseDB' : 'reuseDB',
'reuseDBOpenorgs' : 'reuseDBOpenorgs',
'reuseODF' : 'reuseODF',
'reuseODF_hdfs' : 'reuseODFhdfs',
'reuseOAF' : 'reuseOAF',
'reuseOAF_hdfs' : 'reuseOAFhdfs',
'contentPath' : 'contentPath',
'nsPrefixBlacklist' : 'nsPrefixBlacklist'
}
</PARAM>
<PARAM managedBy= "system" name= "params" required= "true" type= "string" >
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app',
'mongoURL' : '',
'mongoDb' : '',
'mdstoreManagerUrl' : '',
'postgresURL' : '',
'postgresUser' : '',
'postgresPassword' : '',
'postgresOpenOrgsURL' : '',
'postgresOpenOrgsUser' : '',
'postgresOpenOrgsPassword' : '',
'shouldHashId' : 'true',
'importOpenorgs' : 'true',
'workingDir' : '/tmp/prod_provision/working_dir/prod_aggregator'
}
</PARAM>
<PARAM managedBy= "system" name= "oozieReportActionsCsv" required= "true" type= "string" > build-report</PARAM>
</PARAMETERS>
<ARCS >
<ARC to= "promoteActionsRaw" />
</ARCS>
</NODE>
<!-- SubmitHadoopJob node: hadoopJob "executeOozieJob" on cluster "IIS", Oozie application
     /lib/dnet/PROD/actionmanager/wf/main/oozie_app. envParams references actionSetIdsRawGraph, aggregatorGraphPath (input),
     rawGraphPath (output) and isLookUpUrl, all declared by SetEnvParameter nodes in this profile.
     Every activePromote*ActionPayload flag is 'true'. Arcs to graphCleaningFirst. -->
<NODE name= "promoteActionsRaw" type= "SubmitHadoopJob" >
<DESCRIPTION > create the RAW graph</DESCRIPTION>
<PARAMETERS >
<PARAM managedBy= "system" name= "hadoopJob" required= "true" type= "string" > executeOozieJob</PARAM>
<PARAM managedBy= "system" name= "cluster" required= "true" type= "string" > IIS</PARAM>
<PARAM managedBy= "system" name= "envParams" required= "true" type= "string" >
{
'inputActionSetIds' : 'actionSetIdsRawGraph',
'inputGraphRootPath' : 'aggregatorGraphPath',
'outputGraphRootPath' : 'rawGraphPath',
'isLookupUrl' : 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy= "system" name= "params" required= "true" type= "string" >
{
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G',
'activePromoteDatasetActionPayload' : 'true',
'activePromoteDatasourceActionPayload' : 'true',
'activePromoteOrganizationActionPayload' : 'true',
'activePromoteOtherResearchProductActionPayload' : 'true',
'activePromoteProjectActionPayload' : 'true',
'activePromotePublicationActionPayload' : 'true',
'activePromoteRelationActionPayload' : 'true',
'activePromoteResultActionPayload' : 'true',
'activePromoteSoftwareActionPayload' : 'true',
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsRaw'
}
</PARAM>
<PARAM managedBy= "system" name= "oozieReportActionsCsv" required= "true" type= "string" > build-report</PARAM>
</PARAMETERS>
<ARCS >
<ARC to= "graphCleaningFirst" />
</ARCS>
</NODE>
<!-- SubmitHadoopJob node: hadoopJob "executeOozieJob" on cluster "IIS", Oozie application
     /lib/dnet/PROD/oa/graph/clean/oozie_app. envParams references rawGraphPath (input), cleanedFirstGraphPath (output)
     and isLookUpUrl, all declared by SetEnvParameter nodes in this profile. Arcs to patchHostedBy. -->
<NODE name= "graphCleaningFirst" type= "SubmitHadoopJob" >
<DESCRIPTION > clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
<PARAMETERS >
<PARAM managedBy= "system" name= "hadoopJob" required= "true" type= "string" > executeOozieJob</PARAM>
<PARAM managedBy= "system" name= "cluster" required= "true" type= "string" > IIS</PARAM>
<PARAM managedBy= "system" name= "envParams" required= "true" type= "string" >
{
'graphInputPath' : 'rawGraphPath',
'graphOutputPath': 'cleanedFirstGraphPath',
'isLookupUrl': 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy= "system" name= "params" required= "true" type= "string" >
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/clean'
}
</PARAM>
<PARAM managedBy= "system" name= "oozieReportActionsCsv" required= "true" type= "string" > build-report</PARAM>
</PARAMETERS>
<ARCS >
<ARC to= "patchHostedBy" />
</ARCS>
</NODE>
<!-- SubmitHadoopJob node: hadoopJob "executeOozieJob" on cluster "IIS", Oozie application
     /lib/dnet/PROD/oa/graph/hostedbymap/oozie_app. envParams references only cleanedFirstGraphPath (as sourcePath).
     workingDir and outputPath in params are the same directory. Arcs to graphResolution.
     NOTE(review): no graph output env parameter is passed, so the job presumably updates sourcePath in place; confirm against the Oozie application. -->
<NODE name= "patchHostedBy" type= "SubmitHadoopJob" >
<DESCRIPTION > updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI</DESCRIPTION>
<PARAMETERS >
<PARAM managedBy= "system" name= "hadoopJob" required= "true" type= "string" > executeOozieJob</PARAM>
<PARAM managedBy= "system" name= "cluster" required= "true" type= "string" > IIS</PARAM>
<PARAM managedBy= "system" name= "envParams" required= "true" type= "string" >
{
'sourcePath' : 'cleanedFirstGraphPath'
}
</PARAM>
<PARAM managedBy= "system" name= "params" required= "true" type= "string" >
{
'resumeFrom' : 'prepareInfo',
'hostedByMapPath' : '/user/dnet.production/data/hostedByMap',
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hostedbymap/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/hostedbymap',
'outputPath' : '/tmp/prod_provision/working_dir/hostedbymap',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G'
}
</PARAM>
<PARAM managedBy= "system" name= "oozieReportActionsCsv" required= "true" type= "string" > build-report</PARAM>
</PARAMETERS>
<ARCS >
<ARC to= "graphResolution" />
</ARCS>
</NODE>
<!-- SubmitHadoopJob node: hadoopJob "executeOozieJob" on cluster "IIS", Oozie application
     /lib/dnet/PROD/oa/graph/resolution/oozie_app. envParams references cleanedFirstGraphPath (graphBasePath),
     unresolvedEntityPath and resolvedGraphPath (targetPath), all declared by SetEnvParameter nodes in this profile.
     Arcs to duplicateScan.
     NOTE(review): unlike the other SubmitHadoopJob nodes in this profile, no oozieReportActionsCsv parameter is declared here; confirm this is intentional. -->
<NODE name= "graphResolution" type= "SubmitHadoopJob" >
<DESCRIPTION > Graph resolution</DESCRIPTION>
<PARAMETERS >
<PARAM managedBy= "system" name= "hadoopJob" required= "true" type= "string" > executeOozieJob</PARAM>
<PARAM managedBy= "system" name= "cluster" required= "true" type= "string" > IIS</PARAM>
<PARAM managedBy= "system" name= "envParams" required= "true" type= "string" >
{
'graphBasePath':'cleanedFirstGraphPath',
'unresolvedPath' :'unresolvedEntityPath',
'targetPath':'resolvedGraphPath'
}
</PARAM>
<PARAM managedBy= "system" name= "params" required= "true" type= "string" >
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/relation_resolution',
'shouldResolveEntities' : 'false',
'sparkExecutorCores' : '4',
'sparkExecutorMemory' : '9G'
}
</PARAM>
</PARAMETERS>
<ARCS >
<ARC to= "duplicateScan" />
</ARCS>
</NODE>
<!-- SubmitHadoopJob node: hadoopJob "executeOozieJob" on cluster "IIS", Oozie application
     /lib/dnet/PROD/oa/dedup/scan/oozie_app. envParams references dedupConfig (as actionSetId), resolvedGraphPath (input),
     dedupGraphPath (output) and isLookUpUrl, all declared by SetEnvParameter nodes in this profile.
     The envParams payload must stay a single JSON object with nothing but key/value pairs between the braces.
     Arcs to promoteActionsIIS. -->
<NODE name= "duplicateScan" type= "SubmitHadoopJob" >
<DESCRIPTION > search for duplicates in the resolved graph</DESCRIPTION>
<PARAMETERS >
<PARAM managedBy= "system" name= "hadoopJob" required= "true" type= "string" > executeOozieJob</PARAM>
<PARAM managedBy= "system" name= "cluster" required= "true" type= "string" > IIS</PARAM>
<PARAM managedBy= "system" name= "envParams" required= "true" type= "string" >
{
'actionSetId' : 'dedupConfig',
'graphBasePath' : 'resolvedGraphPath',
'dedupGraphPath': 'dedupGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy= "system" name= "params" required= "true" type= "string" >
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app',
'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple',
'workingPath' : '/tmp/prod_provision/working_dir/dedup',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G'
}
</PARAM>
<PARAM managedBy= "system" name= "oozieReportActionsCsv" required= "true" type= "string" > build-report</PARAM>
</PARAMETERS>
<ARCS >
<ARC to= "promoteActionsIIS" />
</ARCS>
</NODE>
<!-- SubmitHadoopJob node: hadoopJob "executeOozieJob" on cluster "IIS", Oozie application
     /lib/dnet/PROD/actionmanager/wf/main/oozie_app (the same application path as promoteActionsRaw).
     envParams references actionSetIdsIISGraph, dedupGraphPath (input), inferredGraphPath (output) and isLookUpUrl.
     Every activePromote*ActionPayload flag is 'true'. Arcs to dedupConsistency. -->
<NODE name= "promoteActionsIIS" type= "SubmitHadoopJob" >
<DESCRIPTION > create the INFERRED graph</DESCRIPTION>
<PARAMETERS >
<PARAM managedBy= "system" name= "hadoopJob" required= "true" type= "string" > executeOozieJob</PARAM>
<PARAM managedBy= "system" name= "cluster" required= "true" type= "string" > IIS</PARAM>
<PARAM managedBy= "system" name= "envParams" required= "true" type= "string" >
{
'inputActionSetIds' : 'actionSetIdsIISGraph',
'inputGraphRootPath' : 'dedupGraphPath',
'outputGraphRootPath' : 'inferredGraphPath',
'isLookupUrl' : 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy= "system" name= "params" required= "true" type= "string" >
{
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G',
'activePromoteDatasetActionPayload' : 'true',
'activePromoteDatasourceActionPayload' : 'true',
'activePromoteOrganizationActionPayload' : 'true',
'activePromoteOtherResearchProductActionPayload' : 'true',
'activePromoteProjectActionPayload' : 'true',
'activePromotePublicationActionPayload' : 'true',
'activePromoteRelationActionPayload' : 'true',
'activePromoteResultActionPayload' : 'true',
'activePromoteSoftwareActionPayload' : 'true',
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsIIS'
}
</PARAM>
<PARAM managedBy= "system" name= "oozieReportActionsCsv" required= "true" type= "string" > build-report</PARAM>
</PARAMETERS>
<ARCS >
<ARC to= "dedupConsistency" />
</ARCS>
</NODE>
<!-- SubmitHadoopJob node: hadoopJob "executeOozieJob" on cluster "IIS", Oozie application
     /lib/dnet/PROD/oa/dedup/consistency/oozie_app. envParams references inferredGraphPath (input) and consistentGraphPath (output).
     workingPath is the same directory that duplicateScan uses (/tmp/prod_provision/working_dir/dedup). Arcs to orcidPropagation. -->
<NODE name= "dedupConsistency" type= "SubmitHadoopJob" >
<DESCRIPTION > mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
<PARAMETERS >
<PARAM managedBy= "system" name= "hadoopJob" required= "true" type= "string" > executeOozieJob</PARAM>
<PARAM managedBy= "system" name= "cluster" required= "true" type= "string" > IIS</PARAM>
<PARAM managedBy= "system" name= "envParams" required= "true" type= "string" >
{
'graphBasePath' : 'inferredGraphPath',
'graphOutputPath': 'consistentGraphPath'
}
</PARAM>
<PARAM managedBy= "system" name= "params" required= "true" type= "string" >
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app',
'workingPath' : '/tmp/prod_provision/working_dir/dedup'
}
</PARAM>
<PARAM managedBy= "system" name= "oozieReportActionsCsv" required= "true" type= "string" > build-report</PARAM>
</PARAMETERS>
<ARCS >
<ARC to= "orcidPropagation" />
</ARCS>
</NODE>
<!-- SubmitHadoopJob node: hadoopJob "executeOozieJob" on cluster "IIS", Oozie application
     /lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app. envParams references consistentGraphPath (sourcePath)
     and orcidGraphPath (outputPath). allowedsemrels is a semicolon-separated list (isSupplementedBy;isSupplementTo).
     Arcs to bulkTagging. -->
<NODE name= "orcidPropagation" type= "SubmitHadoopJob" >
<DESCRIPTION > propagates ORCID among results linked by allowedsemrels semantic relationships</DESCRIPTION>
<PARAMETERS >
<PARAM managedBy= "system" name= "hadoopJob" required= "true" type= "string" > executeOozieJob</PARAM>
<PARAM managedBy= "system" name= "cluster" required= "true" type= "string" > IIS</PARAM>
<PARAM managedBy= "system" name= "envParams" required= "true" type= "string" >
{
'sourcePath' : 'consistentGraphPath',
'outputPath': 'orcidGraphPath'
}
</PARAM>
<PARAM managedBy= "system" name= "params" required= "true" type= "string" >
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/orcid',
'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
'saveGraph' : 'true'
}
</PARAM>
<PARAM managedBy= "system" name= "oozieReportActionsCsv" required= "true" type= "string" > build-report</PARAM>
</PARAMETERS>
<ARCS >
<ARC to= "bulkTagging" />
</ARCS>
</NODE>
<!--
  bulkTagging
  SubmitHadoopJob node: runs the executeOozieJob job on the IIS cluster and
  launches the Oozie application /lib/dnet/PROD/oa/enrichment/bulktag/oozie_app.
  envParams binds sourcePath to orcidGraphPath, outputPath to
  bulkTaggingGraphPath, isLookUpUrl to isLookUpUrl and pathMap to
  bulkTaggingPathMap (values look like workflow environment parameter names,
  TODO confirm).
  Single outgoing arc: affiliationPropagation.
-->
<NODE name="bulkTagging" type="SubmitHadoopJob">
<DESCRIPTION> mark results respecting some rules as belonging to communities</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string"> executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string"> IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'orcidGraphPath',
'outputPath': 'bulkTaggingGraphPath',
'isLookUpUrl' : 'isLookUpUrl',
'pathMap' : 'bulkTaggingPathMap'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/bulktag/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/bulktag'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string"> build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="affiliationPropagation"/>
</ARCS>
</NODE>
<!--
  affiliationPropagation
  SubmitHadoopJob node: runs the executeOozieJob job on the IIS cluster and
  launches the Oozie application
  /lib/dnet/PROD/oa/enrichment/affiliation/oozie_app.
  envParams binds sourcePath to bulkTaggingGraphPath and outputPath to
  affiliationGraphPath (values look like workflow environment parameter names,
  TODO confirm).
  params: saveGraph is 'true'; blacklist is the literal string 'empty'
  (presumably a sentinel understood by the Oozie application, verify).
  Single outgoing arc: communityOrganizationPropagation.
-->
<NODE name="affiliationPropagation" type="SubmitHadoopJob">
<DESCRIPTION> creates relationships between results and organizations when the organizations are associated to institutional repositories</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string"> executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string"> IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'bulkTaggingGraphPath',
'outputPath': 'affiliationGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/affiliation/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/affiliation',
'saveGraph' : 'true',
'blacklist' : 'empty'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string"> build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="communityOrganizationPropagation"/>
</ARCS>
</NODE>
<!--
  communityOrganizationPropagation
  SubmitHadoopJob node: runs the executeOozieJob job on the IIS cluster and
  launches the Oozie application
  /lib/dnet/PROD/oa/enrichment/community_organization/oozie_app.
  envParams binds sourcePath to affiliationGraphPath, outputPath to
  communityOrganizationGraphPath and organizationtoresultcommunitymap to
  propagationOrganizationCommunityMap (values look like workflow environment
  parameter names, TODO confirm).
  params: saveGraph is 'true'.
  Single outgoing arc: resultProjectPropagation.
-->
<NODE name="communityOrganizationPropagation" type="SubmitHadoopJob">
<DESCRIPTION> marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string"> executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string"> IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'affiliationGraphPath',
'outputPath': 'communityOrganizationGraphPath',
'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_organization/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/community_organization',
'saveGraph' : 'true'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string"> build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="resultProjectPropagation"/>
</ARCS>
</NODE>
<!--
  resultProjectPropagation
  SubmitHadoopJob node: runs the executeOozieJob job on the IIS cluster and
  launches the Oozie application /lib/dnet/PROD/oa/enrichment/funding/oozie_app.
  envParams binds sourcePath to communityOrganizationGraphPath and outputPath
  to fundingGraphPath (values look like workflow environment parameter names,
  TODO confirm).
  params: allowedsemrels is a ';'-separated list (isSupplementedBy,
  isSupplementTo); saveGraph is 'true'.
  Single outgoing arc: communitySemrelPropagation.
-->
<NODE name="resultProjectPropagation" type="SubmitHadoopJob">
<DESCRIPTION> creates relations between projects and results linked to other results through allowedsemrels semantic relations linked to projects</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string"> executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string"> IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'communityOrganizationGraphPath',
'outputPath': 'fundingGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/funding/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/funding',
'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
'saveGraph' : 'true'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string"> build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="communitySemrelPropagation"/>
</ARCS>
</NODE>
<!--
  communitySemrelPropagation
  SubmitHadoopJob node: runs the executeOozieJob job on the IIS cluster and
  launches the Oozie application
  /lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app.
  envParams binds sourcePath to fundingGraphPath, outputPath to
  communitySemRelGraphPath and isLookUpUrl to isLookUpUrl (values look like
  workflow environment parameter names, TODO confirm).
  params: allowedsemrels is a ';'-separated list (isSupplementedBy,
  isSupplementTo); saveGraph is 'true'.
  Single outgoing arc: countryPropagation.
-->
<NODE name="communitySemrelPropagation" type="SubmitHadoopJob">
<DESCRIPTION> tags as belonging to communities the results that are in an allowedsemrels relation with other results already linked to communities </DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string"> executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string"> IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'fundingGraphPath',
'outputPath': 'communitySemRelGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/community_semrel',
'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
'saveGraph' : 'true'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string"> build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="countryPropagation"/>
</ARCS>
</NODE>
<!--
  countryPropagation
  SubmitHadoopJob node: runs the executeOozieJob job on the IIS cluster and
  launches the Oozie application /lib/dnet/PROD/oa/enrichment/country/oozie_app.
  envParams binds sourcePath to communitySemRelGraphPath and outputPath to
  countryGraphPath (values look like workflow environment parameter names,
  TODO confirm).
  params: this node sets its own Spark sizing (sparkExecutorCores 3,
  sparkExecutorMemory 10G); allowedtypes is pubsrepository::institutional;
  whitelist holds two identifiers separated by ';'; saveGraph is 'true'.
  Single outgoing arc: graphCleaning.
-->
<NODE name="countryPropagation" type="SubmitHadoopJob">
<DESCRIPTION> associates to results collected from allowedtypes and those in the whitelist the country of the organization(s) handling the datasource they are collected from </DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string"> executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string"> IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'communitySemRelGraphPath',
'outputPath': 'countryGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/country/oozie_app',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G',
'workingDir' : '/tmp/prod_provision/working_dir/country',
'allowedtypes' : 'pubsrepository::institutional',
'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0',
'saveGraph' : 'true'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string"> build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="graphCleaning"/>
</ARCS>
</NODE>
<!--
  graphCleaning
  SubmitHadoopJob node: runs the executeOozieJob job on the IIS cluster and
  launches the Oozie application /lib/dnet/PROD/oa/graph/clean/oozie_app.
  envParams binds graphInputPath to countryGraphPath, graphOutputPath to
  cleanedGraphPath and isLookupUrl to isLookUpUrl (values look like workflow
  environment parameter names, TODO confirm).
  Note: the key is spelled isLookupUrl (lower-case 'u') while its value is
  isLookUpUrl (upper-case 'U'); other nodes in this profile use isLookUpUrl
  for both. Presumably the key matches what this Oozie application expects,
  verify before harmonising.
  Single outgoing arc: blacklistRelations.
-->
<NODE name="graphCleaning" type="SubmitHadoopJob">
<DESCRIPTION> clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string"> executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string"> IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'graphInputPath' : 'countryGraphPath',
'graphOutputPath': 'cleanedGraphPath',
'isLookupUrl': 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/clean'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string"> build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="blacklistRelations"/>
</ARCS>
</NODE>
<!--
  blacklistRelations
  SubmitHadoopJob node: runs the executeOozieJob job on the IIS cluster and
  launches the Oozie application
  /lib/dnet/PROD/oa/enrichment/blacklist/oozie_app.
  envParams binds sourcePath to cleanedGraphPath and outputPath to
  blacklistedGraphPath (values look like workflow environment parameter names,
  TODO confirm).
  Note: postgresURL, postgresUser and postgresPassword are empty strings in
  this profile; presumably they are filled in per deployment or were removed
  before publication. Confirm before running.
  Single outgoing arc: success.
-->
<NODE name="blacklistRelations" type="SubmitHadoopJob">
<DESCRIPTION> removes blacklisted relations </DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string"> executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string"> IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'cleanedGraphPath',
'outputPath': 'blacklistedGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/blacklist/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/blacklist',
'postgresURL' : '',
'postgresUser' : '',
'postgresPassword' : ''
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string"> build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="success"/>
</ARCS>
</NODE>
</CONFIGURATION>
<!--
  STATUS
  Records the last execution of this workflow: its id, date and status, plus
  an error element that is empty here. Presumably rewritten by the workflow
  engine after each run (TODO confirm), so manual edits may be overwritten.
-->
<STATUS>
<LAST_EXECUTION_ID> wf_20210723_171026_279</LAST_EXECUTION_ID>
<LAST_EXECUTION_DATE> 2021-07-24T00:00:39+00:00</LAST_EXECUTION_DATE>
<LAST_EXECUTION_STATUS> SUCCESS</LAST_EXECUTION_STATUS>
<LAST_EXECUTION_ERROR/>
</STATUS>
</BODY>
</RESOURCE_PROFILE>