dnet-applications/apps/dnet-exporter-api/src/test/resources/eu/dnetlib/openaire/community/importer/old_provision_wf.xml

1251 lines
96 KiB
XML
Raw Normal View History

<RESOURCE_PROFILE>
<HEADER>
<RESOURCE_IDENTIFIER value="4801c33c-66ca-4ab6-af64-aa812194ec61_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
<RESOURCE_TYPE value="WorkflowDSResourceType"/>
<RESOURCE_KIND value="WorkflowDSResources"/>
<RESOURCE_URI value=""/>
<DATE_OF_CREATION value="2023-06-07T08:37:57+00:00"/>
</HEADER>
<BODY>
<WORKFLOW_NAME>Graph construction and processing</WORKFLOW_NAME>
<WORKFLOW_TYPE>Data Provision</WORKFLOW_TYPE>
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
<CONFIGURATION start="manual">
<NODE isStart="true" name="setParameters" type="SetEnvJsonParameters">
<DESCRIPTION>
- unresolvedEntityPath: path of unresolved entities
- nsPrefixBlacklist: blacklist of funder nsPrefixes from the prod aggregator
- idMappingPath: path of the map defining the relations id mappings
- dedupConfig: dedup orchestrator name
- actionSetIdsRawGraph: ActionSet ids to promote in the RAW graph
- isLookUpUrl: IS lookup service address
- iterations: number of iterations performed by the affiliation through semantic relation propagation algo
</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="user" name="json" required="true" type="string">
{
'unresolvedEntityPath' : '/data/unresolved_PROD/content',
'nsPrefixBlacklist' : 'conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________',
'idMappingPath' : '/data/maps/funder_remapping/',
'dedupConfig' : 'dedup-result-decisiontree-v3',
'actionSetIdsRawGraph' : 'doiboost,orcidworks-no-doi,ror',
'actionSetIdsIISGraph' : 'iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenced-projects-backup,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,opencitations,h2020classification,iis-entities-software,iis-entities-patent',
'actionSetIdsDedup' : 'bipfinder-scores,usage_counts',
'isLookUpUrl' : 'http://services.openaire.eu:8280/is/services/isLookUp?wsdl',
'iterations' : '1',
'reuseDBClaims' : 'false',
'reuseODFClaims' : 'false',
'reuseOAFClaims' : 'false',
'reuseODFhdfs' : 'false',
'reuseOAFhdfs' : 'false',
'reuseDB' : 'false',
'reuseDBOpenorgs' : 'false',
'reuseODF' : 'false',
'reuseOAF' : 'false',
'shouldPatchRelations' : 'true',
'contentPath' : '/tmp/prod_aggregator',
'aggregatorGraphPath' : '/tmp/prod_provision/graph/00_graph_aggregator',
'rawGraphPath' : '/tmp/prod_provision/graph/01_graph_raw',
'groupedGraphPath' : '/tmp/prod_provision/graph/02_graph_grouped',
'cleanedFirstGraphPath' : '/tmp/prod_provision/graph/03_graph_cleaned',
'resolvedGraphPath' : '/tmp/prod_provision/graph/04_graph_resolved',
'inferredGraphPath' : '/tmp/prod_provision/graph/05_graph_inferred',
'dedupGraphPath' : '/tmp/prod_provision/graph/06_graph_dedup',
'consistentGraphPath' : '/tmp/prod_provision/graph/07_graph_consistent',
'dedupEnrichedGraphPath' : '/tmp/prod_provision/graph/08_graph_dedup_enriched',
'orcidGraphPath' : '/tmp/prod_provision/graph/09_graph_orcid',
'bulkTaggingGraphPath' : '/tmp/prod_provision/graph/10_graph_bulktagging',
'affiliationGraphPath' : '/tmp/prod_provision/graph/11_graph_affiliation',
'affiliationSemRelGraphPath' : '/tmp/prod_provision/graph/12_graph_affiliation_semantic_relation',
'communityOrganizationGraphPath' : '/tmp/prod_provision/graph/13_graph_community_organization',
'fundingGraphPath' : '/tmp/prod_provision/graph/14_graph_funding',
'communitySemRelGraphPath' : '/tmp/prod_provision/graph/15_graph_community_semantic_relation',
'countryGraphPath' : '/tmp/prod_provision/graph/16_graph_country',
'cleanedGraphPath' : '/tmp/prod_provision/graph/17_graph_cleaned',
'blacklistedGraphPath' : '/tmp/prod_provision/graph/18_graph_blacklisted'
}
</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="resumeFrom"/>
</ARCS>
</NODE>
<NODE isStart="true" name="setResumeFromHBM" type="SetEnvParameter">
<DESCRIPTION>the step from which the hostedByMap step should be resumed. If the choice is DownloadBoth, both the unibi gold and doaj files are downloaded anew. If the option is DownloadDoaj or DownloadGold only the selected file is downloaded. If the option is ProduceHBM a new hosted by map is produced. If the option is PrepareInfo the old HBM is used on the results and datasources. </DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="parameterName" required="true" type="string">HBMresumeFrom</PARAM>
<PARAM function="validValues(['DownloadBoth', 'DownloadGold', 'DownloadDoaj', 'ProduceHBM','PrepareInfo'])" managedBy="user" name="parameterValue" required="false" type="string">PrepareInfo</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="resumeFrom"/>
</ARCS>
</NODE>
<NODE isStart="true" name="setBulkTaggingPathMap" type="SetEnvParameter">
<DESCRIPTION>Set the map of paths for the Bulk Tagging</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="parameterName" required="true" type="string">bulkTaggingPathMap</PARAM>
<PARAM managedBy="system" name="parameterValue" required="true" type="string">{"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']","orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"}</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="resumeFrom"/>
</ARCS>
</NODE>
<NODE isStart="true" name="setPropagationOrganizationCommunityMap" type="SetEnvParameter">
<DESCRIPTION>Set the map associating each organization with its list of communities, used for the propagation of
communities to results through organizations</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="parameterName" required="true" type="string">propagationOrganizationCommunityMap</PARAM>
<PARAM managedBy="system" name="parameterValue" required="true" type="string">
{
"20|openorgs____::817280e710651751433d869c71f6eee2": ["mes"],
"20|openorgs____::809457868cca956bddde9c74e3a32c3c": ["mes"],
"20|openorgs____::cb386de57b0aa53d5bbf016e15720053": ["mes"],
"20|openorgs____::fb2e1f77e2506054fa25fef7eac7b685": ["mes"],
"20|openorgs____::c4ff9a638850ebdc39e28897b58166bc": ["mes"],
"20|openorgs____::00010e003d3ce9c05e038cadfb6d4f03": ["mes"],
"20|openorgs____::c20c22d6210eea23b58869029c2abe24": ["mes"],
"20|openorgs____::ad78005de675860b9f96b84d50fc19f6": ["mes"],
"20|openorgs____::a6bfaa7b9934dd8459ca94deac34c127": ["mes"],
"20|openorgs____::e68796680c3203d0d708e576c1d55de6": ["mes"],
"20|openorgs____::41206374e08ed608c03f56fc21ec3023": ["mes"],
"20|openorgs____::7dc15ac2260f80e90c849908afe20a6e": ["mes"],
"20|openorgs____::eaea0ef989e0b9245a5d50a6c519dc52": ["mes"],
"20|openorgs____::15fb2c0b223ca34ec651f34838d2daf9": ["mes"],
"20|openorgs____::3cb19734da640f9193b20291268c9a0e": ["mes"],
"20|openorgs____::da5a68a98b9ff7231d6c3c93cde988df": ["mes"],
"20|openorgs____::b436de29bae7906f94797b8a0c235585": ["mes"],
"20|openorgs____::6a5fcbfe36fed5e8b2b50128454cc4f0": ["mes"],
"20|openorgs____::3053c65910d1c064799cb9a93e57e5cd": ["mes"],
"20|openorgs____::f95684edab7a22ac87ea2ed7c47897f2": ["mes"],
"20|openorgs____::581dcea989b861fa0106d4874ecf2d66": ["beopen"],
"20|openorgs____::90a0f7c99fb72cd0e014fcdd38c08719": ["beopen"],
"20|openorgs____::11f6b2617abf37fe7193557d77d8cd00": ["beopen"],
"20|openorgs____::600c7afdde615a68e45cfceaf684782d": ["beopen"],
"20|openorgs____::ca4e3e4e6767e05b0828ef5f0cdc7292": ["beopen"],
"20|openorgs____::b79247e30e30a8f8532a30e3b816cda9": ["beopen"],
"20|openorgs____::9cb5ffc315d7bf0f97b2f0fdc37612aa": ["beopen"],
"20|openorgs____::ad863df6deda1619a25e7fad4a534891": ["beopen"],
"20|openorgs____::72162cfc2e7edaf7515c778e04d1952b": ["beopen"],
"20|openorgs____::9e29fb5b85151a6810ce7256b08475e4": [
"mes",
"beopen"
],
"20|openorgs____::9dd5545aacd3d8019e00c3f837269746": ["beopen"],
"20|openorgs____::a86e8b969264b4c92cbf79c289a3f61a": ["beopen"],
"20|openorgs____::d6b4b35b44951f55747a7139446d21a8": ["beopen"],
"20|openorgs____::8ec069b683b9e9387492ea0c6b88a806": ["beopen"],
"20|openorgs____::d11f981828c485cd23d93f7f24f24db1": ["eut", "tunet"],
"20|openorgs____::e66fe5dd092752e1dd6fd29fc699933a": ["eut"],
"20|openorgs____::526468206bca24c1c90da6a312295cf4": ["eut"],
"20|openorgs____::08e311e656e65ccb32e07c66b15b6ff7": ["eut"],
"20|openorgs____::55a1f889758964b77682904218fdb298": ["eut"],
"20|openorgs____::530092b6970d60a5329beb9f39e8d7d4": ["eut"],
"20|openorgs____::aadafa39392b3e200102596a3a4aad9d": ["eut"],
"20|openorgs____::c3fe999c74fad308132b8a5971367dce": ["eut"],
"20|openorgs____::1624ff7c01bb641b91f4518539a0c28a": ["aurora", "netherlands"],
"20|openorgs____::cdda7cfe17c89eb50628ec2eb1f8acd2": ["aurora"],
"20|openorgs____::818b75030e0e40612d69e049843ede7e": ["aurora"],
"20|openorgs____::0b0102bae51f4f4ef5ba57fbe1523b92": ["aurora"],
"20|openorgs____::d1f5d132714edad4a952e0414fc3c94b": ["aurora"],
"20|openorgs____::eb0669daa9efeb898a3090d8aac7c953": ["aurora"],
"20|openorgs____::eb391317ed0dc684aa81ac16265de041": ["aurora"],
"20|openorgs____::f7cfcc98245e22c7d6e321cde930e746": ["aurora"],
"20|openorgs____::f33179d3306ba2599f7a898b056b604f": ["aurora"],
"20|openorgs____::0a45de396fc864dc70d42d13f4e786e1": ["aurora"],
"20|openorgs____::039192f47416999e406a6b57da19beeb": ["aurora"],
"20|openorgs____::2023f56908812b5ac7045778fded8a0f": ["eutopia"],
"20|openorgs____::77f6d72a67be69ed181d83bc4a53fa54": ["eutopia"],
"20|openorgs____::b7e8d9a1ee2a31a5e9aaaaebac2d7cc3": ["eutopia"],
"20|openorgs____::be0ace0416ddcc3cc7ad6151d2d5d681": ["eutopia"],
"20|openorgs____::f6d0bdf4b935c2907d157429ad783eb1": ["eutopia"],
"20|openorgs____::6363a88f20cca34629ae690d539f9b67": ["eutopia"],
"20|openorgs____::52203596562cc56fcefb3e737c6e2441": ["eutopia"],
"20|openorgs____::6ba93bd765e2d7e5d2fb0cf3c1ac17b2": ["eutopia"],
"20|openorgs____::a6340e6ecf60f6bba163659df985b0f2": ["eutopia"],
"20|openorgs____::21731b21b089c81e0e1a5ed599316e2f": ["eutopia"],
"20|openorgs____::4efa936474b7478a534766170e2b3a78": ["forthem"],
"20|openorgs____::ff9f05849d91ca29be33aeeb07413536": ["forthem"],
"20|openorgs____::bfe8b338037acd41adfc0f100460e3db": ["forthem"],
"20|openorgs____::091f96b07fb8ad89e0732cd2f0c9bfcb": ["forthem"],
"20|openorgs____::5e4f5acff7b8362d8a431f27b0b123c3": ["forthem"],
"20|openorgs____::01ce6b73a6e390a709130c1bcfabeab9": ["forthem"],
"20|openorgs____::db9192d70ca3a941f6629fab56278fed": ["forthem"],
"20|openorgs____::a7502dd2669be577007c95445ca412fa": ["forthem"],
"20|openorgs____::09ddce938e9b0a54b5a831a1154be481": ["forthem"],
"20|openorgs____::7ac95584b6f0ae0b33027b9ce08cc96a": ["openaire-infrastructure"],
"20|openorgs____::027a83f256564db02650b6e1c24623d5": ["embrc"],
"20|openorgs____::dd8ba98db9a7a650207228be6f57ee29": ["instruct"],
"20|openorgs____::f0c3e27c112272a3781226c5890b228c": ["dariah"],
"20|openorgs____::0998a1d4dbfc5025ad963eeb2306969c": ["egi"],
"20|openorgs____::5ca2b95a1771149560222420df04abc7": ["sobigdata"],
"20|openorgs____::54d41926fa20801d03886c9685df4d38": ["epos"],
"20|openorgs____::ae84e04d1bb09ceec2f041532ad7175c": ["clarin"],
"20|openorgs____::5e91b35f34a6d4ead6432ef2f77d24bf": ["eu-conexus"],
"20|openorgs____::5e3400be408250ebb7d36b8d33bb3660": ["eu-conexus"],
"20|openorgs____::15ee7f73eb676be60ebc7629e42c7bf9": ["eu-conexus"],
"20|openorgs____::8a3fbaedd50cd6baabbba47b9976e4db": ["eu-conexus"],
"20|openorgs____::f1d25b27c8a254d0e0931db00497be26": ["eu-conexus"],
"20|openorgs____::b77c01aa15de3675da34277d48de2ec1": ["eu-conexus"],
"20|openorgs____::54cd984fc7d3b153ec2181f985041f02": ["eu-conexus", "tunet"],
"20|openorgs____::ed4f840895b2614ca59bc45ffb2eb15f": ["eu-conexus"],
"20|openorgs____::08c6812ca06b419e177b8e3949c9ec4c": ["eu-conexus"],
"20|openorgs____::3d57a5aadd2e0925bca78515278f2405": ["netherlands"],
"20|openorgs____::2ce10b2528220091bd0363c1e39394c1": ["netherlands"],
"20|openorgs____::fe49cd5c84da7a89da723e24cadeb99d": ["netherlands"],
"20|openorgs____::df96ffc1951dc9bc14b0695e3297f6e1": ["netherlands"],
"20|openorgs____::10020843b2d7fac7c04e40b55a631b0c": ["netherlands"],
"20|openorgs____::47f4bfbcef93eb928361dffaabd03b54": ["netherlands"],
"20|openorgs____::5e6bf8962665cdd040341171e5c631d8": ["netherlands"],
"20|openorgs____::79a0e60afef2e753b0dc12425ecb3f8c": ["netherlands"],
"20|openorgs____::a8767e80afb51d63802fdf1ddabeacb0": ["netherlands"],
"20|openorgs____::db787c50cf46f2b08e69f830e292e42d": ["netherlands"],
"20|openorgs____::2f735203eb40d8389a881e874bee537a": ["netherlands"],
"20|openorgs____::25b87dfee5c1855dab099c5a2eef225a": ["netherlands"],
"20|openorgs____::cb77737b5a390319030e6abdb12aa6ad": ["netherlands"],
"20|openorgs____::b0a51df36791d8bb939721e0ad1f92b5": ["netherlands"],
"20|openorgs____::e42580548da4a1d39bb67b60b971056e": ["netherlands"],
"20|openorgs____::2eeca4f78f2a04a35057c1fa7918e23b": ["netherlands"],
"20|openorgs____::a7e0018f6064f0dab7a8c9a48a61a1c6": ["netherlands"],
"20|openorgs____::ee938614ce34e199c2901282deb79db4": ["netherlands"],
"20|openorgs____::dd29a935a8c8b552752fd00524f5c2a3": ["netherlands"],
"20|openorgs____::fa2e191ad1d35db623f96abcedb9fb96": ["netherlands"],
"20|openorgs____::863c4184481f057bfe695c9c2786a90a": ["netherlands"],
"20|openorgs____::f2a84eb5c508838ba5dd728a1b8ac5ba": ["netherlands"],
"20|openorgs____::eb2f1247784acef155801d95b5ac937b": ["netherlands"],
"20|openorgs____::19274b9a576d284a2b900e2f112f6a0c": ["netherlands"],
"20|openorgs____::48ffea8c900e79deaa2256aa2c0a0ee2": ["netherlands"],
"20|openorgs____::54f2e88f3eb801dc7e49a4ca90fdd1b6": ["netherlands"],
"20|openorgs____::cb04606c0c4eb1696cf4bc74dba44bdf": ["netherlands"],
"20|openorgs____::a3af79fec4d09764e56cd6d4df1d976a": ["netherlands"],
"20|openorgs____::6c50ead5f8f9cac29c4c3066df9fba45": ["netherlands"],
"20|openorgs____::40a6eddd9af7a097cb83adb7c7b7fe55": ["netherlands"],
"20|openorgs____::0a49c26b60b634e6f8f6334bb8e078ae": ["netherlands"],
"20|openorgs____::374bcee2ff43200e9afe71b6dc606e6c": ["netherlands"],
"20|openorgs____::14e84ae5ab9b8fd2b05b65c19551a416": ["netherlands"],
"20|openorgs____::7d8ba78b303d8a0065029dffb414d4ae": ["netherlands"],
"20|openorgs____::b12b585c23a2b443b35cb33f57309a34": ["netherlands"],
"20|openorgs____::be5a09d4c12d29681a6985dd892f9f44": ["netherlands"],
"20|openorgs____::ba45682d5fd11bfdb695ca2fe5a0d377": ["netherlands"],
"20|openorgs____::58f65ed1ce3c9166e9c5f939bfdbf83a": ["netherlands"],
"20|openorgs____::81371ea94b1a09d3243e73d6ec3527ec": ["netherlands"],
"20|openorgs____::604881198363fedbb5d5478f465305f2": ["netherlands"],
"20|openorgs____::81b64115eb383a27a9c1820a2c760c89": ["netherlands"],
"20|openorgs____::ad8f981707a4a1d6c9e9ee60c02b9a11": ["netherlands"],
"20|openorgs____::994d7f6fc25c5de47b9212bb69524380": ["netherlands"],
"20|openorgs____::57a751ebab17a6996ba6836a89548f35": ["netherlands"],
"20|openorgs____::e79c2bf184a99250523a378626875955": ["netherlands"],
"20|openorgs____::05ba90eee0e9eef094a77240339d0ee5": ["netherlands"],
"20|openorgs____::04a66697fceec5d5e3bbf19b85135356": ["netherlands"],
"20|openorgs____::bb88336ade9d2deeece4dee6262054b4": ["netherlands"],
"20|openorgs____::b232880c47fb92a22ffea8e2e749e0ac": ["netherlands"],
"20|openorgs____::4aebed7bd67813bb8a325eb410a392be": ["netherlands"],
"20|openorgs____::4b2a9d306955ede352ba267af8616375": ["netherlands"],
"20|openorgs____::2d77f3a9829f7a9651ec60f597b479cd": ["netherlands"],
"20|openorgs____::2885352ab1a441a9d387424888e30796": ["netherlands"],
"20|openorgs____::95e5f0e3696306902a6749c406bb2728": ["netherlands"],
"20|openorgs____::048a44d990fd8a4710eef3a9e375360b": ["netherlands"],
"20|openorgs____::c254f2d5f6fdb12ecb37cfee25a3c4b1": ["netherlands"],
"20|openorgs____::775eab3c1281cb91d53a31c4a1ba1090": ["netherlands"],
"20|openorgs____::5f081781d2510771c1c6686c4a37d1a1": ["netherlands"],
"20|openorgs____::3f27054a1ab469826b9d3a1915452f89": ["netherlands"],
"20|openorgs____::0ac7645722cfee8253ce68cd2ab609c6": ["netherlands"],
"20|openorgs____::913ef94a4bc903f936164b00f287defa": ["netherlands"],
"20|openorgs____::7ec8de16e11c7fc9f7e4e32923a42b22": ["netherlands"],
"20|openorgs____::8a1b859747bd305bc855914588006010": ["netherlands"],
"20|openorgs____::d0a66a264d47d18baba1821d678627f3": ["netherlands"],
"20|openorgs____::c143bad570ce1ce6c1c5956f5f2d96ee": ["netherlands"],
"20|openorgs____::a5ac6dbe326433376c900329853d914a": ["netherlands"],
"20|openorgs____::de44646096a724bdba876f7f573e74eb": ["netherlands"],
"20|openorgs____::74691942ddde3989519f008b551ebb35": ["netherlands"],
"20|openorgs____::8cf49add8601037458e4ecdbd63a1c42": ["netherlands"],
"20|openorgs____::e086dd85625f9fe8b89dfc2d81441664": ["netherlands"],
"20|openorgs____::06b2bdd4093a09b5f6c33521a7bf665c": ["netherlands"],
"20|openorgs____::a08a62c3e21dd693af009638e21aba11": ["netherlands"],
"20|openorgs____::0e7e1aa241c64ac17aee64c0be4be069": ["netherlands"],
"20|openorgs____::20e491a3310e7146d52161c825a980ec": ["netherlands"],
"20|openorgs____::43e6dc2e3207f47c82560767ed928bab": ["netherlands"],
"20|openorgs____::919801f3040011bbef2dd28020917c7f": ["netherlands"],
"20|openorgs____::58d238642a71a8cf41bd68c3e4189151": ["netherlands"],
"20|openorgs____::7ef128431ed7a002d362e73a048d3fd5": ["netherlands"],
"20|openorgs____::743576a59064697f297fba7e07d6e0fe": ["netherlands"],
"20|openorgs____::3c2c37b7745ae3202347c3fa594bc66a": ["netherlands"],
"20|openorgs____::51609e11886740e4cf5f77d0f516a43c": ["netherlands"],
"20|openorgs____::e1c228979333191a8f4bd6b5f5d01644": ["netherlands"],
"20|openorgs____::1eaec9171998c0db1b627afe4a969430": ["netherlands"],
"20|openorgs____::4bfe0847ab9fa240154a274f13fa7ce0": ["netherlands"],
"20|openorgs____::917ca88ffc8c464dc902998c0d89c24e": ["netherlands"],
"20|openorgs____::990dcc0a2b128304fdde55df641a47b4": ["netherlands"],
"20|openorgs____::bc3c03a56c8ac610e083f74395896ed4": ["netherlands"],
"20|openorgs____::e2d0ca0969cf3610d277907a57b8ae7e": ["netherlands"],
"20|openorgs____::1820f7ff911f4cf656fd63fd17b2fea8": ["netherlands"],
"20|openorgs____::30f7a1b7ffaca5405d4fc22b45b346f8": ["netherlands"],
"20|openorgs____::64ff9bf918bf9552b30ba878bbf3eba6": ["netherlands"],
"20|openorgs____::6a6ff788c9f916d156e88942155543c8": ["netherlands"],
"20|openorgs____::938a3404eacaedfe1b192d39178606eb": ["netherlands"],
"20|openorgs____::a8369fe646a573e841439d2c24396b49": ["netherlands"],
"20|openorgs____::af7d4ec9149a9d4f43affddd3a5f6ade": ["netherlands"],
"20|openorgs____::b411400e9cb43b74991acf165fdbaf42": ["netherlands"],
"20|openorgs____::e9afeb0f06cd2672f64eba5c50b58508": ["netherlands"],
"20|openorgs____::f58b02a65440082244bedce6c5edddee": ["netherlands"],
"20|openorgs____::1a2cec15d21fef305e2f185f45ec866e": ["netherlands"],
"20|openorgs____::2310574386b83b200559887761a0e6b0": ["netherlands"],
"20|openorgs____::39501045bd561c8e721966822a5e76bb": ["netherlands"],
"20|openorgs____::468e82b8147d8b607a4820d954e8092c": ["netherlands"],
"20|openorgs____::51aec3d04ac8acf29e7d0cc01e85d49d": ["netherlands"],
"20|openorgs____::52f890d61aa7be9be055cb220264bb56": ["netherlands"],
"20|openorgs____::52fa090596519ef4cc00ee6d01dafa5e": ["netherlands"],
"20|openorgs____::546b0193ea282a484f5ffba1cf72c31a": ["netherlands"],
"20|openorgs____::55137182dc586d7fbcbd0651083cfcaa": ["netherlands"],
"20|openorgs____::5543c511cc8eeee82f81d5841adaf0c3": ["netherlands"],
"20|openorgs____::60c2723e09a04c216d34506524c4833c": ["netherlands"],
"20|openorgs____::63df05ca62ff76a23d92aeee3cd08e83": ["netherlands"],
"20|openorgs____::6c7d3222989e73edcd396801036ac142": ["netherlands"],
"20|openorgs____::6e1e359b40468348dc462a5e6d0d2f4c": ["netherlands"],
"20|openorgs____::7f37484efc0f92cb9974774e58d355d9": ["netherlands"],
"20|openorgs____::88038a30254a7cbe80a16c8824b355f3": ["netherlands"],
"20|openorgs____::9a1b949c6af565ac3cc0846a5f7473b9": ["netherlands"],
"20|openorgs____::9c1db524c2294a2ed7df7420291cd434": ["netherlands"],
"20|openorgs____::a0f6de4744e6e49fd80e96e33846f7c9": ["netherlands"],
"20|openorgs____::a246b8c7dc06716086d9012c00dfed93": ["netherlands"],
"20|openorgs____::cd7fa52a4719d56f6393f01a7d0a74ce": ["netherlands"],
"20|openorgs____::ce11a4e92d5cf66ba65cc0e1b5994daf": ["netherlands"],
"20|openorgs____::ef725cb463f17996424c6f27d7183e1d": ["netherlands"],
"20|openorgs____::ff39371bb6f5f4484908ed48ea41ba00": ["netherlands"],
"20|openorgs____::1ed7562253b98ec918a4007a532a750c": ["netherlands"],
"20|openorgs____::221325bfd7557cd363138ee80a62a729": ["netherlands"],
"20|openorgs____::2927d186274099c1a3b26b2517e3c717": ["netherlands"],
"20|openorgs____::2b9422d5f5bac4aae99b24e61586d3d3": ["netherlands"],
"20|openorgs____::3ad482503246d0c6b356b3212947d0b9": ["netherlands"],
"20|openorgs____::42b878fb8ecaf9a40a2446179d4ac4a5": ["netherlands"],
"20|openorgs____::4de50ef535f9dfa948617ff3b12e192b": ["netherlands"],
"20|openorgs____::56ab3c45d37a3e0b6fd8657a202d6f9f": ["netherlands"],
"20|openorgs____::6ad8bf5ed17bbf8809c34472cf0e4b82": ["netherlands"],
"20|openorgs____::6d172d9141a159458a1794dcca4733f8": ["netherlands"],
"20|openorgs____::7150901687aad5b0346843fb4426f7e3": ["netherlands"],
"20|openorgs____::7bcd05e7e96471553d6f395ce8670455": ["netherlands"],
"20|openorgs____::a0811c572ed4100f50c16060b95e9ec8": ["netherlands"],
"20|openorgs____::bcf63d0cf192b2941f29bff28c7a9d6b": ["netherlands"],
"20|openorgs____::d3dd82a7d383eff0f4e0108e4ba2a0db": ["netherlands"],
"20|openorgs____::d6951a5a11a8a33c96bfb4a8ce80a8d6": ["netherlands"],
"20|openorgs____::dc993215d316a13417c8e56e23a25b87": ["netherlands"],
"20|openorgs____::df72cb8cb3ce7b3a2fc8cdb05188ee15": ["netherlands"],
"20|openorgs____::09827cce030ebf5e04e1c955532e57c1": ["netherlands"],
"20|openorgs____::1948def781ac9956fed96f81710351db": ["netherlands"],
"20|openorgs____::271c04c73c94df9a3fa8d45c586a2637": ["netherlands"],
"20|openorgs____::365d0c26f67165ea52d918cebc858817": ["netherlands"],
"20|openorgs____::5149d47489dad962ee077bf2587b62a2": ["netherlands"],
"20|openorgs____::6fcc9d96bbfa51a0d67921ad3cced500": ["netherlands"],
"20|openorgs____::806a351b3659612b064034e7255de413": ["netherlands"],
"20|openorgs____::a8d0b3f004237455d2650006b004f10a": ["netherlands"],
"20|openorgs____::ffddafb3850edee334c70abad176c506":["netherlands"],
"20|openorgs____::5d55fb216b14691cf68218daf5d78cd9":["tunet"],
"20|openorgs____::5352e8197bcc25b2b66bcc0788ae8b92":["tunet"],
"20|openorgs____::81317c69d1aad1574f1a39f145ffd335":["tunet"]
}</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="resumeFrom"/>
</ARCS>
</NODE>
<NODE isStart="true" name="setFundersBlacklist" type="SetEnvParameter">
<DESCRIPTION>Set a regex of funder shortnames to exclude from the project reference processing</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="parameterName" required="true" type="string">referenceextraction_project_fundingclass_blacklist_regex</PARAM>
<PARAM managedBy="user" name="parameterValue" required="true" type="string">^DFG::.*$|^CONICYT::.*$|^RSF::.*$|^SGOV::.*$|^GSRT::.*$|^MIUR::.*$|^INNOVIRIS::.*$|^RIF::.*$|^SFRS::.*$</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="resumeFrom"/>
</ARCS>
</NODE>
<NODE isJoin="true" name="resumeFrom" type="Selection">
<!-- Join point of the workflow: every isStart node defined above (setParameters, setResumeFromHBM, setBulkTaggingPathMap, setPropagationOrganizationCommunityMap, setFundersBlacklist) has an arc to this node. Presumably the engine waits for all of them before evaluating the selection; TODO confirm against the workflow engine. -->
<DESCRIPTION>Resume the workflow from a specific node</DESCRIPTION>
<PARAMETERS>
<!-- The user-managed selection picks the entry step. The value currently stored resumes from communityOrganizationPropagation, i.e. not from the first step (aggregatorGraph). NOTE(review): confirm this is the intended default for this profile. -->
<PARAM function="validValues(['aggregatorGraph', 'promoteActionsRaw', 'graphEntityGrouping', 'graphCleaningFirst', 'graphResolution', 'IIS', 'promoteActionsIIS', 'dedup', 'promoteActionsDedup', 'orcidPropagation', 'bulkTagging', 'affiliationPropagation', 'affiliationSemRelPropagation', 'communityOrganizationPropagation', 'resultProjectPropagation', 'communitySemrelPropagation', 'countryPropagation', 'graphCleaning', 'blacklistRelations'])" managedBy="user" name="selection" required="true" type="string">communityOrganizationPropagation</PARAM>
</PARAMETERS>
<ARCS>
<!-- One arc per valid selection value, in the same order as validValues. Arc name and target node are identical except for two cases: IIS goes to select_IIS and dedup goes to duplicateScan. -->
<ARC name="aggregatorGraph" to="aggregatorGraph"/>
<ARC name="promoteActionsRaw" to="promoteActionsRaw"/>
<ARC name="graphEntityGrouping" to="graphEntityGrouping"/>
<ARC name="graphCleaningFirst" to="graphCleaningFirst"/>
<ARC name="graphResolution" to="graphResolution"/>
<ARC name="IIS" to="select_IIS"/>
<ARC name="promoteActionsIIS" to="promoteActionsIIS"/>
<ARC name="dedup" to="duplicateScan"/>
<ARC name="promoteActionsDedup" to="promoteActionsDedup"/>
<ARC name="orcidPropagation" to="orcidPropagation"/>
<ARC name="bulkTagging" to="bulkTagging"/>
<ARC name="affiliationPropagation" to="affiliationPropagation"/>
<ARC name="affiliationSemRelPropagation" to="affiliationSemRelPropagation"/>
<ARC name="communityOrganizationPropagation" to="communityOrganizationPropagation"/>
<ARC name="resultProjectPropagation" to="resultProjectPropagation"/>
<ARC name="communitySemrelPropagation" to="communitySemrelPropagation"/>
<ARC name="countryPropagation" to="countryPropagation"/>
<ARC name="graphCleaning" to="graphCleaning"/>
<ARC name="blacklistRelations" to="blacklistRelations"/>
</ARCS>
</NODE>
<NODE name="aggregatorGraph" type="SubmitHadoopJob">
<!-- First selectable step of the pipeline: runs the executeOozieJob Hadoop job on cluster IIS using the Oozie application path given in params, then hands over to aggregatorGraph_continue. -->
<DESCRIPTION>create the PROD AGGREGATOR graph</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<!-- envParams: each right-hand value is the name of an entry defined by the start nodes (aggregatorGraphPath, isLookUpUrl, contentPath and the reuse flags are all keys of the setParameters json). Presumably the engine resolves that entry and passes its value to the job under the left-hand key; TODO confirm. The two sides are not always spelled alike (isLookupUrl reads isLookUpUrl, reuseODF_hdfs reads reuseODFhdfs, reuseOAF_hdfs reads reuseOAFhdfs), so keep both in sync when renaming. -->
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'graphOutputPath' : 'aggregatorGraphPath',
'isLookupUrl' : 'isLookUpUrl',
'reuseDBClaims' : 'reuseDBClaims',
'reuseODFClaims' : 'reuseODFClaims',
'reuseOAFClaims' : 'reuseOAFClaims',
'reuseDB' : 'reuseDB',
'reuseDBOpenorgs' : 'reuseDBOpenorgs',
'reuseODF' : 'reuseODF',
'reuseODF_hdfs' : 'reuseODFhdfs',
'reuseOAF' : 'reuseOAF',
'reuseOAF_hdfs' : 'reuseOAFhdfs',
'contentPath' : 'contentPath',
'nsPrefixBlacklist' : 'nsPrefixBlacklist',
'shouldPatchRelations' : 'shouldPatchRelations',
'idMappingPath' : 'idMappingPath'
}
</PARAM>
<!-- params: values written here look like literals handed to the job unchanged (presumably; confirm). NOTE(review): postgresPassword and postgresOpenOrgsPassword are plain-text credentials, and postgresOpenOrgsURL is a raw IP address, all committed inside a file stored under src/test/resources. If they are real they should be rotated and replaced by placeholders here; check first whether any test reads these values. -->
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app',
'mongoURL' : 'mongodb://services.openaire.eu',
'mongoDb' : 'mdstore',
'mdstoreManagerUrl' : 'https://services.openaire.eu/mdstoremanager',
'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus',
'postgresUser' : 'dnet',
'postgresPassword' : 'dnetPwd',
'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.39:5432/oa_organizations',
'postgresOpenOrgsUser' : 'oa_organizations',
'postgresOpenOrgsPassword' : 'tei1igiLaPheeghu',
'shouldHashId' : 'true',
'workingDir' : '/tmp/prod_provision/working_dir/prod_aggregator'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="aggregatorGraph_continue"/>
</ARCS>
</NODE>
<NODE name="aggregatorGraph_continue" type="Selection">
<DESCRIPTION>Continue after aggregator graph?</DESCRIPTION>
<PARAMETERS>
<PARAM function="validValues(['YES', 'NO'])" managedBy="user" name="selection" required="true" type="string">YES</PARAM>
</PARAMETERS>
<ARCS>
<ARC name="NO" to="success"/>
<ARC name="YES" to="promoteActionsRaw"/>
</ARCS>
</NODE>
<NODE name="promoteActionsRaw" type="SubmitHadoopJob">
<DESCRIPTION>create the RAW graph</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'inputActionSetIds' : 'actionSetIdsRawGraph',
'inputGraphRootPath' : 'aggregatorGraphPath',
'outputGraphRootPath' : 'rawGraphPath',
'isLookupUrl' : 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G',
'activePromoteDatasetActionPayload' : 'true',
'activePromoteDatasourceActionPayload' : 'true',
'activePromoteOrganizationActionPayload' : 'true',
'activePromoteOtherResearchProductActionPayload' : 'true',
'activePromoteProjectActionPayload' : 'true',
'activePromotePublicationActionPayload' : 'true',
'activePromoteRelationActionPayload' : 'true',
'activePromoteResultActionPayload' : 'true',
'activePromoteSoftwareActionPayload' : 'true',
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
'shouldGroupById':'false',
'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsRaw'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="promoteActionsRaw_continue"/>
</ARCS>
</NODE>
<NODE name="promoteActionsRaw_continue" type="Selection">
<DESCRIPTION>Continue after RAW graph?</DESCRIPTION>
<PARAMETERS>
<PARAM function="validValues(['YES', 'NO'])" managedBy="user" name="selection" required="true" type="string">YES</PARAM>
</PARAMETERS>
<ARCS>
<ARC name="NO" to="success"/>
<ARC name="YES" to="graphEntityGrouping"/>
</ARCS>
</NODE>
<NODE name="graphEntityGrouping" type="SubmitHadoopJob">
<DESCRIPTION>Graph grouping</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'graphBasePath':'rawGraphPath',
'graphOutputPath':'groupedGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/group/oozie_app',
'workingPath' : '/tmp/prod_provision/working_dir/grouping',
'sparkExecutorCores' : '4',
'sparkExecutorMemory' : '7G'
}
</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="graphEntityGrouping_continue"/>
</ARCS>
</NODE>
<NODE name="graphEntityGrouping_continue" type="Selection">
<DESCRIPTION>Continue after grouped graph?</DESCRIPTION>
<PARAMETERS>
<PARAM function="validValues(['YES', 'NO'])" managedBy="user" name="selection" required="true" type="string">YES</PARAM>
</PARAMETERS>
<ARCS>
<ARC name="NO" to="success"/>
<ARC name="YES" to="graphCleaningFirst"/>
</ARCS>
</NODE>
<NODE name="graphCleaningFirst" type="SubmitHadoopJob">
<!-- First of the two cleaning steps offered by resumeFrom (the other is graphCleaning). Reads the entry named groupedGraphPath and writes to the entry named cleanedFirstGraphPath. Unlike the steps before it, this one is not followed by a YES/NO continue selection: its only arc goes straight to patchHostedBy. -->
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'graphInputPath' : 'groupedGraphPath',
'graphOutputPath': 'cleanedFirstGraphPath',
'isLookupUrl': 'isLookUpUrl'
}
</PARAM>
<!-- NOTE(review): shouldClean is 'false' although the description talks about cleaning. The meaning of the flag is defined by the Oozie application and is not visible here; confirm the value is intentional. -->
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/clean',
'shouldClean' : 'false'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="patchHostedBy"/>
</ARCS>
</NODE>
<NODE name="patchHostedBy" type="SubmitHadoopJob">
<!-- Runs after graphCleaningFirst and before graphResolution. It is not listed among the resumeFrom selection values, so it is only reached through graphCleaningFirst. -->
<DESCRIPTION>updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<!-- resumeFrom reads the entry HBMresumeFrom, which is set by the setResumeFromHBM start node (valid values DownloadBoth, DownloadGold, DownloadDoaj, ProduceHBM, PrepareInfo). sourcePath reads cleanedFirstGraphPath, the output of graphCleaningFirst. -->
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'resumeFrom' : 'HBMresumeFrom',
'sourcePath' : 'cleanedFirstGraphPath'
}
</PARAM>
<!-- NOTE(review): workingDir, outputPath and dumpPath all point to the same directory (/tmp/prod_provision/working_dir/hostedbymap). Confirm the Oozie application expects them to coincide. The unibiFileURL is pinned to a specific file version (issn_gold_oa_version_4.csv). -->
<PARAM managedBy="system" name="params" required="true" type="string">
{
'hostedByMapPath' : '/user/dnet.production/data/hostedByMap',
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hostedbymap/oozie_app',
'sparkExecutorCores' : '2',
'sparkExecutorMemory' : '12G',
'workingDir' : '/tmp/prod_provision/working_dir/hostedbymap',
'outputPath' : '/tmp/prod_provision/working_dir/hostedbymap',
'unibiFileURL':'https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv',
'doajJsonFileURL':'https://doaj.org/public-data-dump/journal',
'dumpPath':'/tmp/prod_provision/working_dir/hostedbymap',
'dumpFileName':'doaj_json.tar.gz'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="graphResolution"/>
</ARCS>
</NODE>
<!-- graphResolution: submits the Oozie application /lib/dnet/PROD/oa/graph/resolution/oozie_app on cluster IIS.
     Bound env attributes: graphBasePath from cleanedFirstGraphPath, unresolvedPath from unresolvedEntityPath, targetPath to resolvedGraphPath.
     NOTE(review): unlike the sibling SubmitHadoopJob nodes, this node declares no oozieReportActionsCsv parameter; confirm this is intentional.
     Next node: select_IIS. -->
<NODE name="graphResolution" type="SubmitHadoopJob">
<DESCRIPTION>Resolve Relation</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'graphBasePath':'cleanedFirstGraphPath',
'unresolvedPath' :'unresolvedEntityPath',
'targetPath':'resolvedGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/relation_resolution',
'shouldCleanContext' : 'false',
'sparkExecutorCores' : '2',
'sparkExecutorMemory' : '12G'
}
</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="select_IIS"/>
</ARCS>
</NODE>
<!-- select_IIS: user-managed YES/NO switch (stored value: YES).
     YES goes to prepareIISActionSets (the IIS branch); NO jumps straight to promoteActionsIIS, skipping
     prepareIISActionSets, prepareIISParameters, IIS_main and updateIISActionSets. -->
<NODE name="select_IIS" type="Selection">
<DESCRIPTION>Shall we run IIS?</DESCRIPTION>
<PARAMETERS>
<PARAM function="validValues(['YES', 'NO'])" managedBy="user" name="selection" required="true" type="string">YES</PARAM>
</PARAMETERS>
<ARCS>
<ARC name="YES" to="prepareIISActionSets"/>
<ARC name="NO" to="promoteActionsIIS"/>
</ARCS>
</NODE>
<!-- prepareIISActionSets: declares the IIS action sets as a list of records with four fields each:
     'set' (action set id), 'jobProperty' (an export_action_set_id_* name), 'enablingProperty' (an active_* name) and 'enabled'.
     The jobProperty and enablingProperty names match keys listed in the envParams of the IIS_main node.
     In this profile only 'iis-referenced-projects-main' has 'enabled' set to 'true'; every other entry is 'false'.
     Two pairs of sets share one enablingProperty: document_software_url / iis-entities-software
     (active_referenceextraction_software_url) and iis-referenced-patents / iis-entities-patent (active_referenceextraction_patent).
     Next node: prepareIISParameters. -->
<NODE name="prepareIISActionSets" type="PrepareActionSets">
<DESCRIPTION>prepare IIS action sets</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="sets" required="true" type="string">
[
{
'set' : 'iis-eosc-service-link',
'jobProperty' : 'export_action_set_id_document_eoscServices',
'enablingProperty' : 'active_referenceextraction_eoscservice',
'enabled' : 'false'
},
{
'set' : 'iis-document-affiliation',
'jobProperty' : 'export_action_set_id_matched_doc_organizations',
'enablingProperty' : 'active_document_affiliation',
'enabled' : 'false'
},
{
'set' : 'iis-referenced-projects-main',
'jobProperty' : 'export_action_set_id_document_referencedProjects',
'enablingProperty' : 'active_referenceextraction_project',
'enabled' : 'true'
},
{
'set' : 'iis-referenced-datasets-main',
'jobProperty' : 'export_action_set_id_document_referencedDatasets',
'enablingProperty' : 'active_referenceextraction_dataset',
'enabled' : 'false'
},
{
'set' : 'iis-researchinitiative',
'jobProperty' : 'export_action_set_id_document_research_initiative',
'enablingProperty' : 'active_referenceextraction_researchinitiative',
'enabled' : 'false'
},
{
'set' : 'iis-document-similarities',
'jobProperty' : 'export_action_set_id_document_similarities_standard',
'enablingProperty' : 'active_documentssimilarity',
'enabled' : 'false'
},
{
'set' : 'iis-document-classes',
'jobProperty' : 'export_action_set_id_document_classes',
'enablingProperty' : 'active_documentsclassification',
'enabled' : 'false'
},
{
'set' : 'iis-document-citations',
'jobProperty' : 'export_action_set_id_document_referencedDocuments',
'enablingProperty' : 'active_citationmatching',
'enabled' : 'false'
},
{
'set' : 'iis-document-citations-relations',
'jobProperty' : 'export_action_set_id_citation_relations',
'enablingProperty' : 'active_citationmatching_relations',
'enabled' : 'false'
},
{
'set' : 'iis-referenceextraction-pdb',
'jobProperty' : 'export_action_set_id_document_pdb',
'enablingProperty' : 'active_referenceextraction_pdb',
'enabled' : 'false'
},
{
'set' : 'document_software_url',
'jobProperty' : 'export_action_set_id_document_software_url',
'enablingProperty' : 'active_referenceextraction_software_url',
'enabled' : 'false'
},
{
'set' : 'iis-entities-software',
'jobProperty' : 'export_action_set_id_entity_software',
'enablingProperty' : 'active_referenceextraction_software_url',
'enabled' : 'false'
},
{
'set' : 'iis-communities',
'jobProperty' : 'export_action_set_id_document_community',
'enablingProperty' : 'active_referenceextraction_community',
'enabled' : 'false'
},
{
'set' : 'iis-referenced-patents',
'jobProperty' : 'export_action_set_id_document_patent',
'enablingProperty' : 'active_referenceextraction_patent',
'enabled' : 'false'
},
{
'set' : 'iis-entities-patent',
'jobProperty' : 'export_action_set_id_entity_patent',
'enablingProperty' : 'active_referenceextraction_patent',
'enabled' : 'false'
},
{
'set' : 'iis-covid-19',
'jobProperty' : 'export_action_set_id_document_covid19',
'enablingProperty' : 'active_referenceextraction_covid19',
'enabled' : 'false'
}
]
</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="prepareIISParameters"/>
</ARCS>
</NODE>
<!-- prepareIISParameters: collects the inputs of the IIS main job.
     Parameters whose name ends in 'Param' hold a property name (for example import_islookup_service_location);
     those names also appear as keys in the envParams of the IIS_main node. Presumably this node publishes the
     corresponding values under those names; confirm against the PrepareIisMainParamsV2 implementation.
     User-managed values: oozieWfAppPath, clusterName (IIS or DM), importProjectConceptsContextCSV (community/context ids),
     importServicesCollectedFrom and objectStoreBlacklistCSV (empty, optional).
     importHbaseDumpLocation is required but its stored value is the literal text 'deprecated - not used'.
     Next node: IIS_main. -->
<NODE name="prepareIISParameters" type="PrepareIisMainParamsV2">
<DESCRIPTION>prepare IIS parameters</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="islookupLocationParam" required="true" type="string">import_islookup_service_location</PARAM>
<PARAM managedBy="system" name="objectStoreParam" required="true" type="string">import_content_objectstores_csv</PARAM>
<PARAM managedBy="system" name="objectStoreLocationParam" required="true" type="string">import_content_object_store_location</PARAM>
<PARAM managedBy="system" name="mdStoreStoreLocationParam" required="true" type="string">import_mdstore_service_location</PARAM>
<PARAM managedBy="system" name="mdStoreDatasetParam" required="true" type="string">import_dataset_mdstore_ids_csv</PARAM>
<PARAM managedBy="system" name="oozieWfAppPathParam" required="true" type="string">oozie.wf.application.path</PARAM>
<PARAM managedBy="user" name="oozieWfAppPath" required="true" type="string">/lib/iis/primary/snapshots/2023-05-24_dhp-schemas_2.13.1-patched</PARAM>
<PARAM function="validValues(['IIS','DM'])" managedBy="user" name="clusterName" required="true" type="string">IIS</PARAM>
<PARAM managedBy="user" name="importHbaseDumpLocation" required="true" type="string">deprecated - not used</PARAM>
<PARAM managedBy="system" name="importHbaseDumpLocationParam" required="true" type="string">import_infospace_graph_location</PARAM>
<PARAM managedBy="user" name="objectStoreBlacklistCSV" required="false" type="string"/>
<PARAM managedBy="system" name="importProjectConceptsContextCSVParam" required="true" type="string">import_project_concepts_context_ids_csv</PARAM>
<PARAM managedBy="user" name="importProjectConceptsContextCSV" required="true" type="string">argo-france,aurora,beopen,citizen-science,clarin,covid-19,dariah,dh-ch,dth,oa-pg,ebrains,egi,egrise,elixir-gr,embrc,enermaps,epos,eu-conexus,eut,eutopia,fet-fp7,fet-h2020,forthem,gotriple,heritage-science,inspired-ris,instruct,iperionhs,knowmad,lifewatch-eric,mes,neanias-atmospheric,neanias-underwater,neanias-space,netherlands,ni,north-american-studies,openaire-infrastructure,rda,risis,rural-digital-europe,science-innovation-policy,sdsn-gr,sobigdata,tunet</PARAM>
<PARAM managedBy="system" name="importServicesCollectedFromParam" required="true" type="string">import_infospace_eligible_service_collectedfrom_datasourceid</PARAM>
<PARAM managedBy="user" name="importServicesCollectedFrom" required="true" type="string">10|openaire____::2e06c1122c7df43765fdcf91080824fa</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="IIS_main"/>
</ARCS>
</NODE>
<!-- IIS_main: submits the hadoop job iisMainJobV3.
     Unlike the executeOozieJob nodes there is no 'cluster' PARAM; 'cluster' and 'oozie.wf.application.path' are listed in envParams instead.
     Almost every envParams entry maps a name onto itself (active_*, import_*, export_action_set_id_*);
     the one exception is import_infospace_graph_location, which is bound to resolvedGraphPath (the targetPath of graphResolution).
     The user-managed 'simulation' flag is stored as false.
     Next node: updateIISActionSets. -->
<NODE name="IIS_main" type="SubmitHadoopJob">
<DESCRIPTION>IIS main</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">iisMainJobV3</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'cluster' : 'cluster',
'oozie.wf.application.path' : 'oozie.wf.application.path',
'referenceextraction_project_fundingclass_blacklist_regex' : 'referenceextraction_project_fundingclass_blacklist_regex',
'active_document_affiliation' : 'active_document_affiliation',
'active_referenceextraction_project' : 'active_referenceextraction_project',
'active_referenceextraction_dataset' : 'active_referenceextraction_dataset',
'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative',
'active_documentsclassification' : 'active_documentsclassification',
'active_documentssimilarity' : 'active_documentssimilarity',
'active_citationmatching' : 'active_citationmatching',
'active_citationmatching_relations' : 'active_citationmatching_relations',
'active_referenceextraction_pdb' : 'active_referenceextraction_pdb',
'active_referenceextraction_software_url' : 'active_referenceextraction_software_url',
'active_referenceextraction_community' : 'active_referenceextraction_community',
'active_referenceextraction_patent' : 'active_referenceextraction_patent',
'active_referenceextraction_covid19' : 'active_referenceextraction_covid19',
'active_referenceextraction_eoscservice' : 'active_referenceextraction_eoscservice',
'import_content_objectstores_csv' : 'import_content_objectstores_csv',
'import_content_object_store_location' : 'import_content_object_store_location',
'import_mdstore_service_location' : 'import_mdstore_service_location',
'import_islookup_service_location' : 'import_islookup_service_location',
'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv',
'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv',
'import_infospace_graph_location' : 'resolvedGraphPath',
'import_infospace_eligible_service_collectedfrom_datasourceid' : 'import_infospace_eligible_service_collectedfrom_datasourceid',
'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations',
'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets',
'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects',
'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative',
'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard',
'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments',
'export_action_set_id_citation_relations' : 'export_action_set_id_citation_relations',
'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb',
'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url',
'export_action_set_id_entity_software' : 'export_action_set_id_entity_software',
'export_action_set_id_document_community' : 'export_action_set_id_document_community',
'export_action_set_id_document_patent' : 'export_action_set_id_document_patent',
'export_action_set_id_entity_patent' : 'export_action_set_id_entity_patent',
'export_action_set_id_document_covid19' : 'export_action_set_id_document_covid19',
'export_action_set_id_document_classes' : 'export_action_set_id_document_classes',
'export_action_set_id_document_eoscServices' : 'export_action_set_id_document_eoscServices'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'import_content_pdfaggregation_table_name' : 'pdfaggregation_i.payload'
}
</PARAM>
<PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="updateIISActionSets"/>
</ARCS>
</NODE>
<!-- updateIISActionSets: parameterless UpdateActionSets step that closes the IIS branch.
     Next node: promoteActionsIIS, which is also the target of the NO arc of select_IIS. -->
<NODE name="updateIISActionSets" type="UpdateActionSets">
<DESCRIPTION>update IIS action sets</DESCRIPTION>
<PARAMETERS/>
<ARCS>
<ARC to="promoteActionsIIS"/>
</ARCS>
</NODE>
<!-- promoteActionsIIS: submits the Oozie application /lib/dnet/PROD/actionmanager/wf/main/oozie_app on cluster IIS.
     Bound env attributes: inputActionSetIds from actionSetIdsIISGraph, inputGraphRootPath from resolvedGraphPath,
     outputGraphRootPath to inferredGraphPath.
     All activePromote*ActionPayload flags are 'true'; shouldGroupById is 'false' here,
     whereas promoteActionsDedup runs the same application with shouldGroupById 'true'.
     Next node: duplicateScan. -->
<NODE name="promoteActionsIIS" type="SubmitHadoopJob">
<DESCRIPTION>create the INFERRED graph</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'inputActionSetIds' : 'actionSetIdsIISGraph',
'inputGraphRootPath' : 'resolvedGraphPath',
'outputGraphRootPath' : 'inferredGraphPath',
'isLookupUrl' : 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app',
'sparkExecutorCores' : '2',
'sparkExecutorMemory' : '12G',
'activePromoteDatasetActionPayload' : 'true',
'activePromoteDatasourceActionPayload' : 'true',
'activePromoteOrganizationActionPayload' : 'true',
'activePromoteOtherResearchProductActionPayload' : 'true',
'activePromoteProjectActionPayload' : 'true',
'activePromotePublicationActionPayload' : 'true',
'activePromoteRelationActionPayload' : 'true',
'activePromoteResultActionPayload' : 'true',
'activePromoteSoftwareActionPayload' : 'true',
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
'shouldGroupById':'false',
'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsIIS'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="duplicateScan"/>
</ARCS>
</NODE>
<!-- duplicateScan: submits the Oozie application /lib/dnet/PROD/oa/dedup/scan/oozie_app on cluster IIS.
     Bound env attributes: actionSetId from dedupConfig, graphBasePath from inferredGraphPath, dedupGraphPath from dedupGraphPath.
     NOTE(review): the DESCRIPTION says "raw graph" but the input bound here is inferredGraphPath
     (the output of promoteActionsIIS); the description may be stale. Confirm before relying on it.
     The workingPath is shared with the following dedupConsistency node.
     Next node: dedupConsistency. -->
<NODE name="duplicateScan" type="SubmitHadoopJob">
<DESCRIPTION>search for duplicates in the raw graph</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'actionSetId' : 'dedupConfig',
'graphBasePath' : 'inferredGraphPath',
'dedupGraphPath': 'dedupGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app',
'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple',
'workingPath' : '/tmp/prod_provision/working_dir/dedup',
'whiteListPath' : '/data/dedup/whitelist_prod',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G',
'sparkDriverMemory' : '6G'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="dedupConsistency"/>
</ARCS>
</NODE>
<!-- dedupConsistency: submits the Oozie application /lib/dnet/PROD/oa/dedup/consistency/oozie_app on cluster IIS.
     Bound env attributes: graphBasePath from dedupGraphPath, graphOutputPath to consistentGraphPath.
     It uses the same workingPath (/tmp/prod_provision/working_dir/dedup) as duplicateScan.
     Next node: promoteActionsDedup. -->
<NODE name="dedupConsistency" type="SubmitHadoopJob">
<DESCRIPTION>mark duplicates as deleted and redistribute the relationships</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'graphBasePath' : 'dedupGraphPath',
'graphOutputPath': 'consistentGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app',
'workingPath' : '/tmp/prod_provision/working_dir/dedup',
'sparkExecutorCores' : '2',
'sparkExecutorMemory' : '10G'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="promoteActionsDedup"/>
</ARCS>
</NODE>
<!-- promoteActionsDedup: submits the Oozie application /lib/dnet/PROD/actionmanager/wf/main/oozie_app on cluster IIS
     (the same application used by promoteActionsIIS).
     Bound env attributes: inputActionSetIds from actionSetIdsDedup, inputGraphRootPath from consistentGraphPath,
     outputGraphRootPath to dedupEnrichedGraphPath.
     Differences from promoteActionsIIS in params: shouldGroupById is 'true', executors use 3 cores / 10G, and a separate workingDir.
     Next node: promoteActionsDedup_continue. -->
<NODE name="promoteActionsDedup" type="SubmitHadoopJob">
<DESCRIPTION>applies the actionsets referring to deduped records</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'inputActionSetIds' : 'actionSetIdsDedup',
'inputGraphRootPath' : 'consistentGraphPath',
'outputGraphRootPath' : 'dedupEnrichedGraphPath',
'isLookupUrl' : 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G',
'activePromoteDatasetActionPayload' : 'true',
'activePromoteDatasourceActionPayload' : 'true',
'activePromoteOrganizationActionPayload' : 'true',
'activePromoteOtherResearchProductActionPayload' : 'true',
'activePromoteProjectActionPayload' : 'true',
'activePromotePublicationActionPayload' : 'true',
'activePromoteRelationActionPayload' : 'true',
'activePromoteResultActionPayload' : 'true',
'activePromoteSoftwareActionPayload' : 'true',
'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
'shouldGroupById':'true',
'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsDedup'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="promoteActionsDedup_continue"/>
</ARCS>
</NODE>
<!-- promoteActionsDedup_continue: user-managed YES/NO checkpoint (stored value: YES).
     NO ends the workflow at 'success' right after the dedup-enriched graph; YES continues with orcidPropagation. -->
<NODE name="promoteActionsDedup_continue" type="Selection">
<DESCRIPTION>Continue after dedup enriched graph?</DESCRIPTION>
<PARAMETERS>
<PARAM function="validValues(['YES', 'NO'])" managedBy="user" name="selection" required="true" type="string">YES</PARAM>
</PARAMETERS>
<ARCS>
<ARC name="NO" to="success"/>
<ARC name="YES" to="orcidPropagation"/>
</ARCS>
</NODE>
<!-- orcidPropagation: submits the Oozie application /lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app on cluster IIS.
     Bound env attributes: sourcePath from dedupEnrichedGraphPath, outputPath to orcidGraphPath.
     allowedsemrels lists each relation name in two capitalisations (IsSupplementedBy / isSupplementedBy, IsSupplementTo / isSupplementTo);
     the later resultProjectPropagation and communitySemrelPropagation nodes list only the capitalised forms.
     Next node: bulkTagging. -->
<NODE name="orcidPropagation" type="SubmitHadoopJob">
<DESCRIPTION>propagates ORCID among results linked by allowedsemrels semantic relationships</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'dedupEnrichedGraphPath',
'outputPath': 'orcidGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/orcid',
'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo;isSupplementedBy;isSupplementTo',
'saveGraph' : 'true',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="bulkTagging"/>
</ARCS>
</NODE>
<!-- bulkTagging: submits the Oozie application /lib/dnet/PROD/oa/enrichment/bulktag/oozie_app on cluster IIS.
     Bound env attributes: sourcePath from orcidGraphPath, outputPath to bulkTaggingGraphPath, pathMap from bulkTaggingPathMap.
     NOTE(review): params embeds a PostgreSQL JDBC URL, user and password in clear text.
     If these are live credentials they should not be stored in a versioned test resource; confirm and replace with placeholders.
     Next node: affiliationPropagation. -->
<NODE name="bulkTagging" type="SubmitHadoopJob">
<DESCRIPTION>mark results respecting some rules as belonging to communities</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'orcidGraphPath',
'outputPath': 'bulkTaggingGraphPath',
'isLookUpUrl' : 'isLookUpUrl',
'pathMap' : 'bulkTaggingPathMap'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus',
'postgresUser' : 'dnet',
'postgresPassword' : 'dnetPwd',
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/bulktag/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/bulktag',
'sparkExecutorCores' : '2',
'sparkExecutorMemory' : '10G'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="affiliationPropagation"/>
</ARCS>
</NODE>
<!-- affiliationPropagation: submits the Oozie application /lib/dnet/PROD/oa/enrichment/affiliation/oozie_app on cluster IIS.
     Bound env attributes: sourcePath from bulkTaggingGraphPath, outputPath to affiliationGraphPath.
     params sets 'blacklist' to the literal 'empty' and declares no Spark executor sizing, unlike most sibling nodes.
     Next node: affiliationSemRelPropagation. -->
<NODE name="affiliationPropagation" type="SubmitHadoopJob">
<DESCRIPTION>creates relationships between results and organizations when the organizations are associated to institutional repositories</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'bulkTaggingGraphPath',
'outputPath': 'affiliationGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/affiliation/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/affiliation',
'saveGraph' : 'true',
'blacklist' : 'empty'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="affiliationSemRelPropagation"/>
</ARCS>
</NODE>
<!-- affiliationSemRelPropagation: submits the Oozie application /lib/dnet/PROD/oa/enrichment/affiliation_semrel/oozie_app on cluster IIS.
     Bound env attributes: sourcePath from affiliationGraphPath, outputPath to affiliationSemRelGraphPath, iterations from iterations.
     Next node: communityOrganizationPropagation. -->
<NODE name="affiliationSemRelPropagation" type="SubmitHadoopJob">
<DESCRIPTION>creates relationships between results and organizations when the organizations are in parent/child relationships, from the children to the parents</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'affiliationGraphPath',
'outputPath': 'affiliationSemRelGraphPath',
'iterations':'iterations'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/affiliation_semrel/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/affiliationsemrel',
'sparkExecutorCores' : '2',
'sparkExecutorMemory' : '10G'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="communityOrganizationPropagation"/>
</ARCS>
</NODE>
<!-- communityOrganizationPropagation: submits the Oozie application /lib/dnet/PROD/oa/enrichment/community_organization/oozie_app on cluster IIS.
     Bound env attributes: sourcePath from affiliationSemRelGraphPath, outputPath to communityOrganizationGraphPath,
     organizationtoresultcommunitymap from propagationOrganizationCommunityMap.
     Next node: resultProjectPropagation. -->
<NODE name="communityOrganizationPropagation" type="SubmitHadoopJob">
<DESCRIPTION>marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'affiliationSemRelGraphPath',
'outputPath': 'communityOrganizationGraphPath',
'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_organization/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/community_organization',
'sparkExecutorCores' : '2',
'sparkExecutorMemory' : '12G',
'saveGraph' : 'true'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="resultProjectPropagation"/>
</ARCS>
</NODE>
<!-- resultProjectPropagation: submits the Oozie application /lib/dnet/PROD/oa/enrichment/funding/oozie_app on cluster IIS.
     Bound env attributes: sourcePath from communityOrganizationGraphPath, outputPath to fundingGraphPath.
     allowedsemrels is limited to IsSupplementedBy and IsSupplementTo.
     Next node: communitySemrelPropagation. -->
<NODE name="resultProjectPropagation" type="SubmitHadoopJob">
<DESCRIPTION>creates relations between projects and results linked to other results through allowedsemrels semantic relations linked to projects</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'communityOrganizationGraphPath',
'outputPath': 'fundingGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/funding/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/funding',
'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo',
'saveGraph' : 'true',
'sparkExecutorCores' : '2',
'sparkExecutorMemory' : '10G'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="communitySemrelPropagation"/>
</ARCS>
</NODE>
<!-- communitySemrelPropagation: submits the Oozie application /lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app on cluster IIS.
     Bound env attributes: sourcePath from fundingGraphPath, outputPath to communitySemRelGraphPath, isLookUpUrl from isLookUpUrl.
     allowedsemrels is limited to IsSupplementedBy and IsSupplementTo.
     Next node: countryPropagation. -->
<NODE name="communitySemrelPropagation" type="SubmitHadoopJob">
<DESCRIPTION>tags as belonging to communities the results in allowedsemrels relation with other results already linked to communities</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'fundingGraphPath',
'outputPath': 'communitySemRelGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/community_semrel',
'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo',
'sparkExecutorCores' : '2',
'sparkExecutorMemory' : '10G',
'saveGraph' : 'true'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="countryPropagation"/>
</ARCS>
</NODE>
<!-- countryPropagation: submits the Oozie application /lib/dnet/PROD/oa/enrichment/country/oozie_app on cluster IIS.
     Bound env attributes: sourcePath from communitySemRelGraphPath, outputPath to countryGraphPath.
     params restricts 'allowedtypes' to pubsrepository::institutional and passes a ';'-separated 'whitelist' of datasource identifiers.
     Next node: graphCleaning. -->
<NODE name="countryPropagation" type="SubmitHadoopJob">
<DESCRIPTION>associates to results collected from allowedtypes and those in the whitelist the country of the organization(s) handling the datasource they are collected from</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'communitySemRelGraphPath',
'outputPath': 'countryGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/country/oozie_app',
'sparkExecutorCores' : '2',
'sparkExecutorMemory' : '10G',
'workingDir' : '/tmp/prod_provision/working_dir/country',
'allowedtypes' : 'pubsrepository::institutional',
'whitelist':'10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48',
'saveGraph' : 'true'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="graphCleaning"/>
</ARCS>
</NODE>
<!-- graphCleaning: second run of the Oozie application /lib/dnet/PROD/oa/graph/clean/oozie_app on cluster IIS, this time with shouldClean set to 'true'.
     Bound env attributes: graphInputPath from countryGraphPath, graphOutputPath to cleanedGraphPath.
     It reuses the workingDir of graphCleaningFirst (/tmp/prod_provision/working_dir/clean) and adds
     verifyCountryParam, collectedfrom and country settings that the first run does not pass.
     NOTE(review): params embeds a PostgreSQL JDBC URL, user and password in clear text.
     If these are live credentials they should not be stored in a versioned test resource; confirm and replace with placeholders.
     Next node: blacklistRelations. -->
<NODE name="graphCleaning" type="SubmitHadoopJob">
<DESCRIPTION>clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid</DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'graphInputPath' : 'countryGraphPath',
'graphOutputPath': 'cleanedGraphPath',
'isLookupUrl': 'isLookUpUrl'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus',
'postgresUser' : 'dnet',
'postgresPassword' : 'dnetPwd',
'verifyCountryParam' : '10.17632;10.5061',
'collectedfrom' : 'NARCIS',
'country' : 'NL',
'sparkExecutorCores' : '3',
'sparkExecutorMemory' : '10G',
'workingDir' : '/tmp/prod_provision/working_dir/clean',
'shouldClean' : 'true'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="blacklistRelations"/>
</ARCS>
</NODE>
<!-- blacklistRelations: last job node of the workflow; submits the Oozie application /lib/dnet/PROD/oa/enrichment/blacklist/oozie_app on cluster IIS.
     Bound env attributes: sourcePath from cleanedGraphPath, outputPath to blacklistedGraphPath.
     NOTE(review): params embeds a PostgreSQL JDBC URL, user and password in clear text.
     If these are live credentials they should not be stored in a versioned test resource; confirm and replace with placeholders.
     Next node: success. -->
<NODE name="blacklistRelations" type="SubmitHadoopJob">
<DESCRIPTION>removes blacklisted relations </DESCRIPTION>
<PARAMETERS>
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
<PARAM managedBy="system" name="envParams" required="true" type="string">
{
'sourcePath' : 'cleanedGraphPath',
'outputPath': 'blacklistedGraphPath'
}
</PARAM>
<PARAM managedBy="system" name="params" required="true" type="string">
{
'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/blacklist/oozie_app',
'workingDir' : '/tmp/prod_provision/working_dir/blacklist',
'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus',
'postgresUser' : 'dnet',
'postgresPassword' : 'dnetPwd',
'sparkExecutorCores' : '2',
'sparkExecutorMemory' : '10G'
}
</PARAM>
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
</PARAMETERS>
<ARCS>
<ARC to="success"/>
</ARCS>
</NODE>
</CONFIGURATION>
<STATUS>
<LAST_EXECUTION_ID>wf_20230605_132025_368</LAST_EXECUTION_ID>
<LAST_EXECUTION_DATE>2023-06-05T13:21:02+00:00</LAST_EXECUTION_DATE>
<LAST_EXECUTION_STATUS>FAILURE</LAST_EXECUTION_STATUS>
<LAST_EXECUTION_ERROR>eu.dnetlib.data.hadoop.rmi.HadoopServiceException: hadoop job: 0011965-221214081318119-oozie-oozi-W failed with status: KILLED, oozie log: 2023-06-05 13:20:37,012 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.production] GROUP[-] TOKEN[] APP[community_to_result_propagation] JOB[0011965-221214081318119-oozie-oozi-W] ACTION[] No results found 2023-06-05 13:20:37,032 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.production] GROUP[-] TOKEN[] APP[community_to_result_propagation] JOB[0011965-221214081318119-oozie-oozi-W] ACTION[0011965-221214081318119-oozie-oozi-W@:start:] Start action [0011965-221214081318119-oozie-oozi-W@:start:] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2023-06-05 13:20:37,032 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.production] GROUP[-] TOKEN[] APP[community_to_result_propagation] JOB[0011965-221214081318119-oozie-oozi-W] ACTION[0011965-221214081318119-oozie-oozi-W@:start:] [***0011965-221214081318119-oozie-oozi-W@:start:***]Action status=DONE 2023-06-05 13:20:37,032 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.production] GROUP[-] TOKEN[] APP[community_to_result_propagation] JOB[0011965-221214081318119-oozie-oozi-W] ACTION[0011965-221214081318119-oozie-oozi-W@:start:] [***0011965-221214081318119-oozie-oozi-W@:start:***]Action updated in DB! 
2023-06-05 13:20:37,078 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.production] GROUP[-] TOKEN[] APP[community_to_result_propagation] JOB[0011965-221214081318119-oozie-oozi-W] ACTION[0011965-221214081318119-oozie-oozi-W@:start:] No results found 2023-06-05 13:20:37,093 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0011965-221214081318119-oozie-oozi-W] ACTION[0011965-221214081318119-oozie-oozi-W@:start:] No Notification URL is defined. Therefore nothing to notify for job 0011965-221214081318119-oozie-oozi-W@:start: 2023-06-05 13:20:37,093 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0011965-221214081318119-oozie-oozi-W] ACTION[] No Notification URL is defined. Therefore nothing to notify for job 0011965-221214081318119-oozie-oozi-W 2023-06-05 13:20:37,140 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.production] GROUP[-] TOKEN[] APP[community_to_result_propagation] JOB[0011965-221214081318119-oozie-oozi-W] ACTION[0011965-221214081318119-oozie-oozi-W@reset_outputpath] Start action [0011965-221214081318119-oozie-oozi-W@reset_outputpath] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2023-06-05 13:20:48,251 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.production] GROUP[-] TOKEN[] APP[community_to_result_propagation] JOB[0011965-221214081318119-oozie-oozi-W] ACTION[0011965-221214081318119-oozie-oozi-W@reset_outputpath] [***0011965-221214081318119-oozie-oozi-W@reset_outputpath***]Action status=DONE 2023-06-05 13:20:48,251 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.production] GROUP[-] TOKEN[] 
APP[community_to_result_propagation] JOB[0011965-221214081318119-oozie-oozi-W] ACTION[0011965-221214081318119-oozie-oozi-W@reset_outputpath] [***0011965-221214081318119-oozie-oozi-W@reset_outputpath***]Action updated in DB! 2023-06-05 13:20:48,299 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.production] GROUP[-] TOKEN[] APP[community_to_result_propagation] JOB[0011965-221214081318119-oozie-oozi-W] ACTION[0011965-221214081318119-oozie-oozi-W@reset_outputpath] No results found</LAST_EXECUTION_ERROR>
<LAST_EXECUTION_OUTPUT name="mainlog:selection">communityOrganizationPropagation</LAST_EXECUTION_OUTPUT>
</STATUS>
</BODY>
</RESOURCE_PROFILE>