From 04ad8969b20859112ad49bae2bcebd5841b5b457 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 30 Oct 2020 15:46:55 +0100 Subject: [PATCH] anticipated execution of the graph cleaning workflow --- .../wf/profiles/graph_prod_construction.xml | 66 +++++++++++++++---- 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml index 0474333203..4eab12c73a 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml @@ -44,6 +44,7 @@ + Set the target path to store the RAW graph @@ -54,31 +55,45 @@ + + + Set the target path to store the first CLEANED graph + + firstCleanedGraphPath + /tmp/prod_provision/graph/02_graph_first_cleaned + + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_provision/graph/02_graph_dedup + /tmp/beta_provision/graph/03_graph_dedup + Set the target path to store the INFERRED graph inferredGraphPath - /tmp/beta_provision/graph/03_graph_inferred + /tmp/beta_provision/graph/04_graph_inferred + Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_provision/graph/04_graph_consistent + /tmp/beta_provision/graph/05_graph_consistent @@ -89,7 +104,7 @@ Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/beta_provision/graph/05_graph_orcid + /tmp/beta_provision/graph/06_graph_orcid @@ -100,7 +115,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/beta_provision/graph/06_graph_bulktagging + /tmp/beta_provision/graph/07_graph_bulktagging @@ -111,7 +126,7 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/beta_provision/graph/07_graph_affiliation + /tmp/beta_provision/graph/08_graph_affiliation @@ -122,7 +137,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/beta_provision/graph/08_graph_comunity_organization + /tmp/beta_provision/graph/09_graph_comunity_organization @@ -133,7 +148,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/beta_provision/graph/09_graph_funding + /tmp/beta_provision/graph/10_graph_funding @@ -144,7 +159,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/beta_provision/graph/10_graph_comunity_sem_rel + /tmp/beta_provision/graph/11_graph_comunity_sem_rel @@ -155,7 +170,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/beta_provision/graph/11_graph_country + /tmp/beta_provision/graph/12_graph_country @@ -166,7 +181,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_provision/graph/12_graph_cleaned + /tmp/beta_provision/graph/13_graph_cleaned @@ -177,7 +192,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/beta_provision/graph/13_graph_blacklisted + /tmp/beta_provision/graph/14_graph_blacklisted @@ -324,6 +339,31 @@ build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'firstCleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', + 'workingPath' : '/tmp/beta_provision/working_dir/first_clean' + } + + build-report + @@ -337,7 +377,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'rawGraphPath', + 'graphBasePath' : 'firstCleanedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' }