From 577f3b1ac8696abf405de9969f4efbd81d8bba95 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 9 Aug 2021 11:53:58 +0200 Subject: [PATCH] added dnet workflows responsible for the graph construction, enrichment, provision --- .../actionset_bipFinderScores.xml | 100 ++ .../dhp/actionmanager/actionset_datacite.xml | 144 +++ .../dhp/actionmanager/actionset_doiboost.xml | 200 ++++ .../actionset_h2020_classification.xml | 132 +++ .../actionset_orcidworks-no-doi.xml | 101 ++ .../dhp/actionmanager/actionset_ror.xml | 89 ++ .../dhp/provision/00_beta_graph_for_IIS.xml | 628 +++++++++++ .../dhp/provision/00_prod_graph_for_IIS.xml | 437 ++++++++ .../eu/dnetlib/dhp/provision/01_IIS.xml | 225 ++++ .../dnetlib/dhp/provision/02_beta_graph.xml | 995 ++++++++++++++++++ .../dnetlib/dhp/provision/02_prod_graph.xml | 778 ++++++++++++++ .../dnetlib/dhp/provision/03_graph2hive.xml | 74 ++ .../dnetlib/dhp/provision/04_graph2solr.xml | 99 ++ .../dnetlib/dhp/provision/05_graph2stats.xml | 100 ++ .../dhp/provision/06_publish_stats.xml | 87 ++ .../eu/dnetlib/dhp/provision/07_broker.xml | 131 +++ 16 files changed, 4320 insertions(+) create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml create mode 100644 
dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml new file mode 100644 index 000000000..e4680a0cf --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml @@ -0,0 +1,100 @@ + +
+ + + + + +
+ + Import bipFinder scores + Import bipFinder scores + 30 + + + declares the path holding the BIP SCORE data + + bipScorePath + /data/bip/20201206 + + + + + + + declares the path holding the LATEST GRAPH dump + + latestGraphPath + /tmp/stable_ids/graph/14_graph_blacklisted + + + + + + + prepare action sets + + + [ + { + 'set' : 'bipfinder-scores', + 'jobProperty' : 'export_action_set_bipfinder-scores', + 'enablingProperty' : 'active_bipfinder-scores', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare AS for the bipFinder scores integration + + executeOozieJob + IIS + + { + 'bipScorePath':'bipScorePath', + 'inputPath':'latestGraphPath', + 'outputPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/bipfinder/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/bipfinder' + } + + build-report + + + + + + + update action sets + + + + + + + + + + + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml new file mode 100644 index 000000000..d2ea9d35f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml @@ -0,0 +1,144 @@ + +
+ + + + + +
+ + Import Datacite ActionSet + Import InfoSpace + 30 + + + set the oozie workflow node from which the execution will be resumed + + resumeFrom + TransformDatacite + + + + + + + shall the datacite mapping produce the links? + + exportLinks + false + + + + + + + set the path storing the OAF Datacite records + + oafTargetPath + /data/datacite/production/datacite_oaf + + + + + + + set the input path for Datacite content + + datacitePath + /data/datacite + + + + + + + prepare action sets + + + [ + { + 'set' : 'datacite', + 'jobProperty' : 'export_action_set_datacite', + 'enablingProperty' : 'active_datacite', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + import the Datacite content and store it as OAF records + + executeOozieJob + IIS + + { + 'mainPath' : 'datacitePath', + 'oafTargetPath' : 'oafTargetPath', + 'exportLinks' : 'exportLinks', + 'resumeFrom' : 'resumeFrom' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_import/oozie_app', + 'sparkExecutorMemory' : '7G' + } + + build-report + + + + + + + prepare a new version of Datacite ActionSet + + executeOozieJob + IIS + + { + 'sourcePath' : 'oafTargetPath', + 'outputPath' : 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_actionset/oozie_app', + 'sparkExecutorMemory' : '7G' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210723_163342_752 + 2021-07-23T16:44:05+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml new file mode 100644 index 000000000..ce9eb8f4c --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml @@ -0,0 +1,200 @@ + +
+ + + + + +
+ + Import DOIboost + Import InfoSpace + 30 + + + set the input path for MAG + + MAGDumpPath + /data/doiboost/mag-2021-02-15 + + + + + + + set the input path for CROSSREF dump + + crossrefDumpPath + /data/doiboost/crossref/ + + + + + + + set the intermediate path used to process MAG + + intermediatePathMAG + /data/doiboost/input/mag + + + + + + + set the input path for Crossref + + inputPathCrossref + /data/doiboost/input/crossref + + + + + + + set the timestamp for the Crossref incremental harvesting + + crossrefTimestamp + 1607614921429 + + + + + + + set the input path for UnpayWall + + inputPathUnpayWall + /data/doiboost/input/unpayWall + + + + + + + set the input path for ORCID + + inputPathOrcid + /data/orcid_activities_2020/last_orcid_dataset + + + + + + + set the working path for ORCID + + workingPathOrcid + /data/doiboost/input/orcid + + + + + + + set the hostedBy map path + + hostedByMapPath + /data/doiboost/input/hostedBy/hbMap.gz + + + + + + + set the oozie workflow name from which the execution will be resumed + + resumeFrom + ConvertCrossrefToOAF + + + + + + + wait configurations + + + + + + + prepare action sets + + + [ + { + 'set' : 'doiboost', + 'jobProperty' : 'export_action_set_doiboost', + 'enablingProperty' : 'active_doiboost', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare a new version of DOIBoost + + executeOozieJob + IIS + + { + 'crossrefTimestamp' : 'crossrefTimestamp', + 'hostedByMapPath' : 'hostedByMapPath', + 'MAGDumpPath' :'MAGDumpPath', + 'inputPathMAG' : 'intermediatePathMAG', + 'inputPathCrossref' : 'inputPathCrossref', + 'crossrefDumpPath':'crossrefDumpPath', + 'inputPathUnpayWall' : 'inputPathUnpayWall', + 'inputPathOrcid' : 'inputPathOrcid', + 'outputPath' : 'outputPath', + 'workingPathOrcid':'workingPathOrcid', + 'resumeFrom' : 'resumeFrom' + } + + + { + 'oozie.wf.application.path' : 
'/lib/dnet/PROD/actionmanager/doiboost_process/oozie_app', + 'workingPath' : '/data/doiboost/process_p', + 'sparkExecutorCores' : '2', + 'sparkExecutorIntersectionMemory' : '12G', + 'sparkExecutorMemory' : '8G', + 'esServer' : '[es_server]', + 'esIndex' : 'crossref' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210714_075237_381 + 2021-07-14T09:51:46+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml new file mode 100644 index 000000000..6d29e25a1 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml @@ -0,0 +1,132 @@ + +
+ + + + + +
+ + Import H2020classification + Import H2020classification + 30 + + + sets the URL to download the project file + + projectFileURL + https://cordis.europa.eu/data/cordis-h2020projects.csv + + + + + + + sets the URL to download the programme file + + programmeFileURL + https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv + + + + + + + sets the URL to download the topics file + + topicFileURL + https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx + + + + + + + sets the name of the sheet in the topic file to be read + + sheetName + Topics + + + + + + + wait configurations + + + + + + + prepare action sets + + + [ + { + 'set' : 'h2020classification', + 'jobProperty' : 'export_action_set_h2020classification', + 'enablingProperty' : 'active_h2020classification', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare updates for the H2020 Classification + + executeOozieJob + IIS + + { + 'outputPath': 'outputPath', + 'sheetName':'sheetName', + 'projectFileURL' : 'projectFileURL', + 'programmeFileURL' : 'programmeFileURL', + 'topicFileURL':'topicFileURL' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/project/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/h2020classification', + 'postgresURL':'', + 'postgresUser':'', + 'postgresPassword':'' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210524_084803_740 + 2021-05-24T09:05:50+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml new file mode 100644 index 000000000..c5642dadc --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml @@ -0,0 +1,101 @@ + +
+ + + + + +
+ + Import Orcid + Import InfoSpace + 30 + + + set the hdfs input path + + inputPath + /data/orcid_activities_2020 + + + + + + + set the temporary path where to store the action set + + processOutputPath + /tmp/prod_provision/working_path_orcid_activities + + + + + + + prepare action sets + + + [ + { + 'set' : 'orcidworks-no-doi', + 'jobProperty' : 'export_action_set_orcidworks_no_doi', + 'enablingProperty' : 'active_orcidworks_no_doi', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare updates for the Orcid No Doi + + executeOozieJob + IIS + + { + 'workingPath' : 'inputPath', + 'processOutputPath' : 'processOutputPath', + 'outputPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/orcidnodoi_actionset/oozie_app', + 'spark2GenNoDoiDatasetMaxExecutors' : '200', + 'spark2GenNoDoiDatasetExecutorMemory' : '2G' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210713_170819_470 + 2021-07-13T17:28:26+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml new file mode 100644 index 000000000..4810fda3b --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml @@ -0,0 +1,89 @@ + +
+ + + + + +
+ + Update ROR actionset + Import Infospace + 30 + + + Set the path of the ROR JSON dump file + + inputPath + /data/ror/ror-data-2021-04-06.json + + + + + + + prepare action sets + + + [ + { + 'set' : 'ror', + 'jobProperty' : 'export_action_set_ror', + 'enablingProperty' : 'active_ror', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + update the ROR actionset + + executeOozieJob + IIS + + { + 'rorJsonInputPath' : 'inputPath', + 'rorActionSetPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/ror/oozie_app', + 'workingDir': '/tmp/import_ror_actionset_prod' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210518_143542_478 + 2021-05-18T14:37:13+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml new file mode 100644 index 000000000..ef2205e32 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -0,0 +1,628 @@ + +
+ + + + + +
+ + Graph construction for IIS [BETA] + IIS + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + gsrt________,rcuk________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_inference/graph/01_graph_merged + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/beta_inference/graph/02_graph_raw + + + + + + + Set the target path to store the CLEANED graph + + cleanedFirstGraphPath + /tmp/beta_inference/graph/03_graph_clean_first + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/beta_inference/graph/04_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/beta_inference/graph/05_graph_consistent + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_inference/graph/06_graph_cleaned + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,datacite + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs 
content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? + + shouldPatchRelations_PROD + false + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_inference/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? 
+ + shouldPatchRelations_BETA + false + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_inference/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_inference/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 
'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_inference/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/merge_graph', + 'priority' : 'BETA' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'mergedGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_inference/working_dir/promoteActionsRaw' + } + + build-report + + + + + + 
+ clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/clean_first' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_inference/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/beta_inference/working_dir/dedup' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'consistentGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/clean' + } + + build-report + + + + + + + + wf_20210730_094240_462 + 2021-07-30T15:04:19+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml new file mode 100644 index 000000000..e5ce3d710 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml @@ -0,0 +1,437 @@ + +
+ + + + + +
+ + Graph construction for IIS [PROD NEW] + IIS + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the path containing the PROD AGGREGATOR graph + + aggregatorGraphPath + /tmp/prod_inference/graph/00_graph_aggregator + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/prod_inference/graph/01_graph_raw + + + + + + + Set the target path to store the CLEANED graph + + cleanedFirstGraphPath + /tmp/prod_inference/graph/02_graph_clean_first + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/prod_inference/graph/03_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/prod_inference/graph/04_graph_consistent + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/prod_inference/graph/05_graph_cleaned + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,datacite + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs + true + + + + + + + reuse cached ODF content 
from the PROD aggregation system + + reuseODF + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs + true + + + + + + + should apply the relations id patching based on the provided idMapping? + + shouldPatchRelations + false + + + + + + + set the PROD aggregator content path + + contentPath + /tmp/prod_aggregator + + + + + + + wait configurations + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'aggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims', + 'reuseOAFClaims' : 'reuseOAFClaims', + 'reuseDB' : 'reuseDB', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs', + 'reuseODF' : 'reuseODF', + 'reuseODF_hdfs' : 'reuseODFhdfs', + 'reuseOAF' : 'reuseOAF', + 'reuseOAF_hdfs' : 'reuseOAFhdfs', + 'contentPath' : 'contentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/prod_inference/working_dir/prod_aggregator' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'aggregatorGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : 
'3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_inference/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/clean_first' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/prod_inference/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/prod_inference/working_dir/dedup' + } + + build-report + + + + + + + clean the properties in the 
graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'consistentGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/clean' + } + + build-report + + + + + + + + wf_20210719_165159_86 + 2021-07-19T20:45:09+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml new file mode 100644 index 000000000..126d5f58d --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml @@ -0,0 +1,225 @@ + +
+ + + + + +
+ + IIS main workflow V3 [PROD] + IIS + 30 + + + start + + + + + + + Set a regex of funder shortnames to exclude from the project reference processing + + referenceextraction_project_fundingclass_blacklist_regex + ^DFG::.*$|^CONICYT::.*$|^RSF::.*$|^SGOV::.*$|^GSRT::.*$|^MIUR::.*$|^INNOVIRIS::.*$|^RIF::.*$|^SFRS::.*$ + + + + + + + prepare action sets + + + [ + { + 'set' : 'iis-document-affiliation', + 'jobProperty' : 'export_action_set_id_matched_doc_organizations', + 'enablingProperty' : 'active_document_affiliation', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-projects-main', + 'jobProperty' : 'export_action_set_id_document_referencedProjects', + 'enablingProperty' : 'active_referenceextraction_project', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-datasets-main', + 'jobProperty' : 'export_action_set_id_document_referencedDatasets', + 'enablingProperty' : 'active_referenceextraction_dataset', + 'enabled' : 'true' + }, + { + 'set' : 'iis-researchinitiative', + 'jobProperty' : 'export_action_set_id_document_research_initiative', + 'enablingProperty' : 'active_referenceextraction_researchinitiative', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-similarities', + 'jobProperty' : 'export_action_set_id_document_similarities_standard', + 'enablingProperty' : 'active_documentssimilarity', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-classes', + 'jobProperty' : 'export_action_set_id_document_classes', + 'enablingProperty' : 'active_documentsclassification', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations', + 'jobProperty' : 'export_action_set_id_document_referencedDocuments', + 'enablingProperty' : 'active_citationmatching', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations-relations', + 'jobProperty' : 'export_action_set_id_citation_relations', + 'enablingProperty' : 'active_citationmatching_relations', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenceextraction-pdb', + 'jobProperty' : 
'export_action_set_id_document_pdb', + 'enablingProperty' : 'active_referenceextraction_pdb', + 'enabled' : 'true' + }, + { + 'set' : 'document_software_url', + 'jobProperty' : 'export_action_set_id_document_software_url', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-software', + 'jobProperty' : 'export_action_set_id_entity_software', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-communities', + 'jobProperty' : 'export_action_set_id_document_community', + 'enablingProperty' : 'active_referenceextraction_community', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-patents', + 'jobProperty' : 'export_action_set_id_document_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-patent', + 'jobProperty' : 'export_action_set_id_entity_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-covid-19', + 'jobProperty' : 'export_action_set_id_document_covid19', + 'enablingProperty' : 'active_referenceextraction_covid19', + 'enabled' : 'true' + } + ] + + + + + + + + prepare parameters + + import_islookup_service_location + import_content_objectstores_csv + import_content_object_store_location + import_mdstore_service_location + import_dataset_mdstore_ids_csv + oozie.wf.application.path + /lib/iis/primary/snapshots/2021-06-23 + IIS + /tmp/prod_inference/graph/05_graph_cleaned + import_infospace_graph_location + + import_project_concepts_context_ids_csv + aginfra,beopen,clarin,covid-19,dariah,dh-ch,oa-pg,egi,elixir-gr,enermaps,epos,fam,fet-fp7,fet-h2020,gotriple,instruct,mes,ni,rda,science-innovation-policy,risis,rural-digital-europe,sdsn-gr,sobigdata + + + + + + + IIS main + + iisMainJobV3 + + { + 'cluster' : 'cluster', + 'oozie.wf.application.path' : 'oozie.wf.application.path', + 
'referenceextraction_project_fundingclass_blacklist_regex' : 'referenceextraction_project_fundingclass_blacklist_regex', + + 'active_document_affiliation' : 'active_document_affiliation', + 'active_referenceextraction_project' : 'active_referenceextraction_project', + 'active_referenceextraction_dataset' : 'active_referenceextraction_dataset', + 'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative', + 'active_documentsclassification' : 'active_documentsclassification', + 'active_documentssimilarity' : 'active_documentssimilarity', + 'active_citationmatching' : 'active_citationmatching', + 'active_citationmatching_relations' : 'active_citationmatching_relations', + 'active_referenceextraction_pdb' : 'active_referenceextraction_pdb', + 'active_referenceextraction_software_url' : 'active_referenceextraction_software_url', + 'active_referenceextraction_community' : 'active_referenceextraction_community', + 'active_referenceextraction_patent' : 'active_referenceextraction_patent', + 'active_referenceextraction_covid19' : 'active_referenceextraction_covid19', + + 'import_content_objectstores_csv' : 'import_content_objectstores_csv', + 'import_content_object_store_location' : 'import_content_object_store_location', + 'import_mdstore_service_location' : 'import_mdstore_service_location', + 'import_islookup_service_location' : 'import_islookup_service_location', + 'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv', + 'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', + 'import_infospace_graph_location' : 'import_infospace_graph_location', + + 'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations', + 'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', + 'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects', + 
'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative', + 'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard', + + 'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments', + 'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb', + 'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url', + 'export_action_set_id_entity_software' : 'export_action_set_id_entity_software', + 'export_action_set_id_document_community' : 'export_action_set_id_document_community', + 'export_action_set_id_document_patent' : 'export_action_set_id_document_patent', + 'export_action_set_id_entity_patent' : 'export_action_set_id_entity_patent', + 'export_action_set_id_document_covid19' : 'export_action_set_id_document_covid19', + 'export_action_set_id_document_classes' : 'export_action_set_id_document_classes' + } + + false + build-report + + + + + + + update action sets + + + + + + + + wf_20210719_221139_780 + 2021-07-21T01:23:13+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml new file mode 100644 index 000000000..766783f8b --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -0,0 +1,995 @@ + +
+ + + + + +
+ + Graph Construction [BETA] + Data Provision + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + gsrt________,rcuk________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_provision/graph/01_graph_merged + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/beta_provision/graph/02_graph_raw + + + + + + + Set the target path to store the first CLEANED graph + + cleanedFirstGraphPath + /tmp/beta_provision/graph/03_graph_cleaned + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/beta_provision/graph/04_graph_dedup + + + + + + + Set the target path to store the INFERRED graph + + inferredGraphPath + /tmp/beta_provision/graph/05_graph_inferred + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/beta_provision/graph/06_graph_consistent + + + + + + + Set the target path to store the ORCID enriched graph + + orcidGraphPath + /tmp/beta_provision/graph/07_graph_orcid + + + + + + + Set the target path to store the BULK TAGGED graph + + bulkTaggingGraphPath + /tmp/beta_provision/graph/08_graph_bulktagging + + + + + + + Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph + + affiliationGraphPath + /tmp/beta_provision/graph/09_graph_affiliation + + + + + + + Set the target path to store the COMMUNITY from SELECTED SOURCES graph + + communityOrganizationGraphPath + /tmp/beta_provision/graph/10_graph_comunity_organization + + + + + + + Set the target path to store the FUNDING from SEMANTIC RELATION graph + + fundingGraphPath + /tmp/beta_provision/graph/11_graph_funding + + + + + + + Set the target path to store the COMMUNITY from SEMANTIC RELATION graph + + communitySemRelGraphPath + /tmp/beta_provision/graph/12_graph_comunity_sem_rel + + + + + + + Set the target 
path to store the COUNTRY enriched graph + + countryGraphPath + /tmp/beta_provision/graph/13_graph_country + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_provision/graph/14_graph_cleaned + + + + + + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/beta_provision/graph/15_graph_blacklisted + + + + + + + Set the map of paths for the Bulk Tagging + + bulkTaggingPathMap + {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} + + + + + + + Set the map of associations organization, community list for the propagation of community to result through organization + + propagationOrganizationCommunityMap + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], 
"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], 
"20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} + + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite + + + + + + + declares 
the ActionSet ids to promote in the INFERRED graph + + actionSetIdsIISGraph + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? 
+ + shouldPatchRelations_PROD + true + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_provision/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? 
+ + shouldPatchRelations_BETA + true + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_provision/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_provision/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 
'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_provision/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/merge_graph', + 'priority' : 'BETA' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'mergedGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsRaw' + } + + build-report + + + + + + 
+ clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/clean' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_provision/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + create the INFERRED graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsIISGraph', + 'inputGraphRootPath' : 'dedupGraphPath', + 'outputGraphRootPath' : 'inferredGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsIIS' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the 
relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'inferredGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/beta_provision/working_dir/dedup' + } + + build-report + + + + + + + + propagates ORCID among results linked by allowedsemrels semantic relationships + + executeOozieJob + IIS + + { + 'sourcePath' : 'consistentGraphPath', + 'outputPath': 'orcidGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/orcid', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo;isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark results respecting some rules as belonging to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'orcidGraphPath', + 'outputPath': 'bulkTaggingGraphPath', + 'isLookUpUrl' : 'isLookUpUrl', + 'pathMap' : 'bulkTaggingPathMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/bulktag/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/bulktag' + } + + build-report + + + + + + + creates relationships between results and organizations when the organizations are associated to institutional repositories + + executeOozieJob + IIS + + { + 'sourcePath' : 'bulkTaggingGraphPath', + 'outputPath': 'affiliationGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/affiliation/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/affiliation', + 'saveGraph' : 'true', + 'blacklist' : 'empty' + } + + build-report + + + + + + + marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap + + executeOozieJob + IIS + + { + 'sourcePath' : 
'affiliationGraphPath', + 'outputPath': 'communityOrganizationGraphPath', + 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_organization/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/community_organization', + 'saveGraph' : 'true' + } + + build-report + + + + + + + creates relations between projects and results linked to other results through allowedsemrels semantic relations linked to projects + + executeOozieJob + IIS + + { + 'sourcePath' : 'communityOrganizationGraphPath', + 'outputPath': 'fundingGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/funding/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/funding', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + tags as belonging to communities the results in an allowedsemrels relation with other results already linked to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'fundingGraphPath', + 'outputPath': 'communitySemRelGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_semrel/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/community_semrel', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + associates to results collected from allowedtypes and those in the whitelist the country of the organization(s) handling the datasource they are collected from + + executeOozieJob + IIS + + { + 'sourcePath' : 'communitySemRelGraphPath', + 'outputPath': 'countryGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/country/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'workingDir' : '/tmp/beta_provision/working_dir/country', + 'allowedtypes' : 'pubsrepository::institutional', + 
'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0', + 'saveGraph' : 'true' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'countryGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/clean' + } + + build-report + + + + + + + removes blacklisted relations + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/blacklist', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '' + } + + build-report + + + + + + + + wf_20210803_134357_367 + 2021-08-03T17:08:11+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml new file mode 100644 index 000000000..be6155f2f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml @@ -0,0 +1,778 @@ + +
+ + + + + +
+ + Graph construction [PROD NEW] + Data Provision + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________ + + + + + + + Set the path containing the PROD AGGREGATOR graph + + aggregatorGraphPath + /tmp/prod_provision/graph/00_prod_graph_aggregator + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/prod_provision/graph/01_graph_raw + + + + + + + Set the target path to store the cleaned RAW graph + + cleanedFirstGraphPath + /tmp/prod_provision/graph/02_graph_cleaned + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/prod_provision/graph/03_graph_dedup + + + + + + + Set the target path to store the INFERRED graph + + inferredGraphPath + /tmp/prod_provision/graph/04_graph_inferred + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/prod_provision/graph/05_graph_consistent + + + + + + + Set the target path to store the ORCID enriched graph + + orcidGraphPath + /tmp/prod_provision/graph/06_graph_orcid + + + + + + + Set the target path to store the BULK TAGGED graph + + bulkTaggingGraphPath + /tmp/prod_provision/graph/07_graph_bulktagging + + + + + + + Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph + + affiliationGraphPath + /tmp/prod_provision/graph/08_graph_affiliation + + + + + + + Set the target path to store the COMMUNITY from SELECTED SOURCES graph + + communityOrganizationGraphPath + /tmp/prod_provision/graph/09_graph_comunity_organization + + + + + + + Set the target path to store the FUNDING from SEMANTIC RELATION graph + + fundingGraphPath + /tmp/prod_provision/graph/10_graph_funding + + + + + + + Set the target path to store the COMMUNITY from SEMANTIC RELATION graph + + communitySemRelGraphPath + /tmp/prod_provision/graph/11_graph_comunity_sem_rel + + + + + + + Set the target path to 
store the COUNTRY enriched graph + + countryGraphPath + /tmp/prod_provision/graph/12_graph_country + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/prod_provision/graph/13_graph_cleaned + + + + + + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/prod_provision/graph/14_graph_blacklisted + + + + + + + Set the map of paths for the Bulk Tagging + + bulkTaggingPathMap + {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} + + + + + + + Set the map of associations organization, community list for the propagation of community to result through organization + + propagationOrganizationCommunityMap + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], 
"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], 
"20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} + + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite + + + + + + + declares 
the ActionSet ids to promote in the INFERRED graph + + actionSetIdsIISGraph + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF + true + + + + + + + set the PROD aggregator content path + + contentPath + /tmp/prod_aggregator + + + + + + + wait configurations + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'aggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims', + 'reuseOAFClaims' : 'reuseOAFClaims', + 'reuseDB' : 'reuseDB', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs', + 'reuseODF' : 'reuseODF', + 'reuseODF_hdfs' : 'reuseODFhdfs', + 'reuseOAF' : 'reuseOAF', + 'reuseOAF_hdfs' : 'reuseOAFhdfs', + 'contentPath' : 'contentPath', + 
'nsPrefixBlacklist' : 'nsPrefixBlacklist' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/prod_provision/working_dir/prod_aggregator' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'aggregatorGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/clean' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob 
+ IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/prod_provision/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + create the INFERRED graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsIISGraph', + 'inputGraphRootPath' : 'dedupGraphPath', + 'outputGraphRootPath' : 'inferredGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsIIS' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'inferredGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/prod_provision/working_dir/dedup' + } + + build-report + + + + + + + propagates ORCID among results linked by allowedsemrels semantic relationships + + executeOozieJob + IIS + + { + 'sourcePath' : 'consistentGraphPath', + 'outputPath': 
'orcidGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/orcid', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + mark results respecting some rules as belonging to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'orcidGraphPath', + 'outputPath': 'bulkTaggingGraphPath', + 'isLookUpUrl' : 'isLookUpUrl', + 'pathMap' : 'bulkTaggingPathMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/bulktag/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/bulktag' + } + + build-report + + + + + + + creates relationships between results and organizations when the organizations are associated to institutional repositories + + executeOozieJob + IIS + + { + 'sourcePath' : 'bulkTaggingGraphPath', + 'outputPath': 'affiliationGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/affiliation/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/affiliation', + 'saveGraph' : 'true', + 'blacklist' : 'empty' + } + + build-report + + + + + + + marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap + + executeOozieJob + IIS + + { + 'sourcePath' : 'affiliationGraphPath', + 'outputPath': 'communityOrganizationGraphPath', + 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_organization/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/community_organization', + 'saveGraph' : 'true' + } + + build-report + + + + + + + creates relations between projects and results linked to other results through allowedsemrels semantic relations linked to projects + + executeOozieJob + IIS + + { + 'sourcePath' : 
'communityOrganizationGraphPath', + 'outputPath': 'fundingGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/funding/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/funding', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + tags as belonging to communities the results in allowedsemrels relation with other results already linked to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'fundingGraphPath', + 'outputPath': 'communitySemRelGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/community_semrel', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + associates to results collected from allowedtypes and those in the whitelist the country of the organization(s) handling the datasource it is collected from + + executeOozieJob + IIS + + { + 'sourcePath' : 'communitySemRelGraphPath', + 'outputPath': 'countryGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/country/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'workingDir' : '/tmp/prod_provision/working_dir/country', + 'allowedtypes' : 'pubsrepository::institutional', + 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0', + 'saveGraph' : 'true' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'countryGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/clean' + } + 
+ build-report + + + + + + + removes blacklisted relations + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/blacklist', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '' + } + + build-report + + + + + + + + wf_20210723_171026_279 + 2021-07-24T00:00:39+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml new file mode 100644 index 000000000..836e69d6f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml @@ -0,0 +1,74 @@ + +
+ + + + + +
+ + Graph to HiveDB [PROD] + Data Provision + 30 + + + Set the path containing the graph to import + + inputPath + + + + + + + + Set the name of the target Hive DB + + hiveDbName + + + + + + + + wait configurations + + + + + + + import the graph into the Hive DB + + executeOozieJob + IIS + + { + 'inputPath' : 'inputPath', + 'hiveDbName' : 'hiveDbName' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hive/oozie_app', + 'sparkDriverMemory' : '4G', + 'sparkExecutorMemory' : '10G', + 'sparkExecutorCores' : '3' + } + + build-report + + + + + + + + wf_20210728_075001_400 + 2021-07-28T08:04:00+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml new file mode 100644 index 000000000..6cdf41bb6 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml @@ -0,0 +1,99 @@ + +
+ + + + + +
+ + Update Solr [PROD] + Data Provision + 30 + + + Set the path containing the GRAPH to index + + inputGraphRootPath + /tmp/prod_provision/graph/14_graph_blacklisted + + + + + + + Set the metadata format + + format + DMF + + + + + + + Set the lookup address + + isLookupUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + index the GRAPH on Solr + + executeOozieJob + IIS + + { + 'inputGraphRootPath' : 'inputGraphRootPath', + 'isLookupUrl' : 'isLookupUrl', + 'format' : 'format' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/provision/oozie_app', + 'sourceMaxRelations' : '1000', + 'targetMaxRelations' : '10000000', + 'relPartitions' : '3000', + 'batchSize' : '2000', + 'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments,cites,isCitedBy', + 'otherDsTypeId' : 'scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource', + 'resumeFrom' : 'prepare_relations', + 'shouldIndex' : 'true', + 'outputFormat' : 'SOLR', + 'sparkDriverMemoryForJoining' : '3G', + 'sparkExecutorMemoryForJoining' : '7G', + 'sparkExecutorCoresForJoining' : '4', + 'sparkDriverMemoryForIndexing' : '2G', + 'sparkExecutorMemoryForIndexing' : '2G', + 'sparkExecutorCoresForIndexing' : '64', + 'sparkNetworkTimeout' : '600', + 'workingDir' : '/tmp/prod_provision/working_dir/update_solr' + } + + build-report + + + + + + + + wf_20210724_062705_620 + 2021-07-25T13:25:37+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml new file mode 100644 index 000000000..4dfae3c7d --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml @@ -0,0 +1,100 @@ + +
+ + + + + +
+ + Update Stats [PROD] + Data Provision + 30 + + + Set the OpenAIRE graph DB name + + openaire_db_name + openaire_prod_yyyyMMdd + + + + + + + Set the STATS DB name + + stats_db_name + openaire_prod_stats_yyyyMMdd + + + + + + + Set the STATS MONITOR DB name + + monitor_db_name + openaire_prod_stats_monitor_yyyyMMdd + + + + + + + Set the STATS OBSERVATORY DB name + + observatory_db_name + openaire_prod_stats_observatory_yyyyMMdd + + + + + + + wait configurations + + + + + + + update the content in the stats DB + + executeOozieJob + IIS + + { + 'openaire_db_name' : 'openaire_db_name', + 'stats_db_name' : 'stats_db_name', + 'monitor_db_name' : 'monitor_db_name', + 'observatory_db_name' : 'observatory_db_name' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_update/oozie_app', + 'hive_timeout' : '15000', + 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool', + 'stats_db_shadow_name' : 'openaire_prod_stats_shadow', + 'external_stats_db_name' : 'stats_ext', + 'monitor_db_shadow_name' : 'openaire_prod_stats_monitor_shadow', + 'observatory_db_shadow_name' : 'openaire_prod_stats_observatory_shadow', + 'context_api_url' : 'https://services.openaire.eu/openaire' + } + + build-report + + + + + + + + wf_20210725_065608_71 + 2021-07-26T07:35:55+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml new file mode 100644 index 000000000..d8def071f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml @@ -0,0 +1,87 @@ + +
+ + + + + +
+ + Publish Stats [PROD] + Content Publishing + 35 + + + Set the STATS DB name + + stats_db_name + openaire_prod_stats_yyyyMMdd + + + + + + + Set the STATS MONITOR DB name + + monitor_db_name + openaire_prod_stats_monitor_yyyyMMdd + + + + + + + Set the STATS OBSERVATORY DB name + + observatory_db_name + openaire_prod_stats_observatory_yyyyMMdd + + + + + + + wait configurations + + + + + + + publishes the stats DB to the public schema + + executeOozieJob + IIS + + { + 'stats_db_name' : 'stats_db_name', + 'monitor_db_name' : 'monitor_db_name', + 'observatory_db_name' : 'observatory_db_name' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_promote/oozie_app', + 'hive_timeout' : '150000', + 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool', + 'stats_db_production_name' : 'openaire_prod_stats', + 'monitor_db_production_name' : 'openaire_prod_stats_monitor', + 'observatory_db_production_name' : 'openaire_prod_stats_observatory' + } + + build-report + + + + + + + + wf_20210727_160728_625 + 2021-07-27T16:53:01+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml new file mode 100644 index 000000000..cf337fd7e --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml @@ -0,0 +1,131 @@ + +
+ + + + + +
+ + Update Broker events [PROD OCEAN] + Data Provision + 30 + + + Set the path containing the GRAPH to scan + + graphInputPath + + + + + + + + Set the datasource Ids Whitelist + + datasourceIdWhitelist + openaire____::9ecafa3655143cbc4bc75853035cd432,opendoar____::dc6e224a8d74ce03bf301152d6e33e97,openaire____::09da65eaaa6deac2f785df1e0ae95a06,openaire____::3db634fc5446f389d0b826ea400a5da6,openaire____::5a38cb462ac487bf26bdb86009fe3e74,openaire____::3c29379cc184f66861e858bc7aa9615b,openaire____::4657147e48a1f32637bfe3743bce76c6,openaire____::c3267ea1c3f378c456209b6df241624e,opendoar____::358aee4cc897452c00244351e4d91f69,re3data_____::7b0ad08687b2c960d5aeef06f811d5e6,opendoar____::798ed7d4ee7138d49b8828958048130a,opendoar____::6f4922f45568161a8cdf4ad2299f6d23,opendoar____::4aa0e93b918848be0b7728b4b1568d8a,openaire____::02b55e4f52388520bfe11f959f836e68 + + + + + + + Set the datasource type Whitelist + + datasourceTypeWhitelist + pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic,datarepository::unknown,orprepository,softwarerepository + + + + + + + Set the datasource Id Blacklist + + datasourceIdBlacklist + - + + + + + + + Set the TOPIC whitelist (* = all topics) + + topicWhitelist + ENRICH/MISSING/SUBJECT/DDC,ENRICH/MISSING/SUBJECT/JEL,ENRICH/MISSING/SUBJECT/MESHEUROPMC,ENRICH/MISSING/PUBLICATION_DATE,ENRICH/MISSING/PID,ENRICH/MISSING/PROJECT,ENRICH/MISSING/SUBJECT/ACM,ENRICH/MISSING/SUBJECT/ARXIV,ENRICH/MISSING/OPENACCESS_VERSION,ENRICH/MISSING/AUTHOR/ORCID,ENRICH/MISSING/ABSTRACT,ENRICH/MORE/SUBJECT/ACM,ENRICH/MORE/SUBJECT/ARXIV,ENRICH/MORE/SUBJECT/DDC,ENRICH/MORE/SUBJECT/JEL,ENRICH/MORE/OPENACCESS_VERSION,ENRICH/MORE/SUBJECT/MESHEUROPMC,ENRICH/MORE/PID + + + + + + + Set the output path to store the Event records + + outputDir + /var/lib/dnet/broker_PROD/events + + + + + + + wait configurations + + + + + + + update the BROKER events + + executeOozieJob + IIS + + { + 'graphInputPath' : 'graphInputPath', + 'datasourceIdWhitelist' : 
'datasourceIdWhitelist', + 'datasourceTypeWhitelist' : 'datasourceTypeWhitelist', + 'datasourceIdBlacklist' : 'datasourceIdBlacklist', + 'topicWhitelist' : 'topicWhitelist', + 'outputDir' : 'outputDir' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/broker/generate_events/oozie_app', + 'esEventIndexName' : '', + 'esNotificationsIndexName' : '', + 'esIndexHost' : '', + 'maxIndexedEventsForDsAndTopic' : '100', + 'esBatchWriteRetryCount' : '8', + 'esBatchWriteRetryWait' : '60s', + 'esBatchSizeEntries' : '200', + 'esNodesWanOnly' : 'true', + 'brokerApiBaseUrl' : '', + 'brokerDbUrl' : '', + 'brokerDbUser' : '', + 'brokerDbPassword' : '', + 'sparkDriverMemory' : '3G', + 'sparkExecutorMemory' : '7G', + 'sparkExecutorCores' : '6', + 'workingDir' : '/tmp/prod_provision/working_dir/broker_events' + } + + build-report + + + + + + + + wf_20210709_073839_206 + 2021-07-09T11:01:01+00:00 + FAILURE + + + +
\ No newline at end of file