diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml
new file mode 100644
index 0000000000..e4680a0cf7
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml
@@ -0,0 +1,100 @@
+
+
+
+ Import bipFinder scores
+ Import bipFinder scores
+ 30
+
+
+ declares the path holding the BIP SCORE data
+
+ bipScorePath
+ /data/bip/20201206
+
+
+
+
+
+
+ declares the path holding the LATEST GRAPH dump
+
+ latestGraphPath
+ /tmp/stable_ids/graph/14_graph_blacklisted
+
+
+
+
+
+
+ prepare action sets
+
+
+ [
+ {
+ 'set' : 'bipfinder-scores',
+ 'jobProperty' : 'export_action_set_bipfinder-scores',
+ 'enablingProperty' : 'active_bipfinder-scores',
+ 'enabled' : 'true'
+ }
+ ]
+
+
+
+
+
+
+
+ extract the hdfs output path generated in the previous node
+
+ outputPath
+
+
+
+
+
+
+ prepare AS for the bipFinder scores integration
+
+ executeOozieJob
+ IIS
+
+ {
+ 'bipScorePath':'bipScorePath',
+ 'inputPath':'latestGraphPath',
+ 'outputPath': 'outputPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/bipfinder/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/bipfinder'
+ }
+
+ build-report
+
+
+
+
+
+
+ update action sets
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml
new file mode 100644
index 0000000000..d2ea9d35f7
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml
@@ -0,0 +1,144 @@
+
+
+
+ Import Datacite ActionSet
+ Import InfoSpace
+ 30
+
+
+ set the resume from
+
+ resumeFrom
+ TransformDatacite
+
+
+
+
+
+
+ shall the datacite mapping produce the links?
+
+ exportLinks
+ false
+
+
+
+
+
+
+ set the path storing the OAF Datacite records
+
+ oafTargetPath
+ /data/datacite/production/datacite_oaf
+
+
+
+
+
+
+ set the input path for Datacite content
+
+ datacitePath
+ /data/datacite
+
+
+
+
+
+
+ prepare action sets
+
+
+ [
+ {
+ 'set' : 'datacite',
+ 'jobProperty' : 'export_action_set_datacite',
+ 'enablingProperty' : 'active_datacite',
+ 'enabled' : 'true'
+ }
+ ]
+
+
+
+
+
+
+
+ extract the hdfs output path generated in the previous node
+
+ outputPath
+
+
+
+
+
+
+ prepare a new version of Datacite ActionSet
+
+ executeOozieJob
+ IIS
+
+ {
+ 'mainPath' : 'datacitePath',
+ 'oafTargetPath' : 'oafTargetPath',
+ 'exportLinks' : 'exportLinks',
+ 'resumeFrom' : 'resumeFrom'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_import/oozie_app',
+ 'sparkExecutorMemory' : '7G'
+ }
+
+ build-report
+
+
+
+
+
+
+ generate the Datacite ActionSet from the OAF Datacite records
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'oafTargetPath',
+ 'outputPath' : 'outputPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_actionset/oozie_app',
+ 'sparkExecutorMemory' : '7G'
+ }
+
+ build-report
+
+
+
+
+
+
+ update action sets
+
+
+
+
+
+
+
+ wf_20210723_163342_752
+ 2021-07-23T16:44:05+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml
new file mode 100644
index 0000000000..ce9eb8f4c4
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml
@@ -0,0 +1,200 @@
+
+
+
+ Import DOIboost
+ Import InfoSpace
+ 30
+
+
+ set the input path for MAG
+
+ MAGDumpPath
+ /data/doiboost/mag-2021-02-15
+
+
+
+
+
+
+ set the input path for CROSSREF dump
+
+ crossrefDumpPath
+ /data/doiboost/crossref/
+
+
+
+
+
+
+ set the intermediate path used to process MAG
+
+ intermediatePathMAG
+ /data/doiboost/input/mag
+
+
+
+
+
+
+ set the input path for Crossref
+
+ inputPathCrossref
+ /data/doiboost/input/crossref
+
+
+
+
+
+
+ set the timestamp for the Crossref incremental harvesting
+
+ crossrefTimestamp
+ 1607614921429
+
+
+
+
+
+
+ set the input path for UnpayWall
+
+ inputPathUnpayWall
+ /data/doiboost/input/unpayWall
+
+
+
+
+
+
+ set the input path for ORCID
+
+ inputPathOrcid
+ /data/orcid_activities_2020/last_orcid_dataset
+
+
+
+
+
+
+ set the working path for ORCID
+
+ workingPathOrcid
+ /data/doiboost/input/orcid
+
+
+
+
+
+
+ set the hostedBy map path
+
+ hostedByMapPath
+ /data/doiboost/input/hostedBy/hbMap.gz
+
+
+
+
+
+
+ set the oozie workflow name from which the execution will be resumed
+
+ resumeFrom
+ ConvertCrossrefToOAF
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+ prepare action sets
+
+
+ [
+ {
+ 'set' : 'doiboost',
+ 'jobProperty' : 'export_action_set_doiboost',
+ 'enablingProperty' : 'active_doiboost',
+ 'enabled' : 'true'
+ }
+ ]
+
+
+
+
+
+
+
+ extract the hdfs output path generated in the previous node
+
+ outputPath
+
+
+
+
+
+
+ prepare a new version of DOIBoost
+
+ executeOozieJob
+ IIS
+
+ {
+ 'crossrefTimestamp' : 'crossrefTimestamp',
+ 'hostedByMapPath' : 'hostedByMapPath',
+ 'MAGDumpPath' :'MAGDumpPath',
+ 'inputPathMAG' : 'intermediatePathMAG',
+ 'inputPathCrossref' : 'inputPathCrossref',
+ 'crossrefDumpPath':'crossrefDumpPath',
+ 'inputPathUnpayWall' : 'inputPathUnpayWall',
+ 'inputPathOrcid' : 'inputPathOrcid',
+ 'outputPath' : 'outputPath',
+ 'workingPathOrcid':'workingPathOrcid',
+ 'resumeFrom' : 'resumeFrom'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/doiboost_process/oozie_app',
+ 'workingPath' : '/data/doiboost/process_p',
+ 'sparkExecutorCores' : '2',
+ 'sparkExecutorIntersectionMemory' : '12G',
+ 'sparkExecutorMemory' : '8G',
+ 'esServer' : '[es_server]',
+ 'esIndex' : 'crossref'
+ }
+
+ build-report
+
+
+
+
+
+
+ update action sets
+
+
+
+
+
+
+
+ wf_20210714_075237_381
+ 2021-07-14T09:51:46+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml
new file mode 100644
index 0000000000..6d29e25a12
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml
@@ -0,0 +1,132 @@
+
+
+
+ Import H2020classification
+ Import H2020classification
+ 30
+
+
+ sets the URL to download the project file
+
+ projectFileURL
+ https://cordis.europa.eu/data/cordis-h2020projects.csv
+
+
+
+
+
+
+ sets the URL to download the programme file
+
+ programmeFileURL
+ https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv
+
+
+
+
+
+
+ sets the URL to download the topics file
+
+ topicFileURL
+ https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx
+
+
+
+
+
+
+ sets the name of the sheet in the topic file to be read
+
+ sheetName
+ Topics
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+ prepare action sets
+
+
+ [
+ {
+ 'set' : 'h2020classification',
+ 'jobProperty' : 'export_action_set_h2020classification',
+ 'enablingProperty' : 'active_h2020classification',
+ 'enabled' : 'true'
+ }
+ ]
+
+
+
+
+
+
+
+ extract the hdfs output path generated in the previous node
+
+ outputPath
+
+
+
+
+
+
+ prepare updates for the H2020 Classification
+
+ executeOozieJob
+ IIS
+
+ {
+ 'outputPath': 'outputPath',
+ 'sheetName':'sheetName',
+ 'projectFileURL' : 'projectFileURL',
+ 'programmeFileURL' : 'programmeFileURL',
+ 'topicFileURL':'topicFileURL'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/project/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/h2020classification',
+ 'postgresURL':'',
+ 'postgresUser':'',
+ 'postgresPassword':''
+ }
+
+ build-report
+
+
+
+
+
+
+ update action sets
+
+
+
+
+
+
+
+ wf_20210524_084803_740
+ 2021-05-24T09:05:50+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml
new file mode 100644
index 0000000000..c5642dadcc
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml
@@ -0,0 +1,101 @@
+
+
+
+ Import Orcid
+ Import InfoSpace
+ 30
+
+
+ set the hdfs input path
+
+ inputPath
+ /data/orcid_activities_2020
+
+
+
+
+
+
+ set the temporary path where to store the action set
+
+ processOutputPath
+ /tmp/prod_provision/working_path_orcid_activities
+
+
+
+
+
+
+ prepare action sets
+
+
+ [
+ {
+ 'set' : 'orcidworks-no-doi',
+ 'jobProperty' : 'export_action_set_orcidworks_no_doi',
+ 'enablingProperty' : 'active_orcidworks_no_doi',
+ 'enabled' : 'true'
+ }
+ ]
+
+
+
+
+
+
+
+ extract the hdfs output path generated in the previous node
+
+ outputPath
+
+
+
+
+
+
+ prepare updates for the Orcid No Doi
+
+ executeOozieJob
+ IIS
+
+ {
+ 'workingPath' : 'inputPath',
+ 'processOutputPath' : 'processOutputPath',
+ 'outputPath': 'outputPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/orcidnodoi_actionset/oozie_app',
+ 'spark2GenNoDoiDatasetMaxExecutors' : '200',
+ 'spark2GenNoDoiDatasetExecutorMemory' : '2G'
+ }
+
+ build-report
+
+
+
+
+
+
+ update action sets
+
+
+
+
+
+
+
+ wf_20210713_170819_470
+ 2021-07-13T17:28:26+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml
new file mode 100644
index 0000000000..4810fda3bb
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml
@@ -0,0 +1,89 @@
+
+
+
+ Update ROR actionset
+ Import Infospace
+ 30
+
+
+ Set the path of the ROR JSON dump used as input
+
+ inputPath
+ /data/ror/ror-data-2021-04-06.json
+
+
+
+
+
+
+ prepare action sets
+
+
+ [
+ {
+ 'set' : 'ror',
+ 'jobProperty' : 'export_action_set_ror',
+ 'enablingProperty' : 'active_ror',
+ 'enabled' : 'true'
+ }
+ ]
+
+
+
+
+
+
+
+ extract the hdfs output path generated in the previous node
+
+ outputPath
+
+
+
+
+
+
+ update the ROR actionset
+
+ executeOozieJob
+ IIS
+
+ {
+ 'rorJsonInputPath' : 'inputPath',
+ 'rorActionSetPath': 'outputPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/ror/oozie_app',
+ 'workingDir': '/tmp/import_ror_actionset_prod'
+ }
+
+ build-report
+
+
+
+
+
+
+ update action sets
+
+
+
+
+
+
+
+ wf_20210518_143542_478
+ 2021-05-18T14:37:13+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml
new file mode 100644
index 0000000000..ef2205e322
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml
@@ -0,0 +1,628 @@
+
+
+
+ Graph construction for IIS [BETA]
+ IIS
+ 30
+
+
+ set blacklist of funder nsPrefixes
+
+ nsPrefixBlacklist
+ gsrt________,rcuk________
+
+
+
+
+
+
+ set the path of the map defining the relations id mappings
+
+ idMappingPath
+ /data/maps/fct_map.json
+
+
+
+
+
+
+ Set the target path to store the MERGED graph
+
+ mergedGraphPath
+ /tmp/beta_inference/graph/01_graph_merged
+
+
+
+
+
+
+ Set the target path to store the RAW graph
+
+ rawGraphPath
+ /tmp/beta_inference/graph/02_graph_raw
+
+
+
+
+
+
+ Set the target path to store the CLEANED graph
+
+ cleanedFirstGraphPath
+ /tmp/beta_inference/graph/03_graph_clean_first
+
+
+
+
+
+
+ Set the target path to store the DEDUPED graph
+
+ dedupGraphPath
+ /tmp/beta_inference/graph/04_graph_dedup
+
+
+
+
+
+
+ Set the target path to store the CONSISTENCY graph
+
+ consistentGraphPath
+ /tmp/beta_inference/graph/05_graph_consistent
+
+
+
+
+
+
+ Set the target path to store the CLEANED graph
+
+ cleanedGraphPath
+ /tmp/beta_inference/graph/06_graph_cleaned
+
+
+
+
+
+
+ Set the dedup orchestrator name
+
+ dedupConfig
+ dedup-similarity-result-decisiontree-v2
+
+
+
+
+
+
+ declares the ActionSet ids to promote in the RAW graph
+
+ actionSetIdsRawGraph
+ scholexplorer-dump,doiboost,orcidworks-no-doi,datacite
+
+
+
+
+
+
+ Set the IS lookup service address
+
+ isLookUpUrl
+ http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+
+ reuse cached ODF claims from the PROD aggregation system
+
+ reuseODFClaims_PROD
+ true
+
+
+
+
+
+
+ reuse cached ODF records on HDFS from the PROD aggregation system
+
+ reuseODFhdfs_PROD
+ true
+
+
+
+
+
+
+ reuse cached OAF claims from the PROD aggregation system
+
+ reuseOAFClaims_PROD
+ true
+
+
+
+
+
+
+ reuse cached OAF records on HDFS from the PROD aggregation system
+
+ reuseOAFhdfs_PROD
+ true
+
+
+
+
+
+
+ reuse cached DB content from the PROD aggregation system
+
+ reuseDB_PROD
+ true
+
+
+
+
+
+
+ reuse cached OpenOrgs content from the PROD aggregation system
+
+ reuseDBOpenorgs_PROD
+ true
+
+
+
+
+
+
+ reuse cached ODF content from the PROD aggregation system
+
+ reuseODF_PROD
+ true
+
+
+
+
+
+
+ reuse cached OAF content from the PROD aggregation system
+
+ reuseOAF_PROD
+ true
+
+
+
+
+
+
+ should apply the relations id patching based on the provided idMapping on PROD?
+
+ shouldPatchRelations_PROD
+ false
+
+
+
+
+
+
+ set the PROD aggregator content path
+
+ prodContentPath
+ /tmp/prod_aggregator_for_beta
+
+
+
+
+
+
+ Set the path containing the PROD AGGREGATOR graph
+
+ prodAggregatorGraphPath
+ /tmp/beta_inference/graph/00_prod_graph_aggregator
+
+
+
+
+
+
+ reuse cached ODF claims from the BETA aggregation system
+
+ reuseODFClaims_BETA
+ true
+
+
+
+
+
+
+ reuse cached ODF records on HDFS from the BETA aggregation system
+
+ reuseODFhdfs_BETA
+ true
+
+
+
+
+
+
+ reuse cached OAF claims from the BETA aggregation system
+
+ reuseOAFClaims_BETA
+ true
+
+
+
+
+
+
+ reuse cached OAF records on HDFS from the BETA aggregation system
+
+ reuseOAFhdfs_BETA
+ true
+
+
+
+
+
+
+ reuse cached DB content from the BETA aggregation system
+
+ reuseDB_BETA
+ true
+
+
+
+
+
+
+ reuse cached OpenOrgs content from the BETA aggregation system
+
+ reuseDBOpenorgs_BETA
+ true
+
+
+
+
+
+
+ reuse cached ODF content from the BETA aggregation system
+
+ reuseODF_BETA
+ true
+
+
+
+
+
+
+ reuse cached OAF content from the BETA aggregation system
+
+ reuseOAF_BETA
+ true
+
+
+
+
+
+
+ should apply the relations id patching based on the provided idMapping on BETA?
+
+ shouldPatchRelations_BETA
+ false
+
+
+
+
+
+
+ set the BETA aggregator content path
+
+ betaContentPath
+ /tmp/beta_aggregator
+
+
+
+
+
+
+ Set the path containing the BETA AGGREGATOR graph
+
+ betaAggregatorGraphPath
+ /tmp/beta_inference/graph/00_beta_graph_aggregator
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+
+ create the BETA AGGREGATOR graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphOutputPath' : 'betaAggregatorGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl',
+ 'reuseODFClaims' : 'reuseODFClaims_BETA',
+ 'reuseOAFClaims' : 'reuseOAFClaims_BETA',
+ 'reuseDB' : 'reuseDB_BETA',
+ 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA',
+ 'reuseODF' : 'reuseODF_BETA',
+ 'reuseODF_hdfs' : 'reuseODFhdfs_BETA',
+ 'reuseOAF' : 'reuseOAF_BETA',
+ 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA',
+ 'contentPath' : 'betaContentPath',
+ 'nsPrefixBlacklist' : 'nsPrefixBlacklist',
+ 'shouldPatchRelations' : 'shouldPatchRelations_BETA',
+ 'idMappingPath' : 'idMappingPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app',
+ 'mongoURL' : '',
+ 'mongoDb' : '',
+ 'mdstoreManagerUrl' : '',
+ 'postgresURL' : '',
+ 'postgresUser' : '',
+ 'postgresPassword' : '',
+ 'postgresOpenOrgsURL' : '',
+ 'postgresOpenOrgsUser' : '',
+ 'postgresOpenOrgsPassword' : '',
+ 'shouldHashId' : 'true',
+ 'importOpenorgs' : 'true',
+ 'workingDir' : '/tmp/beta_inference/working_dir/beta_aggregator'
+ }
+
+ build-report
+
+
+
+
+
+
+ create the PROD AGGREGATOR graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphOutputPath' : 'prodAggregatorGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl',
+ 'reuseODFClaims' : 'reuseODFClaims_PROD',
+ 'reuseOAFClaims' : 'reuseOAFClaims_PROD',
+ 'reuseDB' : 'reuseDB_PROD',
+ 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD',
+ 'reuseODF' : 'reuseODF_PROD',
+ 'reuseODF_hdfs' : 'reuseODFhdfs_PROD',
+ 'reuseOAF' : 'reuseOAF_PROD',
+ 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD',
+ 'contentPath' : 'prodContentPath',
+ 'nsPrefixBlacklist' : 'nsPrefixBlacklist',
+ 'shouldPatchRelations' : 'shouldPatchRelations_PROD',
+ 'idMappingPath' : 'idMappingPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app',
+ 'mongoURL' : '',
+ 'mongoDb' : '',
+ 'mdstoreManagerUrl' : '',
+ 'postgresURL' : '',
+ 'postgresUser' : '',
+ 'postgresPassword' : '',
+ 'postgresOpenOrgsURL' : '',
+ 'postgresOpenOrgsUser' : '',
+ 'postgresOpenOrgsPassword' : '',
+ 'shouldHashId' : 'true',
+ 'importOpenorgs' : 'true',
+ 'workingDir' : '/tmp/beta_inference/working_dir/prod_aggregator'
+ }
+
+ build-report
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+ create the AGGREGATOR graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'betaInputGraphPath' : 'betaAggregatorGraphPath',
+ 'prodInputGraphPath' : 'prodAggregatorGraphPath',
+ 'graphOutputPath' : 'mergedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app',
+ 'workingDir' : '/tmp/beta_inference/working_dir/merge_graph',
+ 'priority' : 'BETA'
+ }
+
+ build-report
+
+
+
+
+
+
+ create the RAW graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'inputActionSetIds' : 'actionSetIdsRawGraph',
+ 'inputGraphRootPath' : 'mergedGraphPath',
+ 'outputGraphRootPath' : 'rawGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G',
+ 'activePromoteDatasetActionPayload' : 'true',
+ 'activePromoteDatasourceActionPayload' : 'true',
+ 'activePromoteOrganizationActionPayload' : 'true',
+ 'activePromoteOtherResearchProductActionPayload' : 'true',
+ 'activePromoteProjectActionPayload' : 'true',
+ 'activePromotePublicationActionPayload' : 'true',
+ 'activePromoteRelationActionPayload' : 'true',
+ 'activePromoteResultActionPayload' : 'true',
+ 'activePromoteSoftwareActionPayload' : 'true',
+ 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
+ 'workingDir' : '/tmp/beta_inference/working_dir/promoteActionsRaw'
+ }
+
+ build-report
+
+
+
+
+
+
+ clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphInputPath' : 'rawGraphPath',
+ 'graphOutputPath': 'cleanedFirstGraphPath',
+ 'isLookupUrl': 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app',
+ 'workingDir' : '/tmp/beta_inference/working_dir/clean_first'
+ }
+
+ build-report
+
+
+
+
+
+
+ search for duplicates in the raw graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'actionSetId' : 'dedupConfig',
+ 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'dedupGraphPath': 'dedupGraphPath',
+ 'isLookUpUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app',
+ 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple',
+ 'workingPath' : '/tmp/beta_inference/working_dir/dedup',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G'
+ }
+
+ build-report
+
+
+
+
+
+
+ mark duplicates as deleted and redistribute the relationships
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath' : 'dedupGraphPath',
+ 'graphOutputPath': 'consistentGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app',
+ 'workingPath' : '/tmp/beta_inference/working_dir/dedup'
+ }
+
+ build-report
+
+
+
+
+
+
+ clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphInputPath' : 'consistentGraphPath',
+ 'graphOutputPath': 'cleanedGraphPath',
+ 'isLookupUrl': 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app',
+ 'workingDir' : '/tmp/beta_inference/working_dir/clean'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ wf_20210730_094240_462
+ 2021-07-30T15:04:19+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml
new file mode 100644
index 0000000000..e5ce3d7109
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml
@@ -0,0 +1,437 @@
+
+
+
+ Graph construction for IIS [PROD NEW]
+ IIS
+ 30
+
+
+ set blacklist of funder nsPrefixes
+
+ nsPrefixBlacklist
+ conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________
+
+
+
+
+
+
+ set the path of the map defining the relations id mappings
+
+ idMappingPath
+ /data/maps/fct_map.json
+
+
+
+
+
+
+ Set the path containing the PROD AGGREGATOR graph
+
+ aggregatorGraphPath
+ /tmp/prod_inference/graph/00_graph_aggregator
+
+
+
+
+
+
+ Set the target path to store the RAW graph
+
+ rawGraphPath
+ /tmp/prod_inference/graph/01_graph_raw
+
+
+
+
+
+
+ Set the target path to store the CLEANED graph
+
+ cleanedFirstGraphPath
+ /tmp/prod_inference/graph/02_graph_clean_first
+
+
+
+
+
+
+ Set the target path to store the DEDUPED graph
+
+ dedupGraphPath
+ /tmp/prod_inference/graph/03_graph_dedup
+
+
+
+
+
+
+ Set the target path to store the CONSISTENCY graph
+
+ consistentGraphPath
+ /tmp/prod_inference/graph/04_graph_consistent
+
+
+
+
+
+
+ Set the target path to store the CLEANED graph
+
+ cleanedGraphPath
+ /tmp/prod_inference/graph/05_graph_cleaned
+
+
+
+
+
+
+ Set the dedup orchestrator name
+
+ dedupConfig
+ dedup-similarity-result-decisiontree-v2
+
+
+
+
+
+
+ declares the ActionSet ids to promote in the RAW graph
+
+ actionSetIdsRawGraph
+ scholexplorer-dump,doiboost,orcidworks-no-doi,datacite
+
+
+
+
+
+
+ Set the IS lookup service address
+
+ isLookUpUrl
+ http://services.openaire.eu:8280/is/services/isLookUp?wsdl
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ reuse cached ODF claims from the PROD aggregation system
+
+ reuseODFClaims
+ true
+
+
+
+
+
+
+ reuse cached OAF claims from the PROD aggregation system
+
+ reuseOAFClaims
+ true
+
+
+
+
+
+
+ reuse cached ODF records on HDFS from the PROD aggregation system
+
+ reuseODFhdfs
+ true
+
+
+
+
+
+
+ reuse cached OAF records on HDFS from the PROD aggregation system
+
+ reuseOAFhdfs
+ true
+
+
+
+
+
+
+ reuse cached ODF content from the PROD aggregation system
+
+ reuseODF
+ true
+
+
+
+
+
+
+ reuse cached OAF content from the PROD aggregation system
+
+ reuseOAF
+ true
+
+
+
+
+
+
+ reuse cached DB content from the PROD aggregation system
+
+ reuseDB
+ true
+
+
+
+
+
+
+ reuse cached OpenOrgs content from the PROD aggregation system
+
+ reuseDBOpenorgs
+ true
+
+
+
+
+
+
+ should apply the relations id patching based on the provided idMapping?
+
+ shouldPatchRelations
+ false
+
+
+
+
+
+
+ set the PROD aggregator content path
+
+ contentPath
+ /tmp/prod_aggregator
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+ create the PROD AGGREGATOR graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphOutputPath' : 'aggregatorGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl',
+ 'reuseODFClaims' : 'reuseODFClaims',
+ 'reuseOAFClaims' : 'reuseOAFClaims',
+ 'reuseDB' : 'reuseDB',
+ 'reuseDBOpenorgs' : 'reuseDBOpenorgs',
+ 'reuseODF' : 'reuseODF',
+ 'reuseODF_hdfs' : 'reuseODFhdfs',
+ 'reuseOAF' : 'reuseOAF',
+ 'reuseOAF_hdfs' : 'reuseOAFhdfs',
+ 'contentPath' : 'contentPath',
+ 'nsPrefixBlacklist' : 'nsPrefixBlacklist',
+ 'shouldPatchRelations' : 'shouldPatchRelations',
+ 'idMappingPath' : 'idMappingPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app',
+ 'mongoURL' : '',
+ 'mongoDb' : '',
+ 'mdstoreManagerUrl' : '',
+ 'postgresURL' : '',
+ 'postgresUser' : '',
+ 'postgresPassword' : '',
+ 'postgresOpenOrgsURL' : '',
+ 'postgresOpenOrgsUser' : '',
+ 'postgresOpenOrgsPassword' : '',
+ 'shouldHashId' : 'true',
+ 'importOpenorgs' : 'true',
+ 'workingDir' : '/tmp/prod_inference/working_dir/prod_aggregator'
+ }
+
+ build-report
+
+
+
+
+
+
+ create the RAW graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'inputActionSetIds' : 'actionSetIdsRawGraph',
+ 'inputGraphRootPath' : 'aggregatorGraphPath',
+ 'outputGraphRootPath' : 'rawGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G',
+ 'activePromoteDatasetActionPayload' : 'true',
+ 'activePromoteDatasourceActionPayload' : 'true',
+ 'activePromoteOrganizationActionPayload' : 'true',
+ 'activePromoteOtherResearchProductActionPayload' : 'true',
+ 'activePromoteProjectActionPayload' : 'true',
+ 'activePromotePublicationActionPayload' : 'true',
+ 'activePromoteRelationActionPayload' : 'true',
+ 'activePromoteResultActionPayload' : 'true',
+ 'activePromoteSoftwareActionPayload' : 'true',
+ 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
+ 'workingDir' : '/tmp/prod_inference/working_dir/promoteActionsRaw'
+ }
+
+ build-report
+
+
+
+
+
+
+ clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphInputPath' : 'rawGraphPath',
+ 'graphOutputPath': 'cleanedFirstGraphPath',
+ 'isLookupUrl': 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
+ 'workingDir' : '/tmp/prod_inference/working_dir/clean_first'
+ }
+
+ build-report
+
+
+
+
+
+
+ search for duplicates in the raw graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'actionSetId' : 'dedupConfig',
+ 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'dedupGraphPath': 'dedupGraphPath',
+ 'isLookUpUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app',
+ 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple',
+ 'workingPath' : '/tmp/prod_inference/working_dir/dedup',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G'
+ }
+
+ build-report
+
+
+
+
+
+
+ mark duplicates as deleted and redistribute the relationships
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath' : 'dedupGraphPath',
+ 'graphOutputPath': 'consistentGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app',
+ 'workingPath' : '/tmp/prod_inference/working_dir/dedup'
+ }
+
+ build-report
+
+
+
+
+
+
+ clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphInputPath' : 'consistentGraphPath',
+ 'graphOutputPath': 'cleanedGraphPath',
+ 'isLookupUrl': 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
+ 'workingDir' : '/tmp/prod_inference/working_dir/clean'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ wf_20210719_165159_86
+ 2021-07-19T20:45:09+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml
new file mode 100644
index 0000000000..126d5f58d8
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml
@@ -0,0 +1,225 @@
+
+
+
+ IIS main workflow V3 [PROD]
+ IIS
+ 30
+
+
+ start
+
+
+
+
+
+
+ Set a regex of funder shortnames to exclude from the project reference processing
+
+ referenceextraction_project_fundingclass_blacklist_regex
+ ^DFG::.*$|^CONICYT::.*$|^RSF::.*$|^SGOV::.*$|^GSRT::.*$|^MIUR::.*$|^INNOVIRIS::.*$|^RIF::.*$|^SFRS::.*$
+
+
+
+
+
+
+ prepare action sets
+
+
+ [
+ {
+ 'set' : 'iis-document-affiliation',
+ 'jobProperty' : 'export_action_set_id_matched_doc_organizations',
+ 'enablingProperty' : 'active_document_affiliation',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-referenced-projects-main',
+ 'jobProperty' : 'export_action_set_id_document_referencedProjects',
+ 'enablingProperty' : 'active_referenceextraction_project',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-referenced-datasets-main',
+ 'jobProperty' : 'export_action_set_id_document_referencedDatasets',
+ 'enablingProperty' : 'active_referenceextraction_dataset',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-researchinitiative',
+ 'jobProperty' : 'export_action_set_id_document_research_initiative',
+ 'enablingProperty' : 'active_referenceextraction_researchinitiative',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-document-similarities',
+ 'jobProperty' : 'export_action_set_id_document_similarities_standard',
+ 'enablingProperty' : 'active_documentssimilarity',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-document-classes',
+ 'jobProperty' : 'export_action_set_id_document_classes',
+ 'enablingProperty' : 'active_documentsclassification',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-document-citations',
+ 'jobProperty' : 'export_action_set_id_document_referencedDocuments',
+ 'enablingProperty' : 'active_citationmatching',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-document-citations-relations',
+ 'jobProperty' : 'export_action_set_id_citation_relations',
+ 'enablingProperty' : 'active_citationmatching_relations',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-referenceextraction-pdb',
+ 'jobProperty' : 'export_action_set_id_document_pdb',
+ 'enablingProperty' : 'active_referenceextraction_pdb',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'document_software_url',
+ 'jobProperty' : 'export_action_set_id_document_software_url',
+ 'enablingProperty' : 'active_referenceextraction_software_url',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-entities-software',
+ 'jobProperty' : 'export_action_set_id_entity_software',
+ 'enablingProperty' : 'active_referenceextraction_software_url',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-communities',
+ 'jobProperty' : 'export_action_set_id_document_community',
+ 'enablingProperty' : 'active_referenceextraction_community',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-referenced-patents',
+ 'jobProperty' : 'export_action_set_id_document_patent',
+ 'enablingProperty' : 'active_referenceextraction_patent',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-entities-patent',
+ 'jobProperty' : 'export_action_set_id_entity_patent',
+ 'enablingProperty' : 'active_referenceextraction_patent',
+ 'enabled' : 'true'
+ },
+ {
+ 'set' : 'iis-covid-19',
+ 'jobProperty' : 'export_action_set_id_document_covid19',
+ 'enablingProperty' : 'active_referenceextraction_covid19',
+ 'enabled' : 'true'
+ }
+ ]
+
+
+
+
+
+
+
+ prepare parameters
+
+ import_islookup_service_location
+ import_content_objectstores_csv
+ import_content_object_store_location
+ import_mdstore_service_location
+ import_dataset_mdstore_ids_csv
+ oozie.wf.application.path
+ /lib/iis/primary/snapshots/2021-06-23
+ IIS
+ /tmp/prod_inference/graph/05_graph_cleaned
+ import_infospace_graph_location
+
+ import_project_concepts_context_ids_csv
+ aginfra,beopen,clarin,covid-19,dariah,dh-ch,oa-pg,egi,elixir-gr,enermaps,epos,fam,fet-fp7,fet-h2020,gotriple,instruct,mes,ni,rda,science-innovation-policy,risis,rural-digital-europe,sdsn-gr,sobigdata
+
+
+
+
+
+
+ IIS main
+
+ iisMainJobV3
+
+ {
+ 'cluster' : 'cluster',
+ 'oozie.wf.application.path' : 'oozie.wf.application.path',
+ 'referenceextraction_project_fundingclass_blacklist_regex' : 'referenceextraction_project_fundingclass_blacklist_regex',
+
+ 'active_document_affiliation' : 'active_document_affiliation',
+ 'active_referenceextraction_project' : 'active_referenceextraction_project',
+ 'active_referenceextraction_dataset' : 'active_referenceextraction_dataset',
+ 'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative',
+ 'active_documentsclassification' : 'active_documentsclassification',
+ 'active_documentssimilarity' : 'active_documentssimilarity',
+ 'active_citationmatching' : 'active_citationmatching',
+ 'active_citationmatching_relations' : 'active_citationmatching_relations',
+ 'active_referenceextraction_pdb' : 'active_referenceextraction_pdb',
+ 'active_referenceextraction_software_url' : 'active_referenceextraction_software_url',
+ 'active_referenceextraction_community' : 'active_referenceextraction_community',
+ 'active_referenceextraction_patent' : 'active_referenceextraction_patent',
+ 'active_referenceextraction_covid19' : 'active_referenceextraction_covid19',
+
+ 'import_content_objectstores_csv' : 'import_content_objectstores_csv',
+ 'import_content_object_store_location' : 'import_content_object_store_location',
+ 'import_mdstore_service_location' : 'import_mdstore_service_location',
+ 'import_islookup_service_location' : 'import_islookup_service_location',
+ 'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv',
+ 'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv',
+ 'import_infospace_graph_location' : 'import_infospace_graph_location',
+
+ 'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations',
+ 'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets',
+ 'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects',
+ 'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative',
+ 'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard',
+
+ 'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments',
+ 'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb',
+ 'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url',
+ 'export_action_set_id_entity_software' : 'export_action_set_id_entity_software',
+ 'export_action_set_id_document_community' : 'export_action_set_id_document_community',
+ 'export_action_set_id_document_patent' : 'export_action_set_id_document_patent',
+ 'export_action_set_id_entity_patent' : 'export_action_set_id_entity_patent',
+ 'export_action_set_id_document_covid19' : 'export_action_set_id_document_covid19',
+ 'export_action_set_id_document_classes' : 'export_action_set_id_document_classes'
+ }
+
+ false
+ build-report
+
+
+
+
+
+
+ update action sets
+
+
+
+
+
+
+
+ wf_20210719_221139_780
+ 2021-07-21T01:23:13+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml
new file mode 100644
index 0000000000..766783f8bb
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml
@@ -0,0 +1,995 @@
+
+
+
+ Graph Construction [BETA]
+ Data Provision
+ 30
+
+
+ set blacklist of funder nsPrefixes
+
+ nsPrefixBlacklist
+ gsrt________,rcuk________
+
+
+
+
+
+
+ set the path of the map defining the relations id mappings
+
+ idMappingPath
+ /data/maps/fct_map.json
+
+
+
+
+
+
+ Set the target path to store the MERGED graph
+
+ mergedGraphPath
+ /tmp/beta_provision/graph/01_graph_merged
+
+
+
+
+
+
+ Set the target path to store the RAW graph
+
+ rawGraphPath
+ /tmp/beta_provision/graph/02_graph_raw
+
+
+
+
+
+
+ Set the target path to store the first cleaned graph (the cleaned RAW graph)
+
+ cleanedFirstGraphPath
+ /tmp/beta_provision/graph/03_graph_cleaned
+
+
+
+
+
+
+ Set the target path to store the DEDUPED graph
+
+ dedupGraphPath
+ /tmp/beta_provision/graph/04_graph_dedup
+
+
+
+
+
+
+ Set the target path to store the INFERRED graph
+
+ inferredGraphPath
+ /tmp/beta_provision/graph/05_graph_inferred
+
+
+
+
+
+
+ Set the target path to store the CONSISTENCY graph
+
+ consistentGraphPath
+ /tmp/beta_provision/graph/06_graph_consistent
+
+
+
+
+
+
+ Set the target path to store the ORCID enriched graph
+
+ orcidGraphPath
+ /tmp/beta_provision/graph/07_graph_orcid
+
+
+
+
+
+
+ Set the target path to store the BULK TAGGED graph
+
+ bulkTaggingGraphPath
+ /tmp/beta_provision/graph/08_graph_bulktagging
+
+
+
+
+
+
+ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph
+
+ affiliationGraphPath
+ /tmp/beta_provision/graph/09_graph_affiliation
+
+
+
+
+
+
+ Set the target path to store the COMMUNITY from SELECTED SOURCES graph
+
+ communityOrganizationGraphPath
+ /tmp/beta_provision/graph/10_graph_comunity_organization
+
+
+
+
+
+
+ Set the target path to store the FUNDING from SEMANTIC RELATION graph
+
+ fundingGraphPath
+ /tmp/beta_provision/graph/11_graph_funding
+
+
+
+
+
+
+ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph
+
+ communitySemRelGraphPath
+ /tmp/beta_provision/graph/12_graph_comunity_sem_rel
+
+
+
+
+
+
+ Set the target path to store the COUNTRY enriched graph
+
+ countryGraphPath
+ /tmp/beta_provision/graph/13_graph_country
+
+
+
+
+
+
+ Set the target path to store the CLEANED graph
+
+ cleanedGraphPath
+ /tmp/beta_provision/graph/14_graph_cleaned
+
+
+
+
+
+
+ Set the target path to store the blacklisted graph
+
+ blacklistedGraphPath
+ /tmp/beta_provision/graph/15_graph_blacklisted
+
+
+
+
+
+
+ Set the map of paths for the Bulk Tagging
+
+ bulkTaggingPathMap
+ {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"}
+
+
+
+
+
+
+ Set the map of associations organization, community list for the propagation of community to result through organization
+
+ propagationOrganizationCommunityMap
+ {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], 
"20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"],
+ "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]}
+
+
+
+
+
+
+
+ Set the dedup orchestrator name
+
+ dedupConfig
+ dedup-similarity-result-decisiontree-v2
+
+
+
+
+
+
+ declares the ActionSet ids to promote in the RAW graph
+
+ actionSetIdsRawGraph
+ scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite
+
+
+
+
+
+
+ declares the ActionSet ids to promote in the INFERRED graph
+
+ actionSetIdsIISGraph
+ iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores
+
+
+
+
+
+
+ Set the IS lookup service address
+
+ isLookUpUrl
+ http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+
+ reuse cached ODF claims from the PROD aggregation system
+
+ reuseODFClaims_PROD
+ true
+
+
+
+
+
+
+ reuse cached ODF records on HDFS from the PROD aggregation system
+
+ reuseODFhdfs_PROD
+ true
+
+
+
+
+
+
+ reuse cached OAF claims from the PROD aggregation system
+
+ reuseOAFClaims_PROD
+ true
+
+
+
+
+
+
+ reuse cached OAF records on HDFS from the PROD aggregation system
+
+ reuseOAFhdfs_PROD
+ true
+
+
+
+
+
+
+ reuse cached DB content from the PROD aggregation system
+
+ reuseDB_PROD
+ true
+
+
+
+
+
+
+ reuse cached OpenOrgs content from the PROD aggregation system
+
+ reuseDBOpenorgs_PROD
+ true
+
+
+
+
+
+
+ reuse cached ODF content from the PROD aggregation system
+
+ reuseODF_PROD
+ true
+
+
+
+
+
+
+ reuse cached OAF content from the PROD aggregation system
+
+ reuseOAF_PROD
+ true
+
+
+
+
+
+
+ should apply the relations id patching based on the provided idMapping on PROD?
+
+ shouldPatchRelations_PROD
+ true
+
+
+
+
+
+
+ set the PROD aggregator content path
+
+ prodContentPath
+ /tmp/prod_aggregator_for_beta
+
+
+
+
+
+
+ Set the path containing the PROD AGGREGATOR graph
+
+ prodAggregatorGraphPath
+ /tmp/beta_provision/graph/00_prod_graph_aggregator
+
+
+
+
+
+
+ reuse cached ODF claims from the BETA aggregation system
+
+ reuseODFClaims_BETA
+ true
+
+
+
+
+
+
+ reuse cached ODF records on HDFS from the BETA aggregation system
+
+ reuseODFhdfs_BETA
+ true
+
+
+
+
+
+
+ reuse cached OAF claims from the BETA aggregation system
+
+ reuseOAFClaims_BETA
+ true
+
+
+
+
+
+
+ reuse cached OAF records on HDFS from the BETA aggregation system
+
+ reuseOAFhdfs_BETA
+ true
+
+
+
+
+
+
+ reuse cached DB content from the BETA aggregation system
+
+ reuseDB_BETA
+ true
+
+
+
+
+
+
+ reuse cached OpenOrgs content from the BETA aggregation system
+
+ reuseDBOpenorgs_BETA
+ true
+
+
+
+
+
+
+ reuse cached ODF content from the BETA aggregation system
+
+ reuseODF_BETA
+ true
+
+
+
+
+
+
+ reuse cached OAF content from the BETA aggregation system
+
+ reuseOAF_BETA
+ true
+
+
+
+
+
+
+ should apply the relations id patching based on the provided idMapping on BETA?
+
+ shouldPatchRelations_BETA
+ true
+
+
+
+
+
+
+ set the BETA aggregator content path
+
+ betaContentPath
+ /tmp/beta_aggregator
+
+
+
+
+
+
+ Set the path containing the BETA AGGREGATOR graph
+
+ betaAggregatorGraphPath
+ /tmp/beta_provision/graph/00_beta_graph_aggregator
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+
+ create the BETA AGGREGATOR graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphOutputPath' : 'betaAggregatorGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl',
+ 'reuseODFClaims' : 'reuseODFClaims_BETA',
+ 'reuseOAFClaims' : 'reuseOAFClaims_BETA',
+ 'reuseDB' : 'reuseDB_BETA',
+ 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA',
+ 'reuseODF' : 'reuseODF_BETA',
+ 'reuseODF_hdfs' : 'reuseODFhdfs_BETA',
+ 'reuseOAF' : 'reuseOAF_BETA',
+ 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA',
+ 'contentPath' : 'betaContentPath',
+ 'nsPrefixBlacklist' : 'nsPrefixBlacklist',
+ 'shouldPatchRelations' : 'shouldPatchRelations_BETA',
+ 'idMappingPath' : 'idMappingPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app',
+ 'mongoURL' : '',
+ 'mongoDb' : '',
+ 'mdstoreManagerUrl' : '',
+ 'postgresURL' : '',
+ 'postgresUser' : '',
+ 'postgresPassword' : '',
+ 'postgresOpenOrgsURL' : '',
+ 'postgresOpenOrgsUser' : '',
+ 'postgresOpenOrgsPassword' : '',
+ 'shouldHashId' : 'true',
+ 'importOpenorgs' : 'true',
+ 'workingDir' : '/tmp/beta_provision/working_dir/beta_aggregator'
+ }
+
+ build-report
+
+
+
+
+
+
+ create the PROD AGGREGATOR graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphOutputPath' : 'prodAggregatorGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl',
+ 'reuseODFClaims' : 'reuseODFClaims_PROD',
+ 'reuseOAFClaims' : 'reuseOAFClaims_PROD',
+ 'reuseDB' : 'reuseDB_PROD',
+ 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD',
+ 'reuseODF' : 'reuseODF_PROD',
+ 'reuseODF_hdfs' : 'reuseODFhdfs_PROD',
+ 'reuseOAF' : 'reuseOAF_PROD',
+ 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD',
+ 'contentPath' : 'prodContentPath',
+ 'nsPrefixBlacklist' : 'nsPrefixBlacklist',
+ 'shouldPatchRelations' : 'shouldPatchRelations_PROD',
+ 'idMappingPath' : 'idMappingPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app',
+ 'mongoURL' : '',
+ 'mongoDb' : '',
+ 'mdstoreManagerUrl' : '',
+ 'postgresURL' : '',
+ 'postgresUser' : '',
+ 'postgresPassword' : '',
+ 'postgresOpenOrgsURL' : '',
+ 'postgresOpenOrgsUser' : '',
+ 'postgresOpenOrgsPassword' : '',
+ 'shouldHashId' : 'true',
+ 'importOpenorgs' : 'true',
+ 'workingDir' : '/tmp/beta_provision/working_dir/prod_aggregator'
+ }
+
+ build-report
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+ create the MERGED graph from the BETA and PROD AGGREGATOR graphs
+
+ executeOozieJob
+ IIS
+
+ {
+ 'betaInputGraphPath' : 'betaAggregatorGraphPath',
+ 'prodInputGraphPath' : 'prodAggregatorGraphPath',
+ 'graphOutputPath' : 'mergedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/merge_graph',
+ 'priority' : 'BETA'
+ }
+
+ build-report
+
+
+
+
+
+
+ create the RAW graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'inputActionSetIds' : 'actionSetIdsRawGraph',
+ 'inputGraphRootPath' : 'mergedGraphPath',
+ 'outputGraphRootPath' : 'rawGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G',
+ 'activePromoteDatasetActionPayload' : 'true',
+ 'activePromoteDatasourceActionPayload' : 'true',
+ 'activePromoteOrganizationActionPayload' : 'true',
+ 'activePromoteOtherResearchProductActionPayload' : 'true',
+ 'activePromoteProjectActionPayload' : 'true',
+ 'activePromotePublicationActionPayload' : 'true',
+ 'activePromoteRelationActionPayload' : 'true',
+ 'activePromoteResultActionPayload' : 'true',
+ 'activePromoteSoftwareActionPayload' : 'true',
+ 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
+ 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsRaw'
+ }
+
+ build-report
+
+
+
+
+
+
+ clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphInputPath' : 'rawGraphPath',
+ 'graphOutputPath': 'cleanedFirstGraphPath',
+ 'isLookupUrl': 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/clean'
+ }
+
+ build-report
+
+
+
+
+
+
+ search for duplicates in the raw graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'actionSetId' : 'dedupConfig',
+ 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'dedupGraphPath': 'dedupGraphPath',
+ 'isLookUpUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app',
+ 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple',
+ 'workingPath' : '/tmp/beta_provision/working_dir/dedup',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G'
+ }
+
+ build-report
+
+
+
+
+
+
+ create the INFERRED graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'inputActionSetIds' : 'actionSetIdsIISGraph',
+ 'inputGraphRootPath' : 'dedupGraphPath',
+ 'outputGraphRootPath' : 'inferredGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G',
+ 'activePromoteDatasetActionPayload' : 'true',
+ 'activePromoteDatasourceActionPayload' : 'true',
+ 'activePromoteOrganizationActionPayload' : 'true',
+ 'activePromoteOtherResearchProductActionPayload' : 'true',
+ 'activePromoteProjectActionPayload' : 'true',
+ 'activePromotePublicationActionPayload' : 'true',
+ 'activePromoteRelationActionPayload' : 'true',
+ 'activePromoteResultActionPayload' : 'true',
+ 'activePromoteSoftwareActionPayload' : 'true',
+ 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
+ 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsIIS'
+ }
+
+ build-report
+
+
+
+
+
+
+ mark duplicates as deleted and redistribute the relationships
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath' : 'inferredGraphPath',
+ 'graphOutputPath': 'consistentGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app',
+ 'workingPath' : '/tmp/beta_provision/working_dir/dedup'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ propagates ORCID among results linked by allowedsemrels semantic relationships
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'consistentGraphPath',
+ 'outputPath': 'orcidGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/orcidtoresultfromsemrel/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/orcid',
+ 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo;isSupplementedBy;isSupplementTo',
+ 'saveGraph' : 'true',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G'
+ }
+
+ build-report
+
+
+
+
+
+
+ mark results respecting some rules as belonging to communities
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'orcidGraphPath',
+ 'outputPath': 'bulkTaggingGraphPath',
+ 'isLookUpUrl' : 'isLookUpUrl',
+ 'pathMap' : 'bulkTaggingPathMap'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/bulktag/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/bulktag'
+ }
+
+ build-report
+
+
+
+
+
+
+ creates relationships between results and organizations when the organizations are associated to institutional repositories
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'bulkTaggingGraphPath',
+ 'outputPath': 'affiliationGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/affiliation/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/affiliation',
+ 'saveGraph' : 'true',
+ 'blacklist' : 'empty'
+ }
+
+ build-report
+
+
+
+
+
+
+ marks as belonging to communities the results collected from datasources related to the organizations specified in the propagationOrganizationCommunityMap
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'affiliationGraphPath',
+ 'outputPath': 'communityOrganizationGraphPath',
+ 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_organization/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/community_organization',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+ creates relations between projects and results linked to other results through allowedsemrels semantic relations linked to projects
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'communityOrganizationGraphPath',
+ 'outputPath': 'fundingGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/funding/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/funding',
+ 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+ tags as belonging to communities the results in allowedsemrels relation with other results already linked to communities
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'fundingGraphPath',
+ 'outputPath': 'communitySemRelGraphPath',
+ 'isLookUpUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_semrel/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/community_semrel',
+ 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+ associates to the results collected from datasources of the allowedtypes and from those in the whitelist the country of the organization(s) handling the datasource they are collected from
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'communitySemRelGraphPath',
+ 'outputPath': 'countryGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/country/oozie_app',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G',
+ 'workingDir' : '/tmp/beta_provision/working_dir/country',
+ 'allowedtypes' : 'pubsrepository::institutional',
+ 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+ clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphInputPath' : 'countryGraphPath',
+ 'graphOutputPath': 'cleanedGraphPath',
+ 'isLookupUrl': 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/clean'
+ }
+
+ build-report
+
+
+
+
+
+
+ removes blacklisted relations
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'cleanedGraphPath',
+ 'outputPath': 'blacklistedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/blacklist',
+ 'postgresURL' : '',
+ 'postgresUser' : '',
+ 'postgresPassword' : ''
+ }
+
+ build-report
+
+
+
+
+
+
+
+ wf_20210803_134357_367
+ 2021-08-03T17:08:11+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml
new file mode 100644
index 0000000000..be6155f2fb
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml
@@ -0,0 +1,778 @@
+
+
+
+ Graph construction [PROD NEW]
+ Data Provision
+ 30
+
+
+ set blacklist of funder nsPrefixes
+
+ nsPrefixBlacklist
+ conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________
+
+
+
+
+
+
+ Set the path containing the PROD AGGREGATOR graph
+
+ aggregatorGraphPath
+ /tmp/prod_provision/graph/00_prod_graph_aggregator
+
+
+
+
+
+
+ Set the target path to store the RAW graph
+
+ rawGraphPath
+ /tmp/prod_provision/graph/01_graph_raw
+
+
+
+
+
+
+ Set the target path to store the first cleaned graph (the cleaned RAW graph)
+
+ cleanedFirstGraphPath
+ /tmp/prod_provision/graph/02_graph_cleaned
+
+
+
+
+
+
+ Set the target path to store the DEDUPED graph
+
+ dedupGraphPath
+ /tmp/prod_provision/graph/03_graph_dedup
+
+
+
+
+
+
+ Set the target path to store the INFERRED graph
+
+ inferredGraphPath
+ /tmp/prod_provision/graph/04_graph_inferred
+
+
+
+
+
+
+ Set the target path to store the CONSISTENCY graph
+
+ consistentGraphPath
+ /tmp/prod_provision/graph/05_graph_consistent
+
+
+
+
+
+
+ Set the target path to store the ORCID enriched graph
+
+ orcidGraphPath
+ /tmp/prod_provision/graph/06_graph_orcid
+
+
+
+
+
+
+ Set the target path to store the BULK TAGGED graph
+
+ bulkTaggingGraphPath
+ /tmp/prod_provision/graph/07_graph_bulktagging
+
+
+
+
+
+
+ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph
+
+ affiliationGraphPath
+ /tmp/prod_provision/graph/08_graph_affiliation
+
+
+
+
+
+
+ Set the target path to store the COMMUNITY from SELECTED SOURCES graph
+
+ communityOrganizationGraphPath
+ /tmp/prod_provision/graph/09_graph_comunity_organization
+
+
+
+
+
+
+ Set the target path to store the FUNDING from SEMANTIC RELATION graph
+
+ fundingGraphPath
+ /tmp/prod_provision/graph/10_graph_funding
+
+
+
+
+
+
+ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph
+
+ communitySemRelGraphPath
+ /tmp/prod_provision/graph/11_graph_comunity_sem_rel
+
+
+
+
+
+
+ Set the target path to store the COUNTRY enriched graph
+
+ countryGraphPath
+ /tmp/prod_provision/graph/12_graph_country
+
+
+
+
+
+
+ Set the target path to store the CLEANED graph
+
+ cleanedGraphPath
+ /tmp/prod_provision/graph/13_graph_cleaned
+
+
+
+
+
+
+ Set the target path to store the blacklisted graph
+
+ blacklistedGraphPath
+ /tmp/prod_provision/graph/14_graph_blacklisted
+
+
+
+
+
+
+ Set the map of paths for the Bulk Tagging
+
+ bulkTaggingPathMap
+ {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"}
+
+
+
+
+
+
+ Set the map of associations organization, community list for the propagation of community to result through organization
+
+ propagationOrganizationCommunityMap
+ {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], 
"20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"],
+ "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]}
+
+
+
+
+
+
+
+ Set the dedup orchestrator name
+
+ dedupConfig
+ dedup-similarity-result-decisiontree-v2
+
+
+
+
+
+
+ declares the ActionSet ids to promote in the RAW graph
+
+ actionSetIdsRawGraph
+ scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite
+
+
+
+
+
+
+ declares the ActionSet ids to promote in the INFERRED graph
+
+ actionSetIdsIISGraph
+ iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores
+
+
+
+
+
+
+ Set the IS lookup service address
+
+ isLookUpUrl
+ http://services.openaire.eu:8280/is/services/isLookUp?wsdl
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ reuse cached ODF claims from the PROD aggregation system
+
+ reuseODFClaims
+ true
+
+
+
+
+
+
+ reuse cached ODF records on HDFS from the PROD aggregation system
+
+ reuseODFhdfs
+ true
+
+
+
+
+
+
+ reuse cached OAF claims from the PROD aggregation system
+
+ reuseOAFClaims
+ true
+
+
+
+
+
+
+ reuse cached OAF records on HDFS from the PROD aggregation system
+
+ reuseOAFhdfs
+ true
+
+
+
+
+
+
+ reuse cached DB content from the PROD aggregation system
+
+ reuseDB
+ true
+
+
+
+
+
+
+ reuse cached OpenOrgs content from the PROD aggregation system
+
+ reuseDBOpenorgs
+ true
+
+
+
+
+
+
+ reuse cached ODF content from the PROD aggregation system
+
+ reuseODF
+ true
+
+
+
+
+
+
+ reuse cached OAF content from the PROD aggregation system
+
+ reuseOAF
+ true
+
+
+
+
+
+
+ set the PROD aggregator content path
+
+ contentPath
+ /tmp/prod_aggregator
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+ create the PROD AGGREGATOR graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphOutputPath' : 'aggregatorGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl',
+ 'reuseODFClaims' : 'reuseODFClaims',
+ 'reuseOAFClaims' : 'reuseOAFClaims',
+ 'reuseDB' : 'reuseDB',
+ 'reuseDBOpenorgs' : 'reuseDBOpenorgs',
+ 'reuseODF' : 'reuseODF',
+ 'reuseODF_hdfs' : 'reuseODFhdfs',
+ 'reuseOAF' : 'reuseOAF',
+ 'reuseOAF_hdfs' : 'reuseOAFhdfs',
+ 'contentPath' : 'contentPath',
+ 'nsPrefixBlacklist' : 'nsPrefixBlacklist'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app',
+ 'mongoURL' : '',
+ 'mongoDb' : '',
+ 'mdstoreManagerUrl' : '',
+ 'postgresURL' : '',
+ 'postgresUser' : '',
+ 'postgresPassword' : '',
+ 'postgresOpenOrgsURL' : '',
+ 'postgresOpenOrgsUser' : '',
+ 'postgresOpenOrgsPassword' : '',
+ 'shouldHashId' : 'true',
+ 'importOpenorgs' : 'true',
+ 'workingDir' : '/tmp/prod_provision/working_dir/prod_aggregator'
+ }
+
+ build-report
+
+
+
+
+
+
+ create the RAW graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'inputActionSetIds' : 'actionSetIdsRawGraph',
+ 'inputGraphRootPath' : 'aggregatorGraphPath',
+ 'outputGraphRootPath' : 'rawGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G',
+ 'activePromoteDatasetActionPayload' : 'true',
+ 'activePromoteDatasourceActionPayload' : 'true',
+ 'activePromoteOrganizationActionPayload' : 'true',
+ 'activePromoteOtherResearchProductActionPayload' : 'true',
+ 'activePromoteProjectActionPayload' : 'true',
+ 'activePromotePublicationActionPayload' : 'true',
+ 'activePromoteRelationActionPayload' : 'true',
+ 'activePromoteResultActionPayload' : 'true',
+ 'activePromoteSoftwareActionPayload' : 'true',
+ 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
+ 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsRaw'
+ }
+
+ build-report
+
+
+
+
+
+
+ clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphInputPath' : 'rawGraphPath',
+ 'graphOutputPath': 'cleanedFirstGraphPath',
+ 'isLookupUrl': 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/clean'
+ }
+
+ build-report
+
+
+
+
+
+
+ search for duplicates in the raw graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'actionSetId' : 'dedupConfig',
+ 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'dedupGraphPath': 'dedupGraphPath',
+ 'isLookUpUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app',
+ 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple',
+ 'workingPath' : '/tmp/prod_provision/working_dir/dedup',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G'
+ }
+
+ build-report
+
+
+
+
+
+
+ create the INFERRED graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'inputActionSetIds' : 'actionSetIdsIISGraph',
+ 'inputGraphRootPath' : 'dedupGraphPath',
+ 'outputGraphRootPath' : 'inferredGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G',
+ 'activePromoteDatasetActionPayload' : 'true',
+ 'activePromoteDatasourceActionPayload' : 'true',
+ 'activePromoteOrganizationActionPayload' : 'true',
+ 'activePromoteOtherResearchProductActionPayload' : 'true',
+ 'activePromoteProjectActionPayload' : 'true',
+ 'activePromotePublicationActionPayload' : 'true',
+ 'activePromoteRelationActionPayload' : 'true',
+ 'activePromoteResultActionPayload' : 'true',
+ 'activePromoteSoftwareActionPayload' : 'true',
+ 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
+ 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsIIS'
+ }
+
+ build-report
+
+
+
+
+
+
+ mark duplicates as deleted and redistribute the relationships
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath' : 'inferredGraphPath',
+ 'graphOutputPath': 'consistentGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app',
+ 'workingPath' : '/tmp/prod_provision/working_dir/dedup'
+ }
+
+ build-report
+
+
+
+
+
+
+ propagates ORCID among results linked by allowedsemrels semantic relationships
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'consistentGraphPath',
+ 'outputPath': 'orcidGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/orcid',
+ 'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+ mark results respecting some rules as belonging to communities
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'orcidGraphPath',
+ 'outputPath': 'bulkTaggingGraphPath',
+ 'isLookUpUrl' : 'isLookUpUrl',
+ 'pathMap' : 'bulkTaggingPathMap'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/bulktag/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/bulktag'
+ }
+
+ build-report
+
+
+
+
+
+
+ creates relationships between results and organizations when the organizations are associated with institutional repositories
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'bulkTaggingGraphPath',
+ 'outputPath': 'affiliationGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/affiliation/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/affiliation',
+ 'saveGraph' : 'true',
+ 'blacklist' : 'empty'
+ }
+
+ build-report
+
+
+
+
+
+
+ marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'affiliationGraphPath',
+ 'outputPath': 'communityOrganizationGraphPath',
+ 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_organization/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/community_organization',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+ creates relations between projects and results linked to other results through allowedsemrels semantic relations linked to projects
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'communityOrganizationGraphPath',
+ 'outputPath': 'fundingGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/funding/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/funding',
+ 'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+ tag as belonging to communities the results in allowedsemrels relation with other results already linked to communities
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'fundingGraphPath',
+ 'outputPath': 'communitySemRelGraphPath',
+ 'isLookUpUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/community_semrel',
+ 'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+ associates to results collected from allowedtypes and those in the whitelist the country of the organization(s) handling the datasource they are collected from
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'communitySemRelGraphPath',
+ 'outputPath': 'countryGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/country/oozie_app',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G',
+ 'workingDir' : '/tmp/prod_provision/working_dir/country',
+ 'allowedtypes' : 'pubsrepository::institutional',
+ 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+ clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphInputPath' : 'countryGraphPath',
+ 'graphOutputPath': 'cleanedGraphPath',
+ 'isLookupUrl': 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/clean'
+ }
+
+ build-report
+
+
+
+
+
+
+ removes blacklisted relations
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'cleanedGraphPath',
+ 'outputPath': 'blacklistedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/blacklist/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/blacklist',
+ 'postgresURL' : '',
+ 'postgresUser' : '',
+ 'postgresPassword' : ''
+ }
+
+ build-report
+
+
+
+
+
+
+
+ wf_20210723_171026_279
+ 2021-07-24T00:00:39+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml
new file mode 100644
index 0000000000..836e69d6f3
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml
@@ -0,0 +1,74 @@
+
+
+
+ Graph to HiveDB [PROD]
+ Data Provision
+ 30
+
+
+ Set the path containing the AGGREGATOR graph
+
+ inputPath
+
+
+
+
+
+
+
+ Set the name of the target Hive DB
+
+ hiveDbName
+
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+ import the graph into the Hive DB
+
+ executeOozieJob
+ IIS
+
+ {
+ 'inputPath' : 'inputPath',
+ 'hiveDbName' : 'hiveDbName'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hive/oozie_app',
+ 'sparkDriverMemory' : '4G',
+ 'sparkExecutorMemory' : '10G',
+ 'sparkExecutorCores' : '3'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ wf_20210728_075001_400
+ 2021-07-28T08:04:00+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml
new file mode 100644
index 0000000000..6cdf41bb63
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml
@@ -0,0 +1,99 @@
+
+
+
+ Update Solr [PROD]
+ Data Provision
+ 30
+
+
+ Set the path containing the GRAPH to index
+
+ inputGraphRootPath
+ /tmp/prod_provision/graph/14_graph_blacklisted
+
+
+
+
+
+
+ Set the format passed to the provision job (e.g. DMF)
+
+ format
+ DMF
+
+
+
+
+
+
+ Set the lookup address
+
+ isLookupUrl
+ http://services.openaire.eu:8280/is/services/isLookUp?wsdl
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+ index the graph on Solr
+
+ executeOozieJob
+ IIS
+
+ {
+ 'inputGraphRootPath' : 'inputGraphRootPath',
+ 'isLookupUrl' : 'isLookupUrl',
+ 'format' : 'format'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/provision/oozie_app',
+ 'sourceMaxRelations' : '1000',
+ 'targetMaxRelations' : '10000000',
+ 'relPartitions' : '3000',
+ 'batchSize' : '2000',
+ 'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments,cites,isCitedBy',
+ 'otherDsTypeId' : 'scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource',
+ 'resumeFrom' : 'prepare_relations',
+ 'shouldIndex' : 'true',
+ 'outputFormat' : 'SOLR',
+ 'sparkDriverMemoryForJoining' : '3G',
+ 'sparkExecutorMemoryForJoining' : '7G',
+ 'sparkExecutorCoresForJoining' : '4',
+ 'sparkDriverMemoryForIndexing' : '2G',
+ 'sparkExecutorMemoryForIndexing' : '2G',
+ 'sparkExecutorCoresForIndexing' : '64',
+ 'sparkNetworkTimeout' : '600',
+ 'workingDir' : '/tmp/prod_provision/working_dir/update_solr'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ wf_20210724_062705_620
+ 2021-07-25T13:25:37+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml
new file mode 100644
index 0000000000..4dfae3c7d6
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml
@@ -0,0 +1,100 @@
+
+
+
+ Update Stats [PROD]
+ Data Provision
+ 30
+
+
+ Set the OpenAIRE graph DB name
+
+ openaire_db_name
+ openaire_prod_yyyyMMdd
+
+
+
+
+
+
+ Set the STATS DB name
+
+ stats_db_name
+ openaire_prod_stats_yyyyMMdd
+
+
+
+
+
+
+ Set the STATS MONITOR DB name
+
+ monitor_db_name
+ openaire_prod_stats_monitor_yyyyMMdd
+
+
+
+
+
+
+ Set the STATS OBSERVATORY DB name
+
+ observatory_db_name
+ openaire_prod_stats_observatory_yyyyMMdd
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+ update the content in the stats DB
+
+ executeOozieJob
+ IIS
+
+ {
+ 'openaire_db_name' : 'openaire_db_name',
+ 'stats_db_name' : 'stats_db_name',
+ 'monitor_db_name' : 'monitor_db_name',
+ 'observatory_db_name' : 'observatory_db_name'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_update/oozie_app',
+ 'hive_timeout' : '15000',
+ 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool',
+ 'stats_db_shadow_name' : 'openaire_prod_stats_shadow',
+ 'external_stats_db_name' : 'stats_ext',
+ 'monitor_db_shadow_name' : 'openaire_prod_stats_monitor_shadow',
+ 'observatory_db_shadow_name' : 'openaire_prod_stats_observatory_shadow',
+ 'context_api_url' : 'https://services.openaire.eu/openaire'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ wf_20210725_065608_71
+ 2021-07-26T07:35:55+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml
new file mode 100644
index 0000000000..d8def071fd
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml
@@ -0,0 +1,87 @@
+
+
+
+ Publish Stats [PROD]
+ Content Publishing
+ 35
+
+
+ Set the STATS DB name
+
+ stats_db_name
+ openaire_prod_stats_yyyyMMdd
+
+
+
+
+
+
+ Set the STATS MONITOR DB name
+
+ monitor_db_name
+ openaire_prod_stats_monitor_yyyyMMdd
+
+
+
+
+
+
+ Set the STATS OBSERVATORY DB name
+
+ observatory_db_name
+ openaire_prod_stats_observatory_yyyyMMdd
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+ publishes the stats DB to the public schema
+
+ executeOozieJob
+ IIS
+
+ {
+ 'stats_db_name' : 'stats_db_name',
+ 'monitor_db_name' : 'monitor_db_name',
+ 'observatory_db_name' : 'observatory_db_name'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_promote/oozie_app',
+ 'hive_timeout' : '150000',
+ 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool',
+ 'stats_db_production_name' : 'openaire_prod_stats',
+ 'monitor_db_production_name' : 'openaire_prod_stats_monitor',
+ 'observatory_db_production_name' : 'openaire_prod_stats_observatory'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ wf_20210727_160728_625
+ 2021-07-27T16:53:01+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml
new file mode 100644
index 0000000000..cf337fd7e0
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml
@@ -0,0 +1,131 @@
+
+
+
+ Update Broker events [PROD OCEAN]
+ Data Provision
+ 30
+
+
+ Set the path containing the GRAPH to scan
+
+ graphInputPath
+
+
+
+
+
+
+
+ Set the datasource Ids Whitelist
+
+ datasourceIdWhitelist
+ openaire____::9ecafa3655143cbc4bc75853035cd432,opendoar____::dc6e224a8d74ce03bf301152d6e33e97,openaire____::09da65eaaa6deac2f785df1e0ae95a06,openaire____::3db634fc5446f389d0b826ea400a5da6,openaire____::5a38cb462ac487bf26bdb86009fe3e74,openaire____::3c29379cc184f66861e858bc7aa9615b,openaire____::4657147e48a1f32637bfe3743bce76c6,openaire____::c3267ea1c3f378c456209b6df241624e,opendoar____::358aee4cc897452c00244351e4d91f69,re3data_____::7b0ad08687b2c960d5aeef06f811d5e6,opendoar____::798ed7d4ee7138d49b8828958048130a,opendoar____::6f4922f45568161a8cdf4ad2299f6d23,opendoar____::4aa0e93b918848be0b7728b4b1568d8a,openaire____::02b55e4f52388520bfe11f959f836e68
+
+
+
+
+
+
+ Set the datasource type Whitelist
+
+ datasourceTypeWhitelist
+ pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic,datarepository::unknown,orprepository,softwarerepository
+
+
+
+
+
+
+ Set the datasource Id Blacklist
+
+ datasourceIdBlacklist
+ -
+
+
+
+
+
+
+ Set the TOPIC whitelist (* = all topics)
+
+ topicWhitelist
+ ENRICH/MISSING/SUBJECT/DDC,ENRICH/MISSING/SUBJECT/JEL,ENRICH/MISSING/SUBJECT/MESHEUROPMC,ENRICH/MISSING/PUBLICATION_DATE,ENRICH/MISSING/PID,ENRICH/MISSING/PROJECT,ENRICH/MISSING/SUBJECT/ACM,ENRICH/MISSING/SUBJECT/ARXIV,ENRICH/MISSING/OPENACCESS_VERSION,ENRICH/MISSING/AUTHOR/ORCID,ENRICH/MISSING/ABSTRACT,ENRICH/MORE/SUBJECT/ACM,ENRICH/MORE/SUBJECT/ARXIV,ENRICH/MORE/SUBJECT/DDC,ENRICH/MORE/SUBJECT/JEL,ENRICH/MORE/OPENACCESS_VERSION,ENRICH/MORE/SUBJECT/MESHEUROPMC,ENRICH/MORE/PID
+
+
+
+
+
+
+ Set the output path to store the Event records
+
+ outputDir
+ /var/lib/dnet/broker_PROD/events
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+ update the BROKER events
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphInputPath' : 'graphInputPath',
+ 'datasourceIdWhitelist' : 'datasourceIdWhitelist',
+ 'datasourceTypeWhitelist' : 'datasourceTypeWhitelist',
+ 'datasourceIdBlacklist' : 'datasourceIdBlacklist',
+ 'topicWhitelist' : 'topicWhitelist',
+ 'outputDir' : 'outputDir'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/broker/generate_events/oozie_app',
+ 'esEventIndexName' : '',
+ 'esNotificationsIndexName' : '',
+ 'esIndexHost' : '',
+ 'maxIndexedEventsForDsAndTopic' : '100',
+ 'esBatchWriteRetryCount' : '8',
+ 'esBatchWriteRetryWait' : '60s',
+ 'esBatchSizeEntries' : '200',
+ 'esNodesWanOnly' : 'true',
+ 'brokerApiBaseUrl' : '',
+ 'brokerDbUrl' : '',
+ 'brokerDbUser' : '',
+ 'brokerDbPassword' : '',
+ 'sparkDriverMemory' : '3G',
+ 'sparkExecutorMemory' : '7G',
+ 'sparkExecutorCores' : '6',
+ 'workingDir' : '/tmp/prod_provision/working_dir/broker_events'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ wf_20210709_073839_206
+ 2021-07-09T11:01:01+00:00
+ FAILURE
+
+
+
+
\ No newline at end of file