diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml
index c10dd4e99..82cf9d3d5 100644
--- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml
+++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml
@@ -4,7 +4,7 @@
-
+
Graph processing [EXPERIMENT]
@@ -15,7 +15,7 @@
set the path of unresolved entities
unresolvedEntityPath
- /data/unresolved_BETA
+ /data/unresolved_BETA/content
@@ -51,6 +51,16 @@
+
+ set the number of iterations in affiliation propagation
+
+ iterations
+ 1
+
+
+
+
+
Set the target path to store the MERGED graph
@@ -91,11 +101,21 @@
+
+ Set the target path to store the GROUPED graph
+
+ groupedGraphPath
+ /tmp/beta_experiment/graph/05_graph_grouped
+
+
+
+
+
Set the target path to store the INFERRED graph
inferredGraphPath
- /tmp/beta_experiment/graph/05_graph_inferred
+ /tmp/beta_experiment/graph/06_graph_inferred
@@ -105,7 +125,7 @@
Set the target path to store the DEDUPED graph
dedupGraphPath
- /tmp/beta_experiment/graph/06_graph_dedup
+ /tmp/beta_experiment/graph/07_graph_dedup
@@ -115,7 +135,7 @@
Set the target path to store the CONSISTENCY graph
consistentGraphPath
- /tmp/beta_experiment/graph/07_graph_consistent
+ /tmp/beta_experiment/graph/08_graph_consistent
@@ -125,7 +145,7 @@
Set the target path to store the ORCID enriched graph
orcidGraphPath
- /tmp/beta_experiment/graph/08_graph_orcid
+ /tmp/beta_experiment/graph/09_graph_orcid
@@ -135,7 +155,7 @@
Set the target path to store the BULK TAGGED graph
bulkTaggingGraphPath
- /tmp/beta_experiment/graph/09_graph_bulktagging
+ /tmp/beta_experiment/graph/10_graph_bulktagging
@@ -145,7 +165,7 @@
Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph
affiliationGraphPath
- /tmp/beta_experiment/graph/10_graph_affiliation
+ /tmp/beta_experiment/graph/11_graph_affiliation
@@ -155,7 +175,7 @@
Set the target path to store the AFFILIATION from SEMATIC RELATION graph
affiliationSemRelGraphPath
- /tmp/beta_experiment/graph/11_graph_affiliationsr
+ /tmp/beta_experiment/graph/12_graph_affiliationsr
@@ -165,7 +185,7 @@
Set the target path to store the COMMUNITY from SELECTED SOURCES graph
communityOrganizationGraphPath
- /tmp/beta_experiment/graph/12_graph_community_organization
+ /tmp/beta_experiment/graph/13_graph_community_organization
@@ -175,7 +195,7 @@
Set the target path to store the FUNDING from SEMANTIC RELATION graph
fundingGraphPath
- /tmp/beta_experiment/graph/13_graph_funding
+ /tmp/beta_experiment/graph/14_graph_funding
@@ -185,7 +205,7 @@
Set the target path to store the COMMUNITY from SEMANTIC RELATION graph
communitySemRelGraphPath
- /tmp/beta_experiment/graph/14_graph_community_sem_rel
+ /tmp/beta_experiment/graph/15_graph_community_sem_rel
@@ -195,7 +215,7 @@
Set the target path to store the COUNTRY enriched graph
countryGraphPath
- /tmp/beta_experiment/graph/15_graph_country
+ /tmp/beta_experiment/graph/16_graph_country
@@ -205,7 +225,7 @@
Set the target path to store the CLEANED graph
cleanedGraphPath
- /tmp/beta_experiment/graph/16_graph_cleaned
+ /tmp/beta_experiment/graph/17_graph_cleaned
@@ -215,7 +235,7 @@
Set the target path to store the blacklisted graph
blacklistedGraphPath
- /tmp/beta_experiment/graph/17_graph_blacklisted
+ /tmp/beta_experiment/graph/18_graph_blacklisted
@@ -548,14 +568,14 @@
'mongoURL' : 'mongodb://beta.services.openaire.eu',
'mongoDb' : 'mdstore',
'mdstoreManagerUrl' : 'https://beta.services.openaire.eu/mdstoremanager',
- 'postgresURL' : '',
+ 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
'postgresUser' : '',
'postgresPassword' : '',
- 'postgresOpenOrgsURL' : '',
+ 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.40:5432/oa_organizations',
'postgresOpenOrgsUser' : '',
'postgresOpenOrgsPassword' : '',
'shouldHashId' : 'true',
- 'importOpenorgs' : 'true',
+ 'importOpenorgs' : 'false',
'workingDir' : '/tmp/beta_experiment/working_dir/beta_aggregator'
}
@@ -594,10 +614,10 @@
'mongoURL' : 'mongodb://services.openaire.eu',
'mongoDb' : 'mdstore',
'mdstoreManagerUrl' : 'https://services.openaire.eu/mdstoremanager',
- 'postgresURL' : '',
+ 'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus',
'postgresUser' : '',
'postgresPassword' : '',
- 'postgresOpenOrgsURL' : '',
+ 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.39:5432/oa_organizations',
'postgresOpenOrgsUser' : '',
'postgresOpenOrgsPassword' : '',
'shouldHashId' : 'true',
@@ -737,11 +757,11 @@
executeOozieJob
IIS
- {
+ {
'graphBasePath':'cleanedFirstGraphPath',
'unresolvedPath' :'unresolvedEntityPath',
'targetPath':'resolvedGraphPath'
- }
+ }
{
@@ -752,6 +772,30 @@
}
+
+
+
+
+
+ Graph grouping
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'resolvedGraphPath',
+ 'targetPath':'groupedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app',
+ 'workingDir' : '/tmp/beta_experiment/working_dir/grouping',
+ 'sparkExecutorCores' : '4',
+ 'sparkExecutorMemory' : '7G'
+ }
+
+
@@ -867,9 +911,9 @@
import_mdstore_service_location
import_dataset_mdstore_ids_csv
oozie.wf.application.path
- /lib/iis/primary/snapshots/2021-09-24
+ /lib/iis/primary/snapshots/2021-12-09
IIS
- /tmp/beta_inference/graph/07_graph_cleaned
+ deprecated - not used
import_infospace_graph_location
import_project_concepts_context_ids_csv
@@ -908,7 +952,7 @@
'import_islookup_service_location' : 'import_islookup_service_location',
'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv',
'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv',
- 'import_infospace_graph_location' : 'import_infospace_graph_location',
+ 'import_infospace_graph_location' : 'groupedGraphPath',
'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations',
'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets',
@@ -958,7 +1002,7 @@
{
'inputActionSetIds' : 'actionSetIdsIISGraph',
- 'inputGraphRootPath' : 'resolvedGraphPath',
+ 'inputGraphRootPath' : 'groupedGraphPath',
'outputGraphRootPath' : 'inferredGraphPath',
'isLookupUrl' : 'isLookUpUrl'
}
@@ -1125,7 +1169,8 @@
{
'sourcePath' : 'affiliationGraphPath',
- 'outputPath': 'affiliationSemRelGraphPath'
+ 'outputPath': 'affiliationSemRelGraphPath',
+ 'iterations':'iterations'
}
@@ -1283,7 +1328,7 @@
{
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app',
'workingDir' : '/tmp/beta_experiment/working_dir/blacklist',
- 'postgresURL' : '',
+ 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
'postgresUser' : '',
'postgresPassword' : ''
}
@@ -1296,10 +1341,10 @@
- wf_20211206_093743_83
- 2021-12-06T10:12:32+00:00
- SUCCESS
-
+ wf_20220111_200505_785
+ 2022-01-11T20:08:53+00:00
+
+
-
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml
index 2fed35f44..df9528f4c 100644
--- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml
+++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml
@@ -11,6 +11,16 @@
IIS
30
+
+ set the path of unresolved entities
+
+ unresolvedEntityPath
+ /data/unresolved_BETA/content
+
+
+
+
+
set blacklist of funder nsPrefixes from the beta aggregator
@@ -71,11 +81,31 @@
+
+ Set the target path to store the RESOLVED graph
+
+ resolvedGraphPath
+ /tmp/beta_inference/graph/04_graph_resolved
+
+
+
+
+
+
+ Set the target path to store the GROUPED graph
+
+ groupedGraphPath
+ /tmp/beta_inference/graph/05_graph_grouped
+
+
+
+
+
Set the target path to store the DEDUPED graph
dedupGraphPath
- /tmp/beta_inference/graph/04_graph_dedup
+ /tmp/beta_inference/graph/06_graph_dedup
@@ -85,7 +115,7 @@
Set the target path to store the CONSISTENCY graph
consistentGraphPath
- /tmp/beta_inference/graph/05_graph_consistent
+ /tmp/beta_inference/graph/07_graph_consistent
@@ -95,7 +125,7 @@
Set the target path to store the CLEANED graph
cleanedGraphPath
- /tmp/beta_inference/graph/06_graph_cleaned
+ /tmp/beta_inference/graph/08_graph_cleaned
@@ -548,6 +578,55 @@
build-report
+
+
+
+
+
+ Resolve Relation
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'cleanedFirstGraphPath',
+ 'unresolvedPath' :'unresolvedEntityPath',
+ 'targetPath':'resolvedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app',
+ 'workingDir' : '/tmp/beta_inference/working_dir/relation_resolution',
+ 'sparkExecutorCores' : '2',
+ 'sparkExecutorMemory' : '12G'
+ }
+
+
+
+
+
+
+
+ Graph grouping
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'resolvedGraphPath',
+ 'targetPath':'groupedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app',
+ 'workingDir' : '/tmp/beta_inference/working_dir/grouping',
+ 'sparkExecutorCores' : '4',
+ 'sparkExecutorMemory' : '7G'
+ }
+
+
@@ -560,7 +639,7 @@
{
'actionSetId' : 'dedupConfig',
- 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'graphBasePath' : 'groupedGraphPath',
'dedupGraphPath': 'dedupGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml
index e5ce3d710..0ea6be341 100644
--- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml
+++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml
@@ -11,6 +11,16 @@
IIS
30
+
+ set the path of unresolved entities
+
+ unresolvedEntityPath
+ /data/unresolved_PROD/content
+
+
+
+
+
set blacklist of funder nsPrefixes
@@ -61,11 +71,21 @@
+
+ Set the target path to store the RESOLVED graph
+
+ resolvedGraphPath
+ /tmp/prod_inference/graph/03_graph_resolved
+
+
+
+
+
Set the target path to store the DEDUPED graph
dedupGraphPath
- /tmp/prod_inference/graph/03_graph_dedup
+ /tmp/prod_inference/graph/04_graph_dedup
@@ -75,7 +95,7 @@
Set the target path to store the CONSISTENCY graph
consistentGraphPath
- /tmp/prod_inference/graph/04_graph_consistent
+ /tmp/prod_inference/graph/05_graph_consistent
@@ -85,7 +105,7 @@
Set the target path to store the CLEANED graph
cleanedGraphPath
- /tmp/prod_inference/graph/05_graph_cleaned
+ /tmp/prod_inference/graph/06_graph_cleaned
@@ -347,6 +367,31 @@
build-report
+
+
+
+
+
+ Resolve Relation
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'cleanedFirstGraphPath',
+ 'unresolvedPath' :'unresolvedEntityPath',
+ 'targetPath':'resolvedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app',
+ 'workingDir' : '/tmp/prod_inference/working_dir/relation_resolution',
+ 'sparkExecutorCores' : '2',
+ 'sparkExecutorMemory' : '12G'
+ }
+
+
@@ -359,7 +404,7 @@
{
'actionSetId' : 'dedupConfig',
- 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'graphBasePath' : 'resolvedGraphPath',
'dedupGraphPath': 'dedupGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml
index f83337b3c..73c44aba8 100644
--- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml
+++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml
@@ -11,6 +11,16 @@
Data Provision
30
+
+ set the path of unresolved entities
+
+ unresolvedEntityPath
+ /data/unresolved_BETA/content
+
+
+
+
+
set blacklist of funder nsPrefixes from the beta aggregator
@@ -71,11 +81,31 @@
+
+ Set the target path to store the RESOLVED graph
+
+ resolvedGraphPath
+ /tmp/beta_provision/graph/04_graph_resolved
+
+
+
+
+
+
+ Set the target path to store the GROUPED graph
+
+ groupedGraphPath
+ /tmp/beta_provision/graph/05_graph_grouped
+
+
+
+
+
Set the target path to store the DEDUPED graph
dedupGraphPath
- /tmp/beta_provision/graph/04_graph_dedup
+ /tmp/beta_provision/graph/06_graph_dedup
@@ -85,7 +115,7 @@
Set the target path to store the INFERRED graph
inferredGraphPath
- /tmp/beta_provision/graph/05_graph_inferred
+ /tmp/beta_provision/graph/07_graph_inferred
@@ -95,7 +125,7 @@
Set the target path to store the CONSISTENCY graph
consistentGraphPath
- /tmp/beta_provision/graph/06_graph_consistent
+ /tmp/beta_provision/graph/08_graph_consistent
@@ -105,7 +135,7 @@
Set the target path to store the ORCID enriched graph
orcidGraphPath
- /tmp/beta_provision/graph/07_graph_orcid
+ /tmp/beta_provision/graph/09_graph_orcid
@@ -115,7 +145,7 @@
Set the target path to store the BULK TAGGED graph
bulkTaggingGraphPath
- /tmp/beta_provision/graph/08_graph_bulktagging
+ /tmp/beta_provision/graph/10_graph_bulktagging
@@ -125,7 +155,17 @@
Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph
affiliationGraphPath
- /tmp/beta_provision/graph/09_graph_affiliation
+ /tmp/beta_provision/graph/11_graph_affiliation
+
+
+
+
+
+
+ Set the target path to store the AFFILIATION from SEMANTIC RELATION graph
+
+ affiliationSemRelGraphPath
+ /tmp/beta_provision/graph/12_graph_affiliationsr
@@ -135,7 +175,7 @@
Set the target path to store the COMMUNITY from SELECTED SOURCES graph
communityOrganizationGraphPath
- /tmp/beta_provision/graph/10_graph_comunity_organization
+ /tmp/beta_provision/graph/13_graph_comunity_organization
@@ -145,7 +185,7 @@
Set the target path to store the FUNDING from SEMANTIC RELATION graph
fundingGraphPath
- /tmp/beta_provision/graph/11_graph_funding
+ /tmp/beta_provision/graph/14_graph_funding
@@ -155,7 +195,7 @@
Set the target path to store the COMMUNITY from SEMANTIC RELATION graph
communitySemRelGraphPath
- /tmp/beta_provision/graph/12_graph_comunity_sem_rel
+ /tmp/beta_provision/graph/15_graph_comunity_sem_rel
@@ -165,7 +205,7 @@
Set the target path to store the COUNTRY enriched graph
countryGraphPath
- /tmp/beta_provision/graph/13_graph_country
+ /tmp/beta_provision/graph/16_graph_country
@@ -175,7 +215,7 @@
Set the target path to store the CLEANED graph
cleanedGraphPath
- /tmp/beta_provision/graph/14_graph_cleaned
+ /tmp/beta_provision/graph/17_graph_cleaned
@@ -185,7 +225,7 @@
Set the target path to store the blacklisted graph
blacklistedGraphPath
- /tmp/beta_provision/graph/15_graph_blacklisted
+ /tmp/beta_provision/graph/18_graph_blacklisted
@@ -695,6 +735,55 @@
build-report
+
+
+
+
+
+ Resolve Relation
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'cleanedFirstGraphPath',
+ 'unresolvedPath' :'unresolvedEntityPath',
+ 'targetPath':'resolvedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/relation_resolution',
+ 'sparkExecutorCores' : '2',
+ 'sparkExecutorMemory' : '12G'
+ }
+
+
+
+
+
+
+
+ Graph grouping
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'resolvedGraphPath',
+ 'targetPath':'groupedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/grouping',
+ 'sparkExecutorCores' : '4',
+ 'sparkExecutorMemory' : '7G'
+ }
+
+
@@ -707,7 +796,7 @@
{
'actionSetId' : 'dedupConfig',
- 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'graphBasePath' : 'groupedGraphPath',
'dedupGraphPath': 'dedupGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml
index be6155f2f..205db29a6 100644
--- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml
+++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml
@@ -11,6 +11,16 @@
Data Provision
30
+
+ set the path of unresolved entities
+
+ unresolvedEntityPath
+ /data/unresolved_PROD/content
+
+
+
+
+
set blacklist of funder nsPrefixes
@@ -51,11 +61,21 @@
+
+ Set the target path to store the RESOLVED graph
+
+ resolvedGraphPath
+ /tmp/prod_provision/graph/03_graph_resolved
+
+
+
+
+
Set the target path to store the DEDUPED graph
dedupGraphPath
- /tmp/prod_provision/graph/03_graph_dedup
+ /tmp/prod_provision/graph/04_graph_dedup
@@ -65,7 +85,7 @@
Set the target path to store the INFERRED graph
inferredGraphPath
- /tmp/prod_provision/graph/04_graph_inferred
+ /tmp/prod_provision/graph/05_graph_inferred
@@ -75,7 +95,7 @@
Set the target path to store the CONSISTENCY graph
consistentGraphPath
- /tmp/prod_provision/graph/05_graph_consistent
+ /tmp/prod_provision/graph/06_graph_consistent
@@ -85,7 +105,7 @@
Set the target path to store the ORCID enriched graph
orcidGraphPath
- /tmp/prod_provision/graph/06_graph_orcid
+ /tmp/prod_provision/graph/07_graph_orcid
@@ -95,7 +115,7 @@
Set the target path to store the BULK TAGGED graph
bulkTaggingGraphPath
- /tmp/prod_provision/graph/07_graph_bulktagging
+ /tmp/prod_provision/graph/08_graph_bulktagging
@@ -105,7 +125,7 @@
Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph
affiliationGraphPath
- /tmp/prod_provision/graph/08_graph_affiliation
+ /tmp/prod_provision/graph/09_graph_affiliation
@@ -115,7 +135,7 @@
Set the target path to store the COMMUNITY from SELECTED SOURCES graph
communityOrganizationGraphPath
- /tmp/prod_provision/graph/09_graph_comunity_organization
+ /tmp/prod_provision/graph/10_graph_comunity_organization
@@ -125,7 +145,7 @@
Set the target path to store the FUNDING from SEMANTIC RELATION graph
fundingGraphPath
- /tmp/prod_provision/graph/10_graph_funding
+ /tmp/prod_provision/graph/11_graph_funding
@@ -135,7 +155,7 @@
Set the target path to store the COMMUNITY from SEMANTIC RELATION graph
communitySemRelGraphPath
- /tmp/prod_provision/graph/11_graph_comunity_sem_rel
+ /tmp/prod_provision/graph/12_graph_comunity_sem_rel
@@ -145,7 +165,7 @@
Set the target path to store the COUNTRY enriched graph
countryGraphPath
- /tmp/prod_provision/graph/12_graph_country
+ /tmp/prod_provision/graph/13_graph_country
@@ -155,7 +175,7 @@
Set the target path to store the CLEANED graph
cleanedGraphPath
- /tmp/prod_provision/graph/13_graph_cleaned
+ /tmp/prod_provision/graph/14_graph_cleaned
@@ -165,7 +185,7 @@
Set the target path to store the blacklisted graph
blacklistedGraphPath
- /tmp/prod_provision/graph/14_graph_blacklisted
+ /tmp/prod_provision/graph/15_graph_blacklisted
@@ -446,6 +466,59 @@
build-report
+
+
+
+
+
+ updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'cleanedFirstGraphPath'
+ }
+
+
+ {
+ 'resumeFrom' : 'prepareInfo',
+ 'hostedByMapPath' : '/user/dnet.production/data/hostedByMap',
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hostedbymap/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/hostedbymap',
+ 'outputPath' : '/tmp/prod_provision/working_dir/hostedbymap',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G'
+ }
+
+ build-report
+
+
+
+
+
+
+ Graph resolution
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'cleanedFirstGraphPath',
+ 'unresolvedPath' :'unresolvedEntityPath',
+ 'targetPath':'resolvedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/relation_resolution',
+ 'shouldResolveEntities' : 'false',
+ 'sparkExecutorCores' : '4',
+ 'sparkExecutorMemory' : '9G'
+ }
+
+
@@ -458,7 +531,7 @@
{
'actionSetId' : 'dedupConfig',
- 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'graphBasePath' : 'resolvedGraphPath',
'dedupGraphPath': 'dedupGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}