diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphSparkJob.java
new file mode 100644
index 0000000000..d33b9e8468
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphSparkJob.java
@@ -0,0 +1,130 @@
+package eu.dnetlib.dhp.oa.graph.merge;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.*;
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Tuple2;
+
+import java.util.Objects;
+import java.util.Optional;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+/**
+ * Combines the content from two aggregator graph tables of the same type, entities (or relationships) with the same ids
+ * are picked preferring those from the BETA aggregator rather than from PROD. The identity of a relationship is defined
+ * by eu.dnetlib.dhp.schema.common.ModelSupport#idFn()
+ */
+public class MergeGraphSparkJob {
+
+ // Logger bound to this job's own class, so its log lines are not attributed to CleanGraphSparkJob.
+ private static final Logger log = LoggerFactory.getLogger(MergeGraphSparkJob.class);
+
+ // Shared Jackson mapper used by readTableFromPath to deserialize each JSON line of a graph table.
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ /**
+  * Job entry point: parses the command line arguments, then merges a single graph table read from the BETA
+  * and PROD input paths into the output path.
+  *
+  * @param args command line arguments, as specified by merge_graphs_parameters.json
+  * @throws Exception when the argument specification cannot be read or parsed, or when the class named by
+  *                   graphTableClassName cannot be loaded
+  */
+ public static void main(String[] args) throws Exception {
+
+     // the argument specification is looked up through this job's own class (was CleanGraphSparkJob)
+     String jsonConfiguration = IOUtils
+         .toString(
+             MergeGraphSparkJob.class
+                 .getResourceAsStream(
+                     "/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json"));
+     final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+     parser.parseArgument(args);
+
+     // optional flag: defaults to TRUE when the argument is not provided
+     Boolean isSparkSessionManaged = Optional
+         .ofNullable(parser.get("isSparkSessionManaged"))
+         .map(Boolean::valueOf)
+         .orElse(Boolean.TRUE);
+     log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+     String betaInputPath = parser.get("betaInputPath");
+     log.info("betaInputPath: {}", betaInputPath);
+
+     String prodInputPath = parser.get("prodInputPath");
+     log.info("prodInputPath: {}", prodInputPath);
+
+     String outputPath = parser.get("outputPath");
+     log.info("outputPath: {}", outputPath);
+
+     String graphTableClassName = parser.get("graphTableClassName");
+     log.info("graphTableClassName: {}", graphTableClassName);
+
+     // NOTE(review): unchecked cast; the merge workflow also runs this job with
+     // eu.dnetlib.dhp.schema.oaf.Relation - confirm that class is compatible with the OafEntity bound
+     Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
+
+     SparkConf conf = new SparkConf();
+     conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
+     conf.registerKryoClasses(ModelSupport.getOafModelClasses());
+
+     runWithSparkSession(
+         conf,
+         isSparkSessionManaged,
+         spark -> {
+             removeOutputDir(spark, outputPath);
+             // the same class models both the PROD and the BETA version of the table
+             mergeGraphTable(spark, betaInputPath, prodInputPath, entityClazz, entityClazz, outputPath);
+         });
+ }
+
+ /**
+  * Full-outer-joins the PROD and BETA versions of a graph table on the record identifier and keeps, for each
+  * identifier, the BETA record when one exists and the PROD record otherwise. The result is written to
+  * outputPath as gzip-compressed JSON, in overwrite mode.
+  *
+  * NOTE(review): the generic bounds (P extends Oaf, B extends Oaf) were reconstructed from a rendering of the
+  * patch in which the type arguments were lost - confirm against the committed file.
+  *
+  * @param spark         the active Spark session
+  * @param betaInputPath path of the BETA version of the table
+  * @param prodInputPath path of the PROD version of the table
+  * @param p_clazz       class modelling the PROD records, also used to encode the output
+  * @param b_clazz       class modelling the BETA records
+  * @param outputPath    path where the merged table is written
+  */
+ private static <P extends Oaf, B extends Oaf> void mergeGraphTable(
+     SparkSession spark,
+     String betaInputPath,
+     String prodInputPath,
+     Class<P> p_clazz,
+     Class<B> b_clazz,
+     String outputPath) {
+
+     Dataset<Tuple2<String, B>> beta = readTableFromPath(spark, betaInputPath, b_clazz);
+     Dataset<Tuple2<String, P>> prod = readTableFromPath(spark, prodInputPath, p_clazz);
+
+     prod
+         .joinWith(beta, prod.col("_1").equalTo(beta.col("_1")), "full_outer")
+         .map((MapFunction<Tuple2<Tuple2<String, P>, Tuple2<String, B>>, P>) value -> {
+             // either side of the joined pair is null when the identifier is missing from that graph
+             Optional<P> p = Optional.ofNullable(value._1()).map(Tuple2::_2);
+             Optional<B> b = Optional.ofNullable(value._2()).map(Tuple2::_2);
+             if (p.isPresent() && !b.isPresent()) {
+                 return p.get();
+             }
+             if (b.isPresent()) {
+                 // BETA takes precedence; main passes the same class for P and B, so the cast holds there
+                 return (P) b.get();
+             }
+             return null;
+         }, Encoders.bean(p_clazz))
+         .filter((FilterFunction<P>) Objects::nonNull)
+         .write()
+         .mode(SaveMode.Overwrite)
+         .option("compression", "gzip")
+         .json(outputPath);
+ }
+
+ /**
+  * Reads a graph table stored as text, one JSON record per line, and pairs every record with the identifier
+  * computed by ModelSupport.idFn().
+  *
+  * NOTE(review): the generic bound (T extends Oaf) was reconstructed from a rendering of the patch in which
+  * the type arguments were lost - confirm against the committed file.
+  *
+  * @param spark           the active Spark session
+  * @param inputEntityPath path of the table to read
+  * @param clazz           class each JSON line is deserialized into
+  * @return a dataset of (identifier, record) pairs; the record side is Kryo-encoded
+  */
+ private static <T extends Oaf> Dataset<Tuple2<String, T>> readTableFromPath(
+     SparkSession spark, String inputEntityPath, Class<T> clazz) {
+
+     log.info("Reading Graph table from: {}", inputEntityPath);
+     return spark
+         .read()
+         .textFile(inputEntityPath)
+         .map(
+             (MapFunction<String, Tuple2<String, T>>) value -> {
+                 final T t = OBJECT_MAPPER.readValue(value, clazz);
+                 final String id = ModelSupport.idFn().apply(t);
+                 return new Tuple2<>(id, t);
+             },
+             Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz)));
+ }
+
+ /**
+  * Removes the given path by delegating to HdfsSupport.remove, using the Hadoop configuration of the
+  * session's Spark context. Called by main before the merged table is written.
+  *
+  * @param spark the active Spark session
+  * @param path  the path to remove
+  */
+ private static void removeOutputDir(SparkSession spark, String path) {
+ HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge/oozie_app/config-default.xml
new file mode 100644
index 0000000000..2e0ed9aeea
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge/oozie_app/config-default.xml
@@ -0,0 +1,18 @@
+
+
+ jobTracker
+ yarnRM
+
+
+ nameNode
+ hdfs://nameservice1
+
+
+ oozie.use.system.libpath
+ true
+
+
+ oozie.action.sharelib.for.spark
+ spark2
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge/oozie_app/workflow.xml
new file mode 100644
index 0000000000..0a512fb6ab
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge/oozie_app/workflow.xml
@@ -0,0 +1,282 @@
+
+
+
+
+ betaInputGgraphPath
+ the beta graph root path
+
+
+ prodInputGgraphPath
+ the production graph root path
+
+
+ graphOutputPath
+ the output merged graph root path
+
+
+
+ sparkDriverMemory
+ memory for driver process
+
+
+ sparkExecutorMemory
+ memory for individual executor
+
+
+ sparkExecutorCores
+ number of cores used by single executor
+
+
+ oozieActionShareLibForSpark2
+ oozie action sharelib for spark 2.*
+
+
+ spark2ExtraListeners
+ com.cloudera.spark.lineage.NavigatorAppListener
+ spark 2.* extra listeners classname
+
+
+ spark2SqlQueryExecutionListeners
+ com.cloudera.spark.lineage.NavigatorQueryListener
+ spark 2.* sql query execution listeners classname
+
+
+ spark2YarnHistoryServerAddress
+ spark 2.* yarn history server address
+
+
+ spark2EventLogDir
+ spark 2.* event log dir location
+
+
+
+
+
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Merge publications
+ eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --betaInputPath${betaInputGgraphPath}/publication
+ --prodInputPath${prodInputGgraphPath}/publication
+ --outputPath${graphOutputPath}/publication
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Merge datasets
+ eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --betaInputPath${betaInputGgraphPath}/dataset
+ --prodInputPath${prodInputGgraphPath}/dataset
+ --outputPath${graphOutputPath}/dataset
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Merge otherresearchproducts
+ eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --betaInputPath${betaInputGgraphPath}/otherresearchproduct
+ --prodInputPath${prodInputGgraphPath}/otherresearchproduct
+ --outputPath${graphOutputPath}/otherresearchproduct
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Merge softwares
+ eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --betaInputPath${betaInputGgraphPath}/software
+ --prodInputPath${prodInputGgraphPath}/software
+ --outputPath${graphOutputPath}/software
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Merge datasources
+ eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --betaInputPath${betaInputGgraphPath}/datasource
+ --prodInputPath${prodInputGgraphPath}/datasource
+ --outputPath${graphOutputPath}/datasource
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Datasource
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Merge organizations
+ eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --betaInputPath${betaInputGgraphPath}/organization
+ --prodInputPath${prodInputGgraphPath}/organization
+ --outputPath${graphOutputPath}/organization
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Organization
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Merge projects
+ eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --betaInputPath${betaInputGgraphPath}/project
+ --prodInputPath${prodInputGgraphPath}/project
+ --outputPath${graphOutputPath}/project
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Project
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Merge relations
+ eu.dnetlib.dhp.oa.graph.merge.MergeGraphSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+
+ --betaInputPath${betaInputGgraphPath}/relation
+ --prodInputPath${prodInputGgraphPath}/relation
+ --outputPath${graphOutputPath}/relation
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Relation
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json
new file mode 100644
index 0000000000..6018b7e93c
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json
@@ -0,0 +1,32 @@
+[
+ {
+ "paramName": "issm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "when true will stop SparkSession after job execution",
+ "paramRequired": false
+ },
+ {
+ "paramName": "bin",
+ "paramLongName": "betaInputPath",
+ "paramDescription": "the beta graph root path",
+ "paramRequired": true
+ },
+ {
+ "paramName": "pin",
+ "paramLongName": "prodInputPath",
+ "paramDescription": "the production graph root path",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the output merged graph root path",
+ "paramRequired": true
+ },
+ {
+ "paramName": "class",
+ "paramLongName": "graphTableClassName",
+ "paramDescription": "class name modelling the graph table",
+ "paramRequired": true
+ }
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml
new file mode 100644
index 0000000000..08ed24cd07
--- /dev/null
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml
@@ -0,0 +1,779 @@
+
+
+
+ Graph Construction [HYBRID]
+ Data Provision
+ 30
+
+
+
+ reuse cached content from the PROD aggregation system
+
+ reuseProdContent
+ true
+
+
+
+
+
+
+
+ set the PROD aggregator content path
+
+ prodContentPath
+ /tmp/core_aggregator
+
+
+
+
+
+
+
+ Set the path containing the PROD AGGREGATOR graph
+
+ prodAggregatorGraphPath
+ /tmp/core_provision/graph/00_prod_graph_aggregator
+
+
+
+
+
+
+
+ reuse cached content from the BETA aggregation system
+
+ reuseBetaContent
+ true
+
+
+
+
+
+
+
+ set the BETA aggregator content path
+
+ betaContentPath
+ /tmp/beta_aggregator
+
+
+
+
+
+
+
+ Set the path containing the BETA AGGREGATOR graph
+
+ betaAggregatorGraphPath
+ /tmp/core_provision/graph/00_beta_graph_aggregator
+
+
+
+
+
+
+
+ Set the IS lookup service address
+
+ isLookUpUrl
+ http://services.openaire.eu:8280/is/services/isLookUp?wsdl
+
+
+
+
+
+
+
+ Set the target path to store the MERGED graph
+
+ mergedGraphPath
+ /tmp/core_provision/graph/01_graph_merged
+
+
+
+
+
+
+
+ Set the target path to store the RAW graph
+
+ rawGraphPath
+ /tmp/core_provision/graph/02_graph_raw
+
+
+
+
+
+
+
+ Set the target path to store the DEDUPED graph
+
+ dedupGraphPath
+ /tmp/core_provision/graph/03_graph_dedup
+
+
+
+
+
+
+
+ Set the target path to store the INFERRED graph
+
+ inferredGraphPath
+ /tmp/core_provision/graph/04_graph_inferred
+
+
+
+
+
+
+
+ Set the target path to store the CONSISTENCY graph
+
+ consistentGraphPath
+ /tmp/core_provision/graph/05_graph_consistent
+
+
+
+
+
+
+
+ Set the target path to store the ORCID enriched graph
+
+ orcidGraphPath
+ /tmp/core_provision/graph/06_graph_orcid
+
+
+
+
+
+
+
+ Set the target path to store the BULK TAGGED graph
+
+ bulkTaggingGraphPath
+ /tmp/core_provision/graph/07_graph_bulktagging
+
+
+
+
+
+
+
+ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph
+
+ affiliationGraphPath
+ /tmp/core_provision/graph/08_graph_affiliation
+
+
+
+
+
+
+
+ Set the target path to store the COMMUNITY from SELECTED SOURCES graph
+
+ communityOrganizationGraphPath
+ /tmp/core_provision/graph/09_graph_comunity_organization
+
+
+
+
+
+
+
+ Set the target path to store the FUNDING from SEMANTIC RELATION graph
+
+ fundingGraphPath
+ /tmp/core_provision/graph/10_graph_funding
+
+
+
+
+
+
+
+ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph
+
+ communitySemRelGraphPath
+ /tmp/core_provision/graph/11_graph_comunity_sem_rel
+
+
+
+
+
+
+
+ Set the target path to store the COUNTRY enriched graph
+
+ countryGraphPath
+ /tmp/core_provision/graph/12_graph_country
+
+
+
+
+
+
+
+ Set the target path to store the CLEANED graph
+
+ cleanedGraphPath
+ /tmp/core_provision/graph/13_graph_cleaned
+
+
+
+
+
+
+
+ Set the target path to store the blacklisted graph
+
+ blacklistedGraphPath
+ /tmp/core_provision/graph/14_graph_blacklisted
+
+
+
+
+
+
+
+ Set the map of paths for the Bulk Tagging
+
+ bulkTaggingPathMap
+ {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"}
+
+
+
+
+
+
+
+ Set the map of associations organization, community list for the propagation of community to result through organization
+
+ propagationOrganizationCommunityMap
+ {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], 
"20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"],
+ "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]}
+
+
+
+
+
+
+
+
+ Set the dedup orchestrator name
+
+ dedupConfig
+ decisiontree-dedup-test
+
+
+
+
+
+
+
+ declares the ActionSet ids to promote in the RAW graph
+
+ actionSetIdsRawGraph
+ scholexplorer-dump,gridac-dump,doiboost-organizations,doiboost,orcidworks-no-doi,iis-wos-entities,iis-entities-software,iis-entities-patent
+
+
+
+
+
+
+
+ declares the ActionSet ids to promote in the INFERRED graph
+
+ actionSetIdsIISGraph
+ iis-researchinitiative,iis-document-citations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-datasets-preprocessing,iis-referenced-projects-main,iis-referenced-projects-preprocessing,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19
+
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+
+
+
+ create the BETA AGGREGATOR graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphOutputPath' : 'betaAggregatorGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl',
+ 'reuseContent' : 'reuseBetaContent',
+ 'contentPath' : 'betaContentPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app',
+ 'mongoURL' : 'mongodb://beta.services.openaire.eu',
+ 'mongoDb' : 'mdstore',
+ 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
+ 'postgresUser' : 'dnet',
+ 'postgresPassword' : '',
+ 'workingDir' : '/tmp/core_provision/working_dir/beta_aggregator'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ create the PROD AGGREGATOR graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphOutputPath' : 'prodAggregatorGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl',
+ 'reuseContent' : 'reuseProdContent',
+ 'contentPath' : 'prodContentPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app',
+ 'mongoURL' : 'mongodb://services.openaire.eu',
+ 'mongoDb' : 'mdstore',
+ 'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus',
+ 'postgresUser' : 'dnet',
+ 'postgresPassword' : '',
+ 'workingDir' : '/tmp/core_provision/working_dir/prod_aggregator'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ wait configurations
+
+
+
+
+
+
+
+ merge the BETA and PROD AGGREGATOR graphs into the MERGED graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'betaInputGgraphPath' : 'betaAggregatorGraphPath',
+ 'prodInputGgraphPath' : 'prodAggregatorGraphPath',
+ 'graphOutputPath' : 'mergedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/graph/merge/oozie_app',
+ 'workingDir' : '/tmp/core_provision/working_dir/merge_graph'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ create the RAW graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'inputActionSetIds' : 'actionSetIdsRawGraph',
+ 'inputGraphRootPath' : 'mergedGraphPath',
+ 'outputGraphRootPath' : 'rawGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G',
+ 'activePromoteDatasetActionPayload' : 'true',
+ 'activePromoteDatasourceActionPayload' : 'true',
+ 'activePromoteOrganizationActionPayload' : 'true',
+ 'activePromoteOtherResearchProductActionPayload' : 'true',
+ 'activePromoteProjectActionPayload' : 'true',
+ 'activePromotePublicationActionPayload' : 'true',
+ 'activePromoteRelationActionPayload' : 'true',
+ 'activePromoteResultActionPayload' : 'true',
+ 'activePromoteSoftwareActionPayload' : 'true',
+ 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
+ 'workingDir' : '/tmp/core_provision/working_dir/promoteActionsRaw'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ search for duplicates in the raw graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'actionSetId' : 'dedupConfig',
+ 'graphBasePath' : 'rawGraphPath',
+ 'dedupGraphPath': 'dedupGraphPath',
+ 'isLookUpUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/scan/oozie_app',
+ 'workingPath' : '/tmp/core_provision/working_dir/dedup'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ create the INFERRED graph
+
+ executeOozieJob
+ IIS
+
+ {
+ 'inputActionSetIds' : 'actionSetIdsIISGraph',
+ 'inputGraphRootPath' : 'dedupGraphPath',
+ 'outputGraphRootPath' : 'inferredGraphPath',
+ 'isLookupUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G',
+ 'activePromoteDatasetActionPayload' : 'true',
+ 'activePromoteDatasourceActionPayload' : 'true',
+ 'activePromoteOrganizationActionPayload' : 'true',
+ 'activePromoteOtherResearchProductActionPayload' : 'true',
+ 'activePromoteProjectActionPayload' : 'true',
+ 'activePromotePublicationActionPayload' : 'true',
+ 'activePromoteRelationActionPayload' : 'true',
+ 'activePromoteResultActionPayload' : 'true',
+ 'activePromoteSoftwareActionPayload' : 'true',
+ 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET',
+ 'workingDir' : '/tmp/core_provision/working_dir/promoteActionsIIS'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ mark duplicates as deleted and redistribute the relationships
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath' : 'inferredGraphPath',
+ 'dedupGraphPath': 'consistentGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/consistency/oozie_app',
+ 'workingPath' : '/tmp/core_provision/working_dir/dedup'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ propagates ORCID among results linked by allowedsemrels semantic relationships
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'consistentGraphPath',
+ 'outputPath': 'orcidGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/orcidtoresultfromsemrel/oozie_app',
+ 'workingDir' : '/tmp/core_provision/working_dir/orcid',
+ 'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ mark results respecting some rules as belonging to communities
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'orcidGraphPath',
+ 'outputPath': 'bulkTaggingGraphPath',
+ 'isLookUpUrl' : 'isLookUpUrl',
+ 'pathMap' : 'bulkTaggingPathMap'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/bulktag/oozie_app',
+ 'workingDir' : '/tmp/core_provision/working_dir/bulktag'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ creates relationships between results and organizations when the organizations are associated to institutional repositories
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'bulkTaggingGraphPath',
+ 'outputPath': 'affiliationGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/affiliation/oozie_app',
+ 'workingDir' : '/tmp/core_provision/working_dir/affiliation',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'affiliationGraphPath',
+ 'outputPath': 'communityOrganizationGraphPath',
+ 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_organization/oozie_app',
+ 'workingDir' : '/tmp/core_provision/working_dir/community_organization',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ creates relations between projects and results linked to other results through allowedsemrel semantic relations linked to projects
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'communityOrganizationGraphPath',
+ 'outputPath': 'fundingGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/funding/oozie_app',
+ 'workingDir' : '/tmp/core_provision/working_dir/funding',
+ 'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ tags as belonging to communities the results in allowedsemrels relation with other results already linked to communities
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'fundingGraphPath',
+ 'outputPath': 'communitySemRelGraphPath',
+ 'isLookUpUrl' : 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_semrel/oozie_app',
+ 'workingDir' : '/tmp/core_provision/working_dir/community_semrel',
+ 'allowedsemrels' : 'isSupplementedBy;isSupplementTo',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ associates to results collected from allowedtypes and those in the whitelist the country of the organization(s) handling the datasource it is collected from
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'communitySemRelGraphPath',
+ 'outputPath': 'countryGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/country/oozie_app',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G',
+ 'workingDir' : '/tmp/core_provision/working_dir/country',
+ 'allowedtypes' : 'pubsrepository::institutional',
+ 'whitelist' : '10|opendoar____::300891a62162b960cf02ce3827bb363c',
+ 'saveGraph' : 'true'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphInputPath' : 'countryGraphPath',
+ 'graphOutputPath': 'cleanedGraphPath',
+ 'isLookupUrl': 'isLookUpUrl'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app',
+ 'workingPath' : '/tmp/core_provision/working_dir/clean'
+ }
+
+ build-report
+
+
+
+
+
+
+
+ removes blacklisted relations
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'cleanedGraphPath',
+ 'outputPath': 'blacklistedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/blacklist/oozie_app',
+ 'workingDir' : '/tmp/core_provision/working_dir/blacklist',
+ 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
+ 'postgresUser' : 'dnet',
+ 'postgresPassword' : ''
+ }
+
+ build-report
+
+
+
+
+
+
+
+
+ wf_20200615_163630_609
+ 2020-06-15T17:08:00+00:00
+ SUCCESS
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_construction.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_core_construction.xml
similarity index 99%
rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_construction.xml
rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_core_construction.xml
index 4d77883b49..3bce81c50a 100644
--- a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_construction.xml
+++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_core_construction.xml
@@ -413,7 +413,7 @@
build-report
-
+