diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java
similarity index 96%
rename from dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java
rename to dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java
index ea738836b..3f65d754f 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java
@@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.oa.dedup;
+package eu.dnetlib.dhp.oa.merge;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@@ -38,7 +38,7 @@ public class DispatchEntitiesSparkJob {
.requireNonNull(
DispatchEntitiesSparkJob.class
.getResourceAsStream(
- "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json")));
+ "/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
similarity index 98%
rename from dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java
rename to dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
index a19f86380..e652bd5b6 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
@@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.oa.dedup;
+package eu.dnetlib.dhp.oa.merge;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.utils.DHPUtils.toSeq;
@@ -53,7 +53,7 @@ public class GroupEntitiesSparkJob {
.toString(
GroupEntitiesSparkJob.class
.getResourceAsStream(
- "/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json"));
+ "/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
index 720fe47fb..1ffc66dfd 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
@@ -47,6 +47,17 @@ public class OafMapperUtils {
}
public static Result mergeResults(Result left, Result right) {
+
+ // A record collected from a delegated authority takes precedence: when exactly one of the two
+ // records comes from such a datasource it is returned as-is and the other one is NOT merged into it.
+ // When both or neither do, the type-based merge below decides.
+ final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority(left);
+ final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority(right);
+
+ if (leftFromDelegatedAuthority && !rightFromDelegatedAuthority) {
+ 	return left;
+ }
+ if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) {
+ 	return right;
+ }
+
if (new ResultTypeComparator().compare(left, right) < 0) {
left.mergeFrom(right);
return left;
@@ -56,6 +67,18 @@ public class OafMapperUtils {
}
}
+ /**
+  * Tells whether a result has at least one instance collected from a delegated authority datasource.
+  *
+  * @param r the result to inspect; may be {@code null}
+  * @return {@code true} when any non-null instance of {@code r} has a {@code collectedfrom} whose key is
+  *         contained in {@code IdentifierFactory.delegatedAuthorityDatasourceIds()}; {@code false} when
+  *         {@code r} is {@code null}, has no instance list, or no instance matches
+  */
+ private static boolean isFromDelegatedAuthority(Result r) {
+ 	return Optional
+ 		.ofNullable(r)
+ 		.map(Result::getInstance)
+ 		.map(
+ 			instance -> instance
+ 				.stream()
+ 				// skip null entries and entries without provenance instead of failing on them
+ 				.filter(i -> Objects.nonNull(i) && Objects.nonNull(i.getCollectedfrom()))
+ 				.map(i -> i.getCollectedfrom().getKey())
+ 				// bound method reference: the receiver is evaluated once here, not once per instance
+ 				.anyMatch(IdentifierFactory.delegatedAuthorityDatasourceIds()::contains))
+ 		.orElse(false);
+ }
+
public static KeyValue keyValue(final String k, final String v) {
final KeyValue kv = new KeyValue();
kv.setKey(k);
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json
similarity index 100%
rename from dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json
rename to dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json
similarity index 100%
rename from dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json
rename to dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml
index 4ea003926..7c500493f 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml
@@ -104,7 +104,7 @@
yarn
cluster
group graph entities
- eu.dnetlib.dhp.oa.dedup.GroupEntitiesSparkJob
+ eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob
dhp-dedup-openaire-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
@@ -138,7 +138,7 @@
yarn
cluster
Dispatch publications
- eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
dhp-dedup-openaire-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
@@ -163,7 +163,7 @@
yarn
cluster
Dispatch project
- eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
dhp-dedup-openaire-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
@@ -188,7 +188,7 @@
yarn
cluster
Dispatch organization
- eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
dhp-dedup-openaire-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
@@ -213,7 +213,7 @@
yarn
cluster
Dispatch publication
- eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
dhp-dedup-openaire-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
@@ -238,7 +238,7 @@
yarn
cluster
Dispatch dataset
- eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
dhp-dedup-openaire-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
@@ -263,7 +263,7 @@
yarn
cluster
Dispatch software
- eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
dhp-dedup-openaire-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
@@ -288,7 +288,7 @@
yarn
cluster
Dispatch otherresearchproduct
- eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
dhp-dedup-openaire-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml
new file mode 100644
index 000000000..2e0ed9aee
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml
@@ -0,0 +1,18 @@
+
+
+ jobTracker
+ yarnRM
+
+
+ nameNode
+ hdfs://nameservice1
+
+
+ oozie.use.system.libpath
+ true
+
+
+ oozie.action.sharelib.for.spark
+ spark2
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml
new file mode 100644
index 000000000..f77b46105
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml
@@ -0,0 +1,298 @@
+
+
+
+ graphBasePath
+ the input graph base path
+
+
+ workingPath
+ path of the working directory
+
+
+ graphOutputPath
+ path of the output graph
+
+
+ sparkDriverMemory
+ memory for driver process
+
+
+ sparkExecutorMemory
+ memory for individual executor
+
+
+ sparkExecutorCores
+ number of cores used by single executor
+
+
+
+ oozieActionShareLibForSpark2
+ oozie action sharelib for spark 2.*
+
+
+ spark2ExtraListeners
+ com.cloudera.spark.lineage.NavigatorAppListener
+ spark 2.* extra listeners classname
+
+
+ spark2SqlQueryExecutionListeners
+ com.cloudera.spark.lineage.NavigatorQueryListener
+ spark 2.* sql query execution listeners classname
+
+
+ spark2YarnHistoryServerAddress
+ spark 2.* yarn history server address
+
+
+ spark2EventLogDir
+ spark 2.* event log dir location
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+
+
+ mapreduce.job.queuename
+ ${queueName}
+
+
+ oozie.launcher.mapred.job.queue.name
+ ${oozieLauncherQueueName}
+
+
+ oozie.action.sharelib.for.spark
+ ${oozieActionShareLibForSpark2}
+
+
+
+
+
+
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
+
+ yarn
+ cluster
+ group graph entities
+ eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=15000
+
+ --graphInputPath${graphBasePath}
+ --outputPath${workingPath}/grouped_entities
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dispatch datasource
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --inputPath${workingPath}/grouped_entities
+ --outputPath${graphOutputPath}/datasource
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Datasource
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dispatch project
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --inputPath${workingPath}/grouped_entities
+ --outputPath${graphOutputPath}/project
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Project
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dispatch organization
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --inputPath${workingPath}/grouped_entities
+ --outputPath${graphOutputPath}/organization
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Organization
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dispatch publication
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --inputPath${workingPath}/grouped_entities
+ --outputPath${graphOutputPath}/publication
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dispatch dataset
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --inputPath${workingPath}/grouped_entities
+ --outputPath${graphOutputPath}/dataset
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dispatch software
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --inputPath${workingPath}/grouped_entities
+ --outputPath${graphOutputPath}/software
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dispatch otherresearchproduct
+ eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=7680
+
+ --inputPath${workingPath}/grouped_entities
+ --outputPath${graphOutputPath}/otherresearchproduct
+ --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+
+
+
+
+
+
+
+
+
+ ${nameNode}/${graphBasePath}/relation
+ ${nameNode}/${graphOutputPath}/relation
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java
new file mode 100644
index 000000000..3bd1c13de
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java
@@ -0,0 +1,144 @@
+
+package eu.dnetlib.dhp.oa.graph.group;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.*;
+
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.Lists;
+
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob;
+import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.Publication;
+import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.utils.DHPUtils;
+
+/**
+ * Runs {@link GroupEntitiesSparkJob} and then {@link DispatchEntitiesSparkJob} on a local Spark session,
+ * using the test resources under {@code eu/dnetlib/dhp/oa/graph/group} as input graph.
+ *
+ * The tests are ordered: {@link #testDispatchEntities()} reads the output written by
+ * {@link #testGroupEntities()} into the shared working directory.
+ */
+@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
+public class GroupEntitiesSparkJobTest {
+
+ 	private static SparkSession spark;
+
+ 	// lenient mapper: the serialized records may carry properties unknown to Result
+ 	private static final ObjectMapper mapper = new ObjectMapper()
+ 		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+
+ 	// temporary directory shared by both tests, removed in afterAll()
+ 	private static Path workingDir;
+ 	private Path dataInputPath;
+
+ 	private Path groupEntityPath;
+ 	private Path dispatchEntityPath;
+
+ 	/** Creates the temporary working directory and the local, Kryo-enabled Spark session. */
+ 	@BeforeAll
+ 	public static void beforeAll() throws IOException {
+ 		workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName());
+
+ 		SparkConf conf = new SparkConf();
+ 		conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName());
+ 		conf.setMaster("local");
+ 		conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
+ 		conf.registerKryoClasses(ModelSupport.getOafModelClasses());
+ 		spark = SparkSession.builder().config(conf).getOrCreate();
+ 	}
+
+ 	/** Resolves the input resources and the (stable) output locations inside the working directory. */
+ 	@BeforeEach
+ 	public void beforeEach() throws IOException, URISyntaxException {
+ 		dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI());
+ 		groupEntityPath = workingDir.resolve("grouped_entity");
+ 		dispatchEntityPath = workingDir.resolve("dispatched_entity");
+ 	}
+
+ 	/** Stops the Spark session and deletes the working directory. */
+ 	@AfterAll
+ 	public static void afterAll() throws IOException {
+ 		spark.stop();
+ 		FileUtils.deleteDirectory(workingDir.toFile());
+ 	}
+
+ 	/**
+ 	 * Groups the input entities and checks that exactly one grouped record with the expected id is
+ 	 * collected from Zenodo.
+ 	 */
+ 	@Test
+ 	@Order(1)
+ 	void testGroupEntities() throws Exception {
+ 		GroupEntitiesSparkJob.main(new String[] {
+ 			"-isSparkSessionManaged",
+ 			Boolean.FALSE.toString(),
+ 			"-graphInputPath",
+ 			dataInputPath.toString(),
+ 			"-outputPath",
+ 			groupEntityPath.toString()
+ 		});
+
+ 		// each output line carries a prefix terminated by '|' followed by the JSON payload; keep the payload
+ 		Dataset<Result> output = spark
+ 			.read()
+ 			.textFile(groupEntityPath.toString())
+ 			.map((MapFunction<String, String>) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING())
+ 			.map((MapFunction<String, Result>) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class));
+
+ 		assertEquals(
+ 			1,
+ 			output
+ 				.filter(
+ 					(FilterFunction<Result>) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9"
+ 						.equals(r.getId()) &&
+ 						r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo")))
+ 				.count());
+ 	}
+
+ 	/**
+ 	 * Dispatches the grouped records by type (publication, dataset) and checks the number of records
+ 	 * written for each result type.
+ 	 */
+ 	@Test
+ 	@Order(2)
+ 	void testDispatchEntities() throws Exception {
+ 		for (String type : Lists
+ 			.newArrayList(
+ 				Publication.class.getCanonicalName(), eu.dnetlib.dhp.schema.oaf.Dataset.class.getCanonicalName())) {
+ 			// output sub-directory is the lower-cased simple class name, e.g. "publication"
+ 			String directory = StringUtils.substringAfterLast(type, ".").toLowerCase();
+ 			DispatchEntitiesSparkJob.main(new String[] {
+ 				"-isSparkSessionManaged",
+ 				Boolean.FALSE.toString(),
+ 				"-inputPath",
+ 				groupEntityPath.toString(),
+ 				"-outputPath",
+ 				dispatchEntityPath.resolve(directory).toString(),
+ 				"-graphTableClassName",
+ 				type
+ 			});
+ 		}
+
+ 		Dataset<Result> output = spark
+ 			.read()
+ 			.textFile(
+ 				DHPUtils
+ 					.toSeq(
+ 						HdfsSupport
+ 							.listFiles(dispatchEntityPath.toString(), spark.sparkContext().hadoopConfiguration())))
+ 			.map((MapFunction<String, Result>) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class));
+
+ 		assertEquals(3, output.count());
+ 		assertEquals(
+ 			2,
+ 			output
+ 				.map((MapFunction<Result, String>) r -> r.getResulttype().getClassid(), Encoders.STRING())
+ 				.filter((FilterFunction<String>) s -> s.equals("publication"))
+ 				.count());
+ 		assertEquals(
+ 			1,
+ 			output
+ 				.map((MapFunction<Result, String>) r -> r.getResulttype().getClassid(), Encoders.STRING())
+ 				.filter((FilterFunction<String>) s -> s.equals("dataset"))
+ 				.count());
+ 	}
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json
new file mode 100644
index 000000000..e30be47e9
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json
@@ -0,0 +1,3 @@
+{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"Zenodo"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doi_________::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"Zenodo"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary 
sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]}
+{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; 
Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname
":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]}
+{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant 
A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper 
alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 
1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json
new file mode 100644
index 000000000..29ce76df3
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json
@@ -0,0 +1,3 @@
+{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doi_________::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary 
sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]}
+{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; 
Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","c
lassname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]}
+{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked 
Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant 
A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper 
alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 
1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml
index c10dd4e99..82cf9d3d5 100644
--- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml
+++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml
@@ -4,7 +4,7 @@
-
+
Graph processing [EXPERIMENT]
@@ -15,7 +15,7 @@
set the path of unresolved entities
unresolvedEntityPath
- /data/unresolved_BETA
+ /data/unresolved_BETA/content
@@ -51,6 +51,16 @@
+
+ set the number of iterations in affiliation propagation
+
+ iterations
+ 1
+
+
+
+
+
Set the target path to store the MERGED graph
@@ -91,11 +101,21 @@
+
+ Set the target path to store the GROUPED graph
+
+ groupedGraphPath
+ /tmp/beta_experiment/graph/05_graph_grouped
+
+
+
+
+
Set the target path to store the INFERRED graph
inferredGraphPath
- /tmp/beta_experiment/graph/05_graph_inferred
+ /tmp/beta_experiment/graph/06_graph_inferred
@@ -105,7 +125,7 @@
Set the target path to store the DEDUPED graph
dedupGraphPath
- /tmp/beta_experiment/graph/06_graph_dedup
+ /tmp/beta_experiment/graph/07_graph_dedup
@@ -115,7 +135,7 @@
Set the target path to store the CONSISTENCY graph
consistentGraphPath
- /tmp/beta_experiment/graph/07_graph_consistent
+ /tmp/beta_experiment/graph/08_graph_consistent
@@ -125,7 +145,7 @@
Set the target path to store the ORCID enriched graph
orcidGraphPath
- /tmp/beta_experiment/graph/08_graph_orcid
+ /tmp/beta_experiment/graph/09_graph_orcid
@@ -135,7 +155,7 @@
Set the target path to store the BULK TAGGED graph
bulkTaggingGraphPath
- /tmp/beta_experiment/graph/09_graph_bulktagging
+ /tmp/beta_experiment/graph/10_graph_bulktagging
@@ -145,7 +165,7 @@
Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph
affiliationGraphPath
- /tmp/beta_experiment/graph/10_graph_affiliation
+ /tmp/beta_experiment/graph/11_graph_affiliation
@@ -155,7 +175,7 @@
Set the target path to store the AFFILIATION from SEMATIC RELATION graph
affiliationSemRelGraphPath
- /tmp/beta_experiment/graph/11_graph_affiliationsr
+ /tmp/beta_experiment/graph/12_graph_affiliationsr
@@ -165,7 +185,7 @@
Set the target path to store the COMMUNITY from SELECTED SOURCES graph
communityOrganizationGraphPath
- /tmp/beta_experiment/graph/12_graph_community_organization
+ /tmp/beta_experiment/graph/13_graph_community_organization
@@ -175,7 +195,7 @@
Set the target path to store the FUNDING from SEMANTIC RELATION graph
fundingGraphPath
- /tmp/beta_experiment/graph/13_graph_funding
+ /tmp/beta_experiment/graph/14_graph_funding
@@ -185,7 +205,7 @@
Set the target path to store the COMMUNITY from SEMANTIC RELATION graph
communitySemRelGraphPath
- /tmp/beta_experiment/graph/14_graph_community_sem_rel
+ /tmp/beta_experiment/graph/15_graph_community_sem_rel
@@ -195,7 +215,7 @@
Set the target path to store the COUNTRY enriched graph
countryGraphPath
- /tmp/beta_experiment/graph/15_graph_country
+ /tmp/beta_experiment/graph/16_graph_country
@@ -205,7 +225,7 @@
Set the target path to store the CLEANED graph
cleanedGraphPath
- /tmp/beta_experiment/graph/16_graph_cleaned
+ /tmp/beta_experiment/graph/17_graph_cleaned
@@ -215,7 +235,7 @@
Set the target path to store the blacklisted graph
blacklistedGraphPath
- /tmp/beta_experiment/graph/17_graph_blacklisted
+ /tmp/beta_experiment/graph/18_graph_blacklisted
@@ -548,14 +568,14 @@
'mongoURL' : 'mongodb://beta.services.openaire.eu',
'mongoDb' : 'mdstore',
'mdstoreManagerUrl' : 'https://beta.services.openaire.eu/mdstoremanager',
- 'postgresURL' : '',
+ 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
'postgresUser' : '',
'postgresPassword' : '',
- 'postgresOpenOrgsURL' : '',
+ 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.40:5432/oa_organizations',
'postgresOpenOrgsUser' : '',
'postgresOpenOrgsPassword' : '',
'shouldHashId' : 'true',
- 'importOpenorgs' : 'true',
+ 'importOpenorgs' : 'false',
'workingDir' : '/tmp/beta_experiment/working_dir/beta_aggregator'
}
@@ -594,10 +614,10 @@
'mongoURL' : 'mongodb://services.openaire.eu',
'mongoDb' : 'mdstore',
'mdstoreManagerUrl' : 'https://services.openaire.eu/mdstoremanager',
- 'postgresURL' : '',
+ 'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus',
'postgresUser' : '',
'postgresPassword' : '',
- 'postgresOpenOrgsURL' : '',
+ 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.39:5432/oa_organizations',
'postgresOpenOrgsUser' : '',
'postgresOpenOrgsPassword' : '',
'shouldHashId' : 'true',
@@ -737,11 +757,11 @@
executeOozieJob
IIS
- {
+ {
'graphBasePath':'cleanedFirstGraphPath',
'unresolvedPath' :'unresolvedEntityPath',
'targetPath':'resolvedGraphPath'
- }
+ }
{
@@ -752,6 +772,30 @@
}
+
+
+
+
+
+ Group Entities
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'resolvedGraphPath',
+ 'targetPath':'groupedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app',
+ 'workingDir' : '/tmp/beta_experiment/working_dir/grouping',
+ 'sparkExecutorCores' : '4',
+ 'sparkExecutorMemory' : '7G'
+ }
+
+
@@ -867,9 +911,9 @@
import_mdstore_service_location
import_dataset_mdstore_ids_csv
oozie.wf.application.path
- /lib/iis/primary/snapshots/2021-09-24
+ /lib/iis/primary/snapshots/2021-12-09
IIS
- /tmp/beta_inference/graph/07_graph_cleaned
+ deprecated - not used
import_infospace_graph_location
import_project_concepts_context_ids_csv
@@ -908,7 +952,7 @@
'import_islookup_service_location' : 'import_islookup_service_location',
'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv',
'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv',
- 'import_infospace_graph_location' : 'import_infospace_graph_location',
+ 'import_infospace_graph_location' : 'groupedGraphPath',
'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations',
'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets',
@@ -958,7 +1002,7 @@
{
'inputActionSetIds' : 'actionSetIdsIISGraph',
- 'inputGraphRootPath' : 'resolvedGraphPath',
+ 'inputGraphRootPath' : 'groupedGraphPath',
'outputGraphRootPath' : 'inferredGraphPath',
'isLookupUrl' : 'isLookUpUrl'
}
@@ -1125,7 +1169,8 @@
{
'sourcePath' : 'affiliationGraphPath',
- 'outputPath': 'affiliationSemRelGraphPath'
+ 'outputPath': 'affiliationSemRelGraphPath',
+ 'iterations':'iterations'
}
@@ -1283,7 +1328,7 @@
{
'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app',
'workingDir' : '/tmp/beta_experiment/working_dir/blacklist',
- 'postgresURL' : '',
+ 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus',
'postgresUser' : '',
'postgresPassword' : ''
}
@@ -1296,10 +1341,10 @@
- wf_20211206_093743_83
- 2021-12-06T10:12:32+00:00
- SUCCESS
-
+ wf_20220111_200505_785
+ 2022-01-11T20:08:53+00:00
+
+
-
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml
index 2fed35f44..df9528f4c 100644
--- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml
+++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml
@@ -11,6 +11,16 @@
IIS
30
+
+ set the path of unresolved entities
+
+ unresolvedEntityPath
+ /data/unresolved_BETA/content
+
+
+
+
+
set blacklist of funder nsPrefixes from the beta aggregator
@@ -71,11 +81,31 @@
+
+ Set the target path to store the RESOLVED graph
+
+ resolvedGraphPath
+ /tmp/beta_inference/graph/04_graph_resolved
+
+
+
+
+
+
+ Set the target path to store the GROUPED graph
+
+ groupedGraphPath
+ /tmp/beta_inference/graph/05_graph_grouped
+
+
+
+
+
Set the target path to store the DEDUPED graph
dedupGraphPath
- /tmp/beta_inference/graph/04_graph_dedup
+ /tmp/beta_inference/graph/06_graph_dedup
@@ -85,7 +115,7 @@
Set the target path to store the CONSISTENCY graph
consistentGraphPath
- /tmp/beta_inference/graph/05_graph_consistent
+ /tmp/beta_inference/graph/07_graph_consistent
@@ -95,7 +125,7 @@
Set the target path to store the CLEANED graph
cleanedGraphPath
- /tmp/beta_inference/graph/06_graph_cleaned
+ /tmp/beta_inference/graph/08_graph_cleaned
@@ -548,6 +578,55 @@
build-report
+
+
+
+
+
+ Resolve Relation
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'cleanedFirstGraphPath',
+ 'unresolvedPath' :'unresolvedEntityPath',
+ 'targetPath':'resolvedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app',
+ 'workingDir' : '/tmp/beta_inference/working_dir/relation_resolution',
+ 'sparkExecutorCores' : '2',
+ 'sparkExecutorMemory' : '12G'
+ }
+
+
+
+
+
+
+
+ Group Entities
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'resolvedGraphPath',
+ 'targetPath':'groupedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app',
+ 'workingDir' : '/tmp/beta_inference/working_dir/grouping',
+ 'sparkExecutorCores' : '4',
+ 'sparkExecutorMemory' : '7G'
+ }
+
+
@@ -560,7 +639,7 @@
{
'actionSetId' : 'dedupConfig',
- 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'graphBasePath' : 'groupedGraphPath',
'dedupGraphPath': 'dedupGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml
index e5ce3d710..0ea6be341 100644
--- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml
+++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml
@@ -11,6 +11,16 @@
IIS
30
+
+ set the path of unresolved entities
+
+ unresolvedEntityPath
+ /data/unresolved_PROD/content
+
+
+
+
+
set blacklist of funder nsPrefixes
@@ -61,11 +71,21 @@
+
+ Set the target path to store the RESOLVED graph
+
+ resolvedGraphPath
+ /tmp/prod_inference/graph/03_graph_resolved
+
+
+
+
+
Set the target path to store the DEDUPED graph
dedupGraphPath
- /tmp/prod_inference/graph/03_graph_dedup
+ /tmp/prod_inference/graph/04_graph_dedup
@@ -75,7 +95,7 @@
Set the target path to store the CONSISTENCY graph
consistentGraphPath
- /tmp/prod_inference/graph/04_graph_consistent
+ /tmp/prod_inference/graph/05_graph_consistent
@@ -85,7 +105,7 @@
Set the target path to store the CLEANED graph
cleanedGraphPath
- /tmp/prod_inference/graph/05_graph_cleaned
+ /tmp/prod_inference/graph/06_graph_cleaned
@@ -347,6 +367,31 @@
build-report
+
+
+
+
+
+ Resolve Relation
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'cleanedFirstGraphPath',
+ 'unresolvedPath' :'unresolvedEntityPath',
+ 'targetPath':'resolvedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app',
+ 'workingDir' : '/tmp/prod_inference/working_dir/relation_resolution',
+ 'sparkExecutorCores' : '2',
+ 'sparkExecutorMemory' : '12G'
+ }
+
+
@@ -359,7 +404,7 @@
{
'actionSetId' : 'dedupConfig',
- 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'graphBasePath' : 'resolvedGraphPath',
'dedupGraphPath': 'dedupGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml
index f83337b3c..73c44aba8 100644
--- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml
+++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml
@@ -11,6 +11,16 @@
Data Provision
30
+
+ set the path of unresolved entities
+
+ unresolvedEntityPath
+ /data/unresolved_BETA/content
+
+
+
+
+
set blacklist of funder nsPrefixes from the beta aggregator
@@ -71,11 +81,31 @@
+
+ Set the target path to store the RESOLVED graph
+
+ resolvedGraphPath
+ /tmp/beta_provision/graph/04_graph_resolved
+
+
+
+
+
+
+ Set the target path to store the GROUPED graph
+
+ groupedGraphPath
+ /tmp/beta_provision/graph/05_graph_grouped
+
+
+
+
+
Set the target path to store the DEDUPED graph
dedupGraphPath
- /tmp/beta_provision/graph/04_graph_dedup
+ /tmp/beta_provision/graph/06_graph_dedup
@@ -85,7 +115,7 @@
Set the target path to store the INFERRED graph
inferredGraphPath
- /tmp/beta_provision/graph/05_graph_inferred
+ /tmp/beta_provision/graph/07_graph_inferred
@@ -95,7 +125,7 @@
Set the target path to store the CONSISTENCY graph
consistentGraphPath
- /tmp/beta_provision/graph/06_graph_consistent
+ /tmp/beta_provision/graph/08_graph_consistent
@@ -105,7 +135,7 @@
Set the target path to store the ORCID enriched graph
orcidGraphPath
- /tmp/beta_provision/graph/07_graph_orcid
+ /tmp/beta_provision/graph/09_graph_orcid
@@ -115,7 +145,7 @@
Set the target path to store the BULK TAGGED graph
bulkTaggingGraphPath
- /tmp/beta_provision/graph/08_graph_bulktagging
+ /tmp/beta_provision/graph/10_graph_bulktagging
@@ -125,7 +155,17 @@
Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph
affiliationGraphPath
- /tmp/beta_provision/graph/09_graph_affiliation
+ /tmp/beta_provision/graph/11_graph_affiliation
+
+
+
+
+
+
+ Set the target path to store the AFFILIATION from SEMANTIC RELATION graph
+
+ affiliationSemRelGraphPath
+ /tmp/beta_provision/graph/12_graph_affiliationsr
@@ -135,7 +175,7 @@
Set the target path to store the COMMUNITY from SELECTED SOURCES graph
communityOrganizationGraphPath
- /tmp/beta_provision/graph/10_graph_comunity_organization
+ /tmp/beta_provision/graph/13_graph_comunity_organization
@@ -145,7 +185,7 @@
Set the target path to store the FUNDING from SEMANTIC RELATION graph
fundingGraphPath
- /tmp/beta_provision/graph/11_graph_funding
+ /tmp/beta_provision/graph/14_graph_funding
@@ -155,7 +195,7 @@
Set the target path to store the COMMUNITY from SEMANTIC RELATION graph
communitySemRelGraphPath
- /tmp/beta_provision/graph/12_graph_comunity_sem_rel
+ /tmp/beta_provision/graph/15_graph_comunity_sem_rel
@@ -165,7 +205,7 @@
Set the target path to store the COUNTRY enriched graph
countryGraphPath
- /tmp/beta_provision/graph/13_graph_country
+ /tmp/beta_provision/graph/16_graph_country
@@ -175,7 +215,7 @@
Set the target path to store the CLEANED graph
cleanedGraphPath
- /tmp/beta_provision/graph/14_graph_cleaned
+ /tmp/beta_provision/graph/17_graph_cleaned
@@ -185,7 +225,7 @@
Set the target path to store the blacklisted graph
blacklistedGraphPath
- /tmp/beta_provision/graph/15_graph_blacklisted
+ /tmp/beta_provision/graph/18_graph_blacklisted
@@ -695,6 +735,55 @@
build-report
+
+
+
+
+
+ Resolve Relation
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'cleanedFirstGraphPath',
+ 'unresolvedPath' :'unresolvedEntityPath',
+ 'targetPath':'resolvedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/relation_resolution',
+ 'sparkExecutorCores' : '2',
+ 'sparkExecutorMemory' : '12G'
+ }
+
+
+
+
+
+
+
+ Group Entities
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'resolvedGraphPath',
+ 'targetPath':'groupedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app',
+ 'workingDir' : '/tmp/beta_provision/working_dir/grouping',
+ 'sparkExecutorCores' : '4',
+ 'sparkExecutorMemory' : '7G'
+ }
+
+
@@ -707,7 +796,7 @@
{
'actionSetId' : 'dedupConfig',
- 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'graphBasePath' : 'groupedGraphPath',
'dedupGraphPath': 'dedupGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml
index be6155f2f..205db29a6 100644
--- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml
+++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml
@@ -11,6 +11,16 @@
Data Provision
30
+
+ set the path of unresolved entities
+
+ unresolvedEntityPath
+ /data/unresolved_PROD/content
+
+
+
+
+
set blacklist of funder nsPrefixes
@@ -51,11 +61,21 @@
+
+ Set the target path to store the RESOLVED graph
+
+ resolvedGraphPath
+ /tmp/prod_provision/graph/03_graph_resolved
+
+
+
+
+
Set the target path to store the DEDUPED graph
dedupGraphPath
- /tmp/prod_provision/graph/03_graph_dedup
+ /tmp/prod_provision/graph/04_graph_dedup
@@ -65,7 +85,7 @@
Set the target path to store the INFERRED graph
inferredGraphPath
- /tmp/prod_provision/graph/04_graph_inferred
+ /tmp/prod_provision/graph/05_graph_inferred
@@ -75,7 +95,7 @@
Set the target path to store the CONSISTENCY graph
consistentGraphPath
- /tmp/prod_provision/graph/05_graph_consistent
+ /tmp/prod_provision/graph/06_graph_consistent
@@ -85,7 +105,7 @@
Set the target path to store the ORCID enriched graph
orcidGraphPath
- /tmp/prod_provision/graph/06_graph_orcid
+ /tmp/prod_provision/graph/07_graph_orcid
@@ -95,7 +115,7 @@
Set the target path to store the BULK TAGGED graph
bulkTaggingGraphPath
- /tmp/prod_provision/graph/07_graph_bulktagging
+ /tmp/prod_provision/graph/08_graph_bulktagging
@@ -105,7 +125,7 @@
Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph
affiliationGraphPath
- /tmp/prod_provision/graph/08_graph_affiliation
+ /tmp/prod_provision/graph/09_graph_affiliation
@@ -115,7 +135,7 @@
Set the target path to store the COMMUNITY from SELECTED SOURCES graph
communityOrganizationGraphPath
- /tmp/prod_provision/graph/09_graph_comunity_organization
+ /tmp/prod_provision/graph/10_graph_comunity_organization
@@ -125,7 +145,7 @@
Set the target path to store the FUNDING from SEMANTIC RELATION graph
fundingGraphPath
- /tmp/prod_provision/graph/10_graph_funding
+ /tmp/prod_provision/graph/11_graph_funding
@@ -135,7 +155,7 @@
Set the target path to store the COMMUNITY from SEMANTIC RELATION graph
communitySemRelGraphPath
- /tmp/prod_provision/graph/11_graph_comunity_sem_rel
+ /tmp/prod_provision/graph/12_graph_comunity_sem_rel
@@ -145,7 +165,7 @@
Set the target path to store the COUNTRY enriched graph
countryGraphPath
- /tmp/prod_provision/graph/12_graph_country
+ /tmp/prod_provision/graph/13_graph_country
@@ -155,7 +175,7 @@
Set the target path to store the CLEANED graph
cleanedGraphPath
- /tmp/prod_provision/graph/13_graph_cleaned
+ /tmp/prod_provision/graph/14_graph_cleaned
@@ -165,7 +185,7 @@
Set the target path to store the blacklisted graph
blacklistedGraphPath
- /tmp/prod_provision/graph/14_graph_blacklisted
+ /tmp/prod_provision/graph/15_graph_blacklisted
@@ -446,6 +466,59 @@
build-report
+
+
+
+
+
+ updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI
+
+ executeOozieJob
+ IIS
+
+ {
+ 'sourcePath' : 'cleanedFirstGraphPath'
+ }
+
+
+ {
+ 'resumeFrom' : 'prepareInfo',
+ 'hostedByMapPath' : '/user/dnet.production/data/hostedByMap',
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hostedbymap/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/hostedbymap',
+ 'outputPath' : '/tmp/prod_provision/working_dir/hostedbymap',
+ 'sparkExecutorCores' : '3',
+ 'sparkExecutorMemory' : '10G'
+ }
+
+ build-report
+
+
+
+
+
+
+ Graph resolution
+
+ executeOozieJob
+ IIS
+
+ {
+ 'graphBasePath':'cleanedFirstGraphPath',
+ 'unresolvedPath' :'unresolvedEntityPath',
+ 'targetPath':'resolvedGraphPath'
+ }
+
+
+ {
+ 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app',
+ 'workingDir' : '/tmp/prod_provision/working_dir/relation_resolution',
+ 'shouldResolveEntities' : 'false',
+ 'sparkExecutorCores' : '4',
+ 'sparkExecutorMemory' : '9G'
+ }
+
+
@@ -458,7 +531,7 @@
{
'actionSetId' : 'dedupConfig',
- 'graphBasePath' : 'cleanedFirstGraphPath',
+ 'graphBasePath' : 'resolvedGraphPath',
'dedupGraphPath': 'dedupGraphPath',
'isLookUpUrl' : 'isLookUpUrl'
}
diff --git a/pom.xml b/pom.xml
index 7a026e668..86c3b4526 100644
--- a/pom.xml
+++ b/pom.xml
@@ -797,7 +797,7 @@
3.3.3
3.4.2
[2.12,3.0)
- [2.10.24]
+ [2.10.26-SNAPSHOT]
[4.0.3]
[6.0.5]
[3.1.6]