diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java similarity index 96% rename from dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java index ea738836b..3f65d754f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.dedup; +package eu.dnetlib.dhp.oa.merge; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; @@ -38,7 +38,7 @@ public class DispatchEntitiesSparkJob { .requireNonNull( DispatchEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json"))); + "/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java similarity index 98% rename from dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java index a19f86380..e652bd5b6 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.dedup; +package eu.dnetlib.dhp.oa.merge; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.utils.DHPUtils.toSeq; @@ -53,7 +53,7 @@ public class GroupEntitiesSparkJob { .toString( GroupEntitiesSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json")); + "/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index 720fe47fb..1ffc66dfd 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -47,6 +47,17 @@ public class OafMapperUtils { } public static Result mergeResults(Result left, Result right) { + + final boolean leftFromDeletedAuthority = isFromDelegatedAuthority(left); + final boolean rightFromDeletedAuthority = isFromDelegatedAuthority(right); + + if (leftFromDeletedAuthority && !rightFromDeletedAuthority) { + return left; + } + if (!leftFromDeletedAuthority && rightFromDeletedAuthority) { + return right; + } + if (new ResultTypeComparator().compare(left, right) < 0) { left.mergeFrom(right); return left; @@ -56,6 +67,18 @@ public class OafMapperUtils { } } + private static boolean isFromDelegatedAuthority(Result r) { + return Optional + .ofNullable(r.getInstance()) + .map( + instance -> instance + .stream() + .filter(i -> Objects.nonNull(i.getCollectedfrom())) + .map(i -> i.getCollectedfrom().getKey()) + .anyMatch(cfId -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId))) + .orElse(false); + } + public static KeyValue keyValue(final String k, final String v) { final KeyValue kv = new KeyValue(); kv.setKey(k); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json similarity index 100% rename from dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json rename to dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/dispatch_entities_parameters.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json similarity index 100% rename from dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/group_graph_entities_parameters.json rename to dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml index 4ea003926..7c500493f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml @@ -104,7 +104,7 @@ yarn cluster group graph entities - eu.dnetlib.dhp.oa.dedup.GroupEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -138,7 +138,7 @@ yarn cluster Dispatch publications - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -163,7 +163,7 @@ yarn cluster Dispatch project - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -188,7 +188,7 @@ yarn cluster Dispatch organization - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -213,7 +213,7 @@ yarn cluster Dispatch publication - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -238,7 +238,7 @@ yarn cluster Dispatch dataset - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -263,7 +263,7 @@ yarn cluster Dispatch software - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} @@ -288,7 +288,7 @@ yarn cluster Dispatch otherresearchproduct - eu.dnetlib.dhp.oa.dedup.DispatchEntitiesSparkJob + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob dhp-dedup-openaire-${projectVersion}.jar --executor-cores=${sparkExecutorCores} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml new file mode 100644 index 000000000..2e0ed9aee --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/config-default.xml @@ -0,0 +1,18 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml new file mode 100644 index 000000000..f77b46105 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/group/oozie_app/workflow.xml @@ -0,0 +1,298 @@ + + + + graphBasePath + the input graph base path + + + workingPath + path of the working directory + + + graphOutputPath + path of the output graph + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + group graph entities + eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=15000 + + --graphInputPath${graphBasePath} + --outputPath${workingPath}/grouped_entities + + + + + + + + + + + + + + + + + + yarn + cluster + Dispatch publications + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/datasource + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Datasource + + + + + + + + yarn + cluster + Dispatch project + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/project + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Project + + + + + + + + yarn + cluster + Dispatch organization + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/organization + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Organization + + + + + + + + yarn + cluster + Dispatch publication + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/publication + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication + + + + + + + + yarn + cluster + Dispatch dataset + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/dataset + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset + + + + + + + + yarn + cluster + Dispatch software + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/software + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software + + + + + + + + yarn + cluster + Dispatch otherresearchproduct + eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${workingPath}/grouped_entities + --outputPath${graphOutputPath}/otherresearchproduct + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + + + + + + + + + + ${nameNode}/${graphBasePath}/relation + ${nameNode}/${graphOutputPath}/relation + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java new file mode 100644 index 000000000..3bd1c13de --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java @@ -0,0 +1,144 @@ + +package eu.dnetlib.dhp.oa.graph.group; + +import static org.junit.jupiter.api.Assertions.*; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.*; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Lists; + +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob; +import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.utils.DHPUtils; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class GroupEntitiesSparkJobTest { + + private static SparkSession spark; + + private static ObjectMapper mapper = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + private static Path workingDir; + private Path dataInputPath; + + private Path groupEntityPath; + private Path dispatchEntityPath; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName()); + + SparkConf conf = new SparkConf(); + conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName()); + conf.setMaster("local"); + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); + spark = SparkSession.builder().config(conf).getOrCreate(); + } + + @BeforeEach + public void beforeEach() throws IOException, URISyntaxException { + dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI()); + groupEntityPath = workingDir.resolve("grouped_entity"); + dispatchEntityPath = workingDir.resolve("dispatched_entity"); + } + + @AfterAll + public static void afterAll() throws IOException { + spark.stop(); + FileUtils.deleteDirectory(workingDir.toFile()); + } + + @Test + @Order(1) + void testGroupEntities() throws Exception { + GroupEntitiesSparkJob.main(new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-graphInputPath", + dataInputPath.toString(), + "-outputPath", + groupEntityPath.toString() + }); + + Dataset output = spark + .read() + .textFile(groupEntityPath.toString()) + .map((MapFunction) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING()) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + + assertEquals( + 1, + output + .filter( + (FilterFunction) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9" + .equals(r.getId()) && + r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo"))) + .count()); + } + + @Test + @Order(2) + void testDispatchEntities() throws Exception { + for (String type : Lists + .newArrayList( + Publication.class.getCanonicalName(), eu.dnetlib.dhp.schema.oaf.Dataset.class.getCanonicalName())) { + String directory = StringUtils.substringAfterLast(type, ".").toLowerCase(); + DispatchEntitiesSparkJob.main(new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + groupEntityPath.toString(), + "-outputPath", + dispatchEntityPath.resolve(directory).toString(), + "-graphTableClassName", + type + }); + } + + Dataset output = spark + .read() + .textFile( + DHPUtils + .toSeq( + HdfsSupport + .listFiles(dispatchEntityPath.toString(), spark.sparkContext().hadoopConfiguration()))) + .map((MapFunction) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class)); + + assertEquals(3, output.count()); + assertEquals( + 2, + output + .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("publication")) + .count()); + assertEquals( + 1, + output + .map((MapFunction) r -> r.getResulttype().getClassid(), Encoders.STRING()) + .filter((FilterFunction) s -> s.equals("dataset")) + .count()); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json new file mode 100644 index 000000000..e30be47e9 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/dataset/dataset.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"Zenodo"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doi_________::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"Zenodo"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json new file mode 100644 index 000000000..29ce76df3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/group/publication/publication.json @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doi_________::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml index c10dd4e99..82cf9d3d5 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_complete_experiment.xml @@ -4,7 +4,7 @@ - + Graph processing [EXPERIMENT] @@ -15,7 +15,7 @@ set the path of unresolved entities unresolvedEntityPath - /data/unresolved_BETA + /data/unresolved_BETA/content @@ -51,6 +51,16 @@ + + set the number of iteration in affiliation propagation + + iterations + 1 + + + + + Set the target path to store the MERGED graph @@ -91,11 +101,21 @@ + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_experiment/graph/05_graph_grouped + + + + + Set the target path to store the INFERRED graph inferredGraphPath - /tmp/beta_experiment/graph/05_graph_inferred + /tmp/beta_experiment/graph/06_graph_inferred @@ -105,7 +125,7 @@ Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_experiment/graph/06_graph_dedup + /tmp/beta_experiment/graph/07_graph_dedup @@ -115,7 +135,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_experiment/graph/07_graph_consistent + /tmp/beta_experiment/graph/08_graph_consistent @@ -125,7 +145,7 @@ Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/beta_experiment/graph/08_graph_orcid + /tmp/beta_experiment/graph/09_graph_orcid @@ -135,7 +155,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/beta_experiment/graph/09_graph_bulktagging + /tmp/beta_experiment/graph/10_graph_bulktagging @@ -145,7 +165,7 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/beta_experiment/graph/10_graph_affiliation + /tmp/beta_experiment/graph/11_graph_affiliation @@ -155,7 +175,7 @@ Set the target path to store the AFFILIATION from SEMATIC RELATION graph affiliationSemRelGraphPath - /tmp/beta_experiment/graph/11_graph_affiliationsr + /tmp/beta_experiment/graph/12_graph_affiliationsr @@ -165,7 +185,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/beta_experiment/graph/12_graph_community_organization + /tmp/beta_experiment/graph/13_graph_community_organization @@ -175,7 +195,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/beta_experiment/graph/13_graph_funding + /tmp/beta_experiment/graph/14_graph_funding @@ -185,7 +205,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/beta_experiment/graph/14_graph_community_sem_rel + /tmp/beta_experiment/graph/15_graph_community_sem_rel @@ -195,7 +215,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/beta_experiment/graph/15_graph_country + /tmp/beta_experiment/graph/16_graph_country @@ -205,7 +225,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_experiment/graph/16_graph_cleaned + /tmp/beta_experiment/graph/17_graph_cleaned @@ -215,7 +235,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/beta_experiment/graph/17_graph_blacklisted + /tmp/beta_experiment/graph/18_graph_blacklisted @@ -548,14 +568,14 @@ 'mongoURL' : 'mongodb://beta.services.openaire.eu', 'mongoDb' : 'mdstore', 'mdstoreManagerUrl' : 'https://beta.services.openaire.eu/mdstoremanager', - 'postgresURL' : '', + 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : '', 'postgresPassword' : '', - 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.40:5432/oa_organizations', 'postgresOpenOrgsUser' : '', 'postgresOpenOrgsPassword' : '', 'shouldHashId' : 'true', - 'importOpenorgs' : 'true', + 'importOpenorgs' : 'false', 'workingDir' : '/tmp/beta_experiment/working_dir/beta_aggregator' } @@ -594,10 +614,10 @@ 'mongoURL' : 'mongodb://services.openaire.eu', 'mongoDb' : 'mdstore', 'mdstoreManagerUrl' : 'https://services.openaire.eu/mdstoremanager', - 'postgresURL' : '', + 'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : '', 'postgresPassword' : '', - 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsURL' : 'jdbc:postgresql://10.19.65.39:5432/oa_organizations', 'postgresOpenOrgsUser' : '', 'postgresOpenOrgsPassword' : '', 'shouldHashId' : 'true', @@ -737,11 +757,11 @@ executeOozieJob IIS - { + { 'graphBasePath':'cleanedFirstGraphPath', 'unresolvedPath' :'unresolvedEntityPath', 'targetPath':'resolvedGraphPath' - } + } { @@ -752,6 +772,30 @@ } + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_experiment/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + @@ -867,9 +911,9 @@ import_mdstore_service_location import_dataset_mdstore_ids_csv oozie.wf.application.path - /lib/iis/primary/snapshots/2021-09-24 + /lib/iis/primary/snapshots/2021-12-09 IIS - /tmp/beta_inference/graph/07_graph_cleaned + deprecated - not used import_infospace_graph_location import_project_concepts_context_ids_csv @@ -908,7 +952,7 @@ 'import_islookup_service_location' : 'import_islookup_service_location', 'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv', 'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', - 'import_infospace_graph_location' : 'import_infospace_graph_location', + 'import_infospace_graph_location' : 'groupedGraphPath', 'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations', 'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', @@ -958,7 +1002,7 @@ { 'inputActionSetIds' : 'actionSetIdsIISGraph', - 'inputGraphRootPath' : 'resolvedGraphPath', + 'inputGraphRootPath' : 'groupedGraphPath', 'outputGraphRootPath' : 'inferredGraphPath', 'isLookupUrl' : 'isLookUpUrl' } @@ -1125,7 +1169,8 @@ { 'sourcePath' : 'affiliationGraphPath', - 'outputPath': 'affiliationSemRelGraphPath' + 'outputPath': 'affiliationSemRelGraphPath', + 'iterations':'iterations' } @@ -1283,7 +1328,7 @@ { 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app', 'workingDir' : '/tmp/beta_experiment/working_dir/blacklist', - 'postgresURL' : '', + 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : '', 'postgresPassword' : '' } @@ -1296,10 +1341,10 @@ - wf_20211206_093743_83 - 2021-12-06T10:12:32+00:00 - SUCCESS - + wf_20220111_200505_785 + 2022-01-11T20:08:53+00:00 + + - + \ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml index 2fed35f44..df9528f4c 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -11,6 +11,16 @@ IIS 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_BETA/content + + + + + set blacklist of funder nsPrefixes from the beta aggregator @@ -71,11 +81,31 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_inference/graph/04_graph_resolved + + + + + + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_inference/graph/05_graph_grouped + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_inference/graph/04_graph_dedup + /tmp/beta_inference/graph/06_graph_dedup @@ -85,7 +115,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_inference/graph/05_graph_consistent + /tmp/beta_inference/graph/07_graph_consistent @@ -95,7 +125,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_inference/graph/06_graph_cleaned + /tmp/beta_inference/graph/08_graph_cleaned @@ -548,6 +578,55 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + @@ -560,7 +639,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'groupedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml index e5ce3d710..0ea6be341 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml @@ -11,6 +11,16 @@ IIS 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_PROD/content + + + + + set blacklist of funder nsPrefixes @@ -61,11 +71,21 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_inference/graph/03_graph_resolved + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/prod_inference/graph/03_graph_dedup + /tmp/prod_inference/graph/04_graph_dedup @@ -75,7 +95,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/prod_inference/graph/04_graph_consistent + /tmp/prod_inference/graph/05_graph_consistent @@ -85,7 +105,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/prod_inference/graph/05_graph_cleaned + /tmp/prod_inference/graph/06_graph_cleaned @@ -347,6 +367,31 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + @@ -359,7 +404,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'resolvedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml index f83337b3c..73c44aba8 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -11,6 +11,16 @@ Data Provision 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_BETA/content + + + + + set blacklist of funder nsPrefixes from the beta aggregator @@ -71,11 +81,31 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_provision/graph/04_graph_resolved + + + + + + + Set the target path to store the GROUPED graph + + groupedGraphPath + /tmp/beta_provision/graph/05_graph_grouped + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_provision/graph/04_graph_dedup + /tmp/beta_provision/graph/06_graph_dedup @@ -85,7 +115,7 @@ Set the target path to store the INFERRED graph inferredGraphPath - /tmp/beta_provision/graph/05_graph_inferred + /tmp/beta_provision/graph/07_graph_inferred @@ -95,7 +125,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_provision/graph/06_graph_consistent + /tmp/beta_provision/graph/08_graph_consistent @@ -105,7 +135,7 @@ Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/beta_provision/graph/07_graph_orcid + /tmp/beta_provision/graph/09_graph_orcid @@ -115,7 +145,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/beta_provision/graph/08_graph_bulktagging + /tmp/beta_provision/graph/10_graph_bulktagging @@ -125,7 +155,17 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/beta_provision/graph/09_graph_affiliation + /tmp/beta_provision/graph/11_graph_affiliation + + + + + + + Set the target path to store the AFFILIATION from SEMATIC RELATION graph + + affiliationSemRelGraphPath + /tmp/beta_provision/graph/12_graph_affiliationsr @@ -135,7 +175,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/beta_provision/graph/10_graph_comunity_organization + /tmp/beta_provision/graph/13_graph_comunity_organization @@ -145,7 +185,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/beta_provision/graph/11_graph_funding + /tmp/beta_provision/graph/14_graph_funding @@ -155,7 +195,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/beta_provision/graph/12_graph_comunity_sem_rel + /tmp/beta_provision/graph/15_graph_comunity_sem_rel @@ -165,7 +205,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/beta_provision/graph/13_graph_country + /tmp/beta_provision/graph/16_graph_country @@ -175,7 +215,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_provision/graph/14_graph_cleaned + /tmp/beta_provision/graph/17_graph_cleaned @@ -185,7 +225,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/beta_provision/graph/15_graph_blacklisted + /tmp/beta_provision/graph/18_graph_blacklisted @@ -695,6 +735,55 @@ build-report + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/relation_resolution', + 'sparkExecutorCores' : '2', + 'sparkExecutorMemory' : '12G' + } + + + + + + + + Resolve Relation + + executeOozieJob + IIS + + { + 'graphBasePath':'resolvedGraphPath', + 'targetPath':'groupedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/group/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/grouping', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '7G' + } + + @@ -707,7 +796,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'groupedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml index be6155f2f..205db29a6 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml @@ -11,6 +11,16 @@ Data Provision 30 + + set the path of unresolved entities + + unresolvedEntityPath + /data/unresolved_PROD/content + + + + + set blacklist of funder nsPrefixes @@ -51,11 +61,21 @@ + + Set the target path to store the CLEANED graph + + resolvedGraphPath + /tmp/beta_inference/graph/03_graph_resolved + + + + + Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/prod_provision/graph/03_graph_dedup + /tmp/prod_provision/graph/04_graph_dedup @@ -65,7 +85,7 @@ Set the target path to store the INFERRED graph inferredGraphPath - /tmp/prod_provision/graph/04_graph_inferred + /tmp/prod_provision/graph/05_graph_inferred @@ -75,7 +95,7 @@ Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/prod_provision/graph/05_graph_consistent + /tmp/prod_provision/graph/06_graph_consistent @@ -85,7 +105,7 @@ Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/prod_provision/graph/06_graph_orcid + /tmp/prod_provision/graph/07_graph_orcid @@ -95,7 +115,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/prod_provision/graph/07_graph_bulktagging + /tmp/prod_provision/graph/08_graph_bulktagging @@ -105,7 +125,7 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/prod_provision/graph/08_graph_affiliation + /tmp/prod_provision/graph/09_graph_affiliation @@ -115,7 +135,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/prod_provision/graph/09_graph_comunity_organization + /tmp/prod_provision/graph/10_graph_comunity_organization @@ -125,7 +145,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/prod_provision/graph/10_graph_funding + /tmp/prod_provision/graph/11_graph_funding @@ -135,7 +155,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/prod_provision/graph/11_graph_comunity_sem_rel + /tmp/prod_provision/graph/12_graph_comunity_sem_rel @@ -145,7 +165,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/prod_provision/graph/12_graph_country + /tmp/prod_provision/graph/13_graph_country @@ -155,7 +175,7 @@ Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/prod_provision/graph/13_graph_cleaned + /tmp/prod_provision/graph/14_graph_cleaned @@ -165,7 +185,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/prod_provision/graph/14_graph_blacklisted + /tmp/prod_provision/graph/15_graph_blacklisted @@ -446,6 +466,59 @@ build-report + + + + + + updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedFirstGraphPath' + } + + + { + 'resumeFrom' : 'prepareInfo', + 'hostedByMapPath' : '/user/dnet.production/data/hostedByMap', + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hostedbymap/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/hostedbymap', + 'outputPath' : '/tmp/prod_provision/working_dir/hostedbymap', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + Graph resolution + + executeOozieJob + IIS + + { + 'graphBasePath':'cleanedFirstGraphPath', + 'unresolvedPath' :'unresolvedEntityPath', + 'targetPath':'resolvedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/resolution/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/relation_resolution', + 'shouldResolveEntities' : 'false', + 'sparkExecutorCores' : '4', + 'sparkExecutorMemory' : '9G' + } + + @@ -458,7 +531,7 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'cleanedFirstGraphPath', + 'graphBasePath' : 'resolvedGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } diff --git a/pom.xml b/pom.xml index 7a026e668..86c3b4526 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.10.24] + [2.10.26-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6]