From 65cc736e2fb2848bef81f47715370b0f6a9064a4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Jul 2022 17:05:56 +0200 Subject: [PATCH 1/5] [Clean Country] first implementation to remove country NL from results collected from NARCIS when doi starts with mendely prefix --- .../oa/graph/clean/CleanCountrySparkJob.java | 128 +++++++++++++++++ .../graph/input_clean_country_parameters.json | 43 ++++++ .../dhp/oa/graph/clean/CleanCountryTest.java | 135 ++++++++++++++++++ .../clean/publication_clean_country.json | 7 + 4 files changed, 313 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountrySparkJob.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_country.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountrySparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountrySparkJob.java new file mode 100644 index 000000000..ec82d385c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountrySparkJob.java @@ -0,0 +1,128 @@ +package eu.dnetlib.dhp.oa.graph.clean; + +/** + * @author miriam.baglioni + * @Date 20/07/22 + */ + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.Optional; +import java.util.stream.Collectors; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + + +public class CleanCountrySparkJob implements Serializable { + private static final Logger log = LoggerFactory.getLogger(CleanCountrySparkJob.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + CleanContextSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json")); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("inputPath"); + log.info("inputPath: {}", inputPath); + + String workingPath = parser.get("workingPath"); + log.info("workingPath: {}", workingPath); + + String country = parser.get("country"); + log.info("country: {}", country); + + String verifyParam = parser.get("verifyParam"); + log.info("verifyParam: {}", verifyParam); + + String collectedfrom = parser.get("collectedfrom"); + log.info("collectedfrom: {}", collectedfrom); + + String graphTableClassName = parser.get("graphTableClassName"); + log.info("graphTableClassName: {}", graphTableClassName); + + Class entityClazz = (Class) Class.forName(graphTableClassName); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + + cleanCountry(spark, country, verifyParam, inputPath, entityClazz, workingPath,collectedfrom); + }); + } + + private static void cleanCountry(SparkSession spark, String country, String verifyParam, + String inputPath, Class entityClazz, String workingPath, String collectedfrom) { + Dataset res = spark + .read() + .textFile(inputPath) + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), + Encoders.bean(entityClazz)) + ; + + res.map((MapFunction) r -> { + for(StructuredProperty p: r.getPid()){ + if (p.getQualifier().getClassid().equalsIgnoreCase("doi") && + p.getValue().startsWith(verifyParam) && r.getCollectedfrom().stream().anyMatch(cf -> cf.getValue().equalsIgnoreCase(collectedfrom))) { + + r + .setCountry( + r + .getCountry() + .stream() + .filter( + c -> !c.getClassid() + .equalsIgnoreCase(country)) + .collect(Collectors.toList())); + } + } + + return r; + }, Encoders.bean(entityClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(workingPath); + + spark + .read() + .textFile(workingPath) + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), + Encoders.bean(entityClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath); + } + +} + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json new file mode 100644 index 000000000..e46f7b6da --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json @@ -0,0 +1,43 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "in", + "paramLongName": "inputPath", + "paramDescription": "the path to the graph data dump to read", + "paramRequired": true + }, + { + "paramName": "wp", + "paramLongName": "workingPath", + "paramDescription": "the path to store the output graph", + "paramRequired": true + }, + { + "paramName": "c", + "paramLongName": "country", + "paramDescription": "the id of the context to be removed", + "paramRequired": true + }, + { + "paramName": "class", + "paramLongName": "graphTableClassName", + "paramDescription": "class name moelling the graph table", + "paramRequired": true + },{ + "paramName": "vf", + "paramLongName": "verifyParam", + "paramDescription": "the parameter to be verified to remove the country", + "paramRequired": true +}, + { + "paramName": "cf", + "paramLongName": "collectedfrom", + "paramDescription": "the collectedfrom value for which we should apply the cleaning", + "paramRequired": true + } +] diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java new file mode 100644 index 000000000..f9dad9fcf --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java @@ -0,0 +1,135 @@ +package eu.dnetlib.dhp.oa.graph.clean; +/** + * @author miriam.baglioni + * @Date 20/07/22 + */ + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + + +public class CleanCountryTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(CleanContextTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(DumpJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(DumpJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void testResultClean() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/clean/publication_clean_country.json") + .getPath(); + final String prefix = "gcube "; + + spark + .read() + .textFile(sourcePath) + .map( + (MapFunction) r -> OBJECT_MAPPER.readValue(r, Publication.class), + Encoders.bean(Publication.class)) + .write() + .json(workingDir.toString() + "/publication"); + + CleanCountrySparkJob.main(new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--inputPath", workingDir.toString() + "/publication", + "-graphTableClassName", Publication.class.getCanonicalName(), + "-workingPath", workingDir.toString() + "/working", + "-country", "NL", + "-verifyParam", "10.17632", + "-collectedfrom", "NARCIS" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/publication") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + + Assertions.assertEquals(7, tmp.count()); + + // original result with NL country and doi starting with Mendely prefix, but not collectedfrom NARCIS + Assertions + .assertEquals( + 1, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")) + .collect() + .get(0) + .getCountry() + .size()); + + // original result with NL country and pid not starting with Mendely prefix + Assertions + .assertEquals( + 1, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")) + .collect() + .get(0) + .getCountry() + .size()); + + // original result with NL country and doi starting with Mendely prefix and collectedfrom NARCIS + Assertions + .assertEquals( + 0, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")) + .collect() + .get(0) + .getCountry() + .size()); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_country.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_country.json new file mode 100644 index 000000000..ebceecff9 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_country.json @@ -0,0 +1,7 @@ +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::2"}],"contributor":[],"country":[{"classid": "NL", "classname": "DE", "schemeid": "dnet:countries", "schemename": "dnet:countries"}],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T16:14:18.452Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Lit.opg., bijl."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::0224aae28af558f21768dbc6439c7a95","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"8250"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282676557,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:51:24Z","harvestDate":"2020-05-25T11:33:13.427Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550013110","metadataNamespace":""}},"originalId":["DansKnawCris::0224aae28af558f21768dbc6439c7a95"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/daMendeley"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"prospectie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Gcube veldonderzoek d.m.v. boringen (karterende fase) : Raadhuisstraat te Dirkshorn, gemeente Harenkarspel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Synthegra Archeologie Rapportenreeks P0502381"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::2"}],"contributor":[],"country":[{"classid": "NL", "classname": "DE", "schemeid": "dnet:countries", "schemename": "dnet:countries"}],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:03:57.761Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount": {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1396"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283087415,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:58:39Z","harvestDate":"2020-05-25T11:34:38.707Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800020324","metadataNamespace":""}},"originalId":["DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"van Someren, Christian","name":"Christian","pid":[],"rank":1,"surname":"van Someren"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"NARCIS"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::2"}],"contributor":[],"country":[{"classid": "NL", "classname": "DE", "schemeid": "dnet:countries", "schemename": "dnet:countries"}],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:32.27Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Over het Energieakkoord. In het energieakkoord voor duurzame groei is afgesproken dat in 2020 14 procent van de opwek hernieuwbaar moet zijn en in 2023 16 procent. De doelstelling is een uitdagende opgave waarbij de eerste vraag is: \"Hoeveel hernieuwbare energie wordt er op dit moment opgewekt in Nederland?\" Deze website geeft antwoord op de vraag voor de actueel opgewekte windenergie, zonne-energie en biogas."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"NARCIS"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["http://energieopwek.nl/"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282663379,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-05-08T17:46:55Z","harvestDate":"2020-05-25T15:30:24.079Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/hbo:oai:hbokennisbank.nl:hanzepure:oai:research.hanze.nl:publications/813a5dfa-4fd0-44c4-8cbf-310324dc724d","metadataNamespace":""}},"originalId":["DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/daMendeley"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:hs:18-813a5dfa-4fd0-44c4-8cbf-310324dc724d"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0038","classname":"0038","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"energieproductie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Management"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Monitoring"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Policy and Law"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Energie interventies en gedrag"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"publieke ondersteuning en communicatie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Professional practice & society"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"subtitle","classname":"subtitle","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"GCUBE opwek"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:40.568Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282670417,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:41:34Z","harvestDate":"2020-05-25T11:40:05.974Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550052278","metadataNamespace":""}},"originalId":["DansKnawCris::3c9f068ddc930360bec6925488a9a97f"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None GCUBE"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T18:11:57.737Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met lit. opg"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4669a378a73661417182c208e6fdab53","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282758835,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:36:00Z","harvestDate":"2020-05-25T11:30:47.199Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800007467","metadataNamespace":""}},"originalId":["DansKnawCris::4669a378a73661417182c208e6fdab53"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Vlaardingen"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gCube RAAP-rapport, ISSN 0925-6229 2089"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T18:18:18.049Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met literatuuropgave"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283010899,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:48:12Z","harvestDate":"2020-05-25T11:32:46.363Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800014509","metadataNamespace":""}},"originalId":["DansKnawCris::4a9152e80f860eab99072e921d74a0ff"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proefsleuvenonderzoek aan de Bredasebaan 8 te Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gcube Archeodienst rapport, ISSN 1877-2900 565"}],"journal":null} +{"author":[{"affiliation":null,"fullname":"Schubart, A.","name":"A.","pid":[],"rank":1,"surname":"Schubart"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8"},"dateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"dateofcollection":"2019-02-17T01:33:44Z","dateoftransformation":null,"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"n/a"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"dateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.pdf","http://dx.doi.org/10.1524/phil.1866.24.14.561"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":null,"value":"cc0"},"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2578.35"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["https://zenodo.org/record/1634826/files/article.pdf"]},{"accessright":null,"collectedfrom":{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","https://academic.microsoft.com/#/detail/2601803436"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1866-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://zenodo.org/record/1634826"]}],"language":null,"lastupdatetimestamp":1591290751234,"oaiprovenance":null,"originalId":["10.1524/phil.1866.24.14.561","2601803436","od______2659::f9ba286bcf8429160b58072028052325"],"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1524/phil.1866.24.14.561"}],"publisher":{"dataInfo":null,"value":"Walter de Gruyter GmbH"},"relevantdate":[{"dataInfo":null,"qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-03-26T10:09:50Z"},{"dataInfo":null,"qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1866-01-01"}],"resourcetype":{"classid":"Other","classname":"Other","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":null,"value":"Crossref"},{"dataInfo":null,"value":null}],"subject":[{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Theology"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"History"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Art history"}],"title":[{"dataInfo":null,"qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"XIV. Die wörter άγαλμα, είχών, ξόανον, ανδρuις und verwandte, in ihren verschiedenen beziehungen. Nach Pausanias"},{"dataInfo":null,"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gcube xiv die worter άγαλμα eίχών ξόανον ανδρuις und verwandte in ihren verschiedenen beziehungen nach pausanias"}],"journal":{"conferencedate":null,"conferenceplace":null,"dataInfo":null,"edition":null,"ep":null,"iss":null,"issnLinking":null,"issnOnline":"2196-7008","issnPrinted":"0031-7985","name":"Philologus","sp":null,"vol":"24"}} \ No newline at end of file From a12d28c6444616ac914d79b43e5360c4e92d714b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Jul 2022 16:23:12 +0200 Subject: [PATCH 2/5] [Clean Country] added logic not to remove country from result if it exist a hosting datasource with that country. Moreover the country will be removed only if added with propagation --- .../{ => country}/CleanCountrySparkJob.java | 40 ++++++-- .../country/GetDatasourceFromCountry.java | 99 +++++++++++++++++++ .../dhp/oa/graph/clean/CleanCountryTest.java | 3 +- 3 files changed, 130 insertions(+), 12 deletions(-) rename dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/{ => country}/CleanCountrySparkJob.java (75%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountrySparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java similarity index 75% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountrySparkJob.java rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java index ec82d385c..1af1aafc0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountrySparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.oa.graph.clean; +package eu.dnetlib.dhp.oa.graph.clean.country; /** * @author miriam.baglioni @@ -7,6 +7,7 @@ package eu.dnetlib.dhp.oa.graph.clean; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import org.apache.commons.io.IOUtils; @@ -21,6 +22,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.Serializable; +import java.util.List; import java.util.Optional; import java.util.stream.Collectors; @@ -54,10 +56,13 @@ public class CleanCountrySparkJob implements Serializable { String workingPath = parser.get("workingPath"); log.info("workingPath: {}", workingPath); + String datasourcePath = parser.get("datasourcePath"); + log.info("datasourcePath: {}", datasourcePath); + String country = parser.get("country"); log.info("country: {}", country); - String verifyParam = parser.get("verifyParam"); + String[] verifyParam = parser.get("verifyParam").split(";"); log.info("verifyParam: {}", verifyParam); String collectedfrom = parser.get("collectedfrom"); @@ -74,12 +79,17 @@ public class CleanCountrySparkJob implements Serializable { isSparkSessionManaged, spark -> { - cleanCountry(spark, country, verifyParam, inputPath, entityClazz, workingPath,collectedfrom); + cleanCountry(spark, country, verifyParam, inputPath, entityClazz, workingPath,collectedfrom, datasourcePath); }); } - private static void cleanCountry(SparkSession spark, String country, String verifyParam, - String inputPath, Class entityClazz, String workingPath, String collectedfrom) { + private static void cleanCountry(SparkSession spark, String country, String[] verifyParam, + String inputPath, Class entityClazz, String workingPath, String collectedfrom, String datasourcePath) { + + List hostedBy = spark.read().textFile(datasourcePath) + // .filter((FilterFunction) ds -> !ds.equals(collectedfrom)) + .collectAsList(); + Dataset res = spark .read() .textFile(inputPath) @@ -89,11 +99,14 @@ public class CleanCountrySparkJob implements Serializable { ; res.map((MapFunction) r -> { - for(StructuredProperty p: r.getPid()){ - if (p.getQualifier().getClassid().equalsIgnoreCase("doi") && - p.getValue().startsWith(verifyParam) && r.getCollectedfrom().stream().anyMatch(cf -> cf.getValue().equalsIgnoreCase(collectedfrom))) { + if(r.getInstance().stream().anyMatch(i -> hostedBy.contains(i.getHostedby().getKey())) || + !r.getCollectedfrom().stream().anyMatch(cf -> cf.getValue().equals(collectedfrom))){ + return r; + } - r + if(r.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals("doi") && pidInParam(p.getValue(), verifyParam)) + && r.getCountry().stream().anyMatch(c -> c.getClassid().equals(country) && c.getDataInfo().getInferenceprovenance().equals("propagation"))) + { r .setCountry( r .getCountry() @@ -102,7 +115,7 @@ public class CleanCountrySparkJob implements Serializable { c -> !c.getClassid() .equalsIgnoreCase(country)) .collect(Collectors.toList())); - } + } return r; @@ -124,5 +137,12 @@ public class CleanCountrySparkJob implements Serializable { .json(inputPath); } + private static boolean pidInParam(String value, String[] verifyParam) { + for (String s : verifyParam ) + if (value.startsWith(s)) + return true; + return false; + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java new file mode 100644 index 000000000..cdca96107 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java @@ -0,0 +1,99 @@ +package eu.dnetlib.dhp.oa.graph.clean.country; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + +import java.io.Serializable; +import java.util.List; +import java.util.Optional; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +/** + * @author miriam.baglioni + * @Date 22/07/22 + */ +public class GetDatasourceFromCountry implements Serializable { + private static final Logger log = LoggerFactory.getLogger(GetDatasourceFromCountry.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + GetDatasourceFromCountry.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json")); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("inputPath"); + log.info("inputPath: {}", inputPath); + + String workingPath = parser.get("workingPath"); + log.info("workingPath: {}", workingPath); + + String country = parser.get("country"); + log.info("country: {}", country); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + + getDatasourceFromCountry(spark, country, inputPath, workingPath); + }); + } + + private static void getDatasourceFromCountry(SparkSession spark, String country, String inputPath, String workingPath) { + + Dataset organization = spark.read().textFile(inputPath + "/organization") + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, Organization.class), + Encoders.bean(Organization.class)) + .filter( + (FilterFunction) o -> !o.getDataInfo().getDeletedbyinference() && + o.getCountry().getClassid().length() > 0 && + o.getCountry().getClassid().equals(country));; + + + // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass + Dataset relation = spark.read().textFile( inputPath + "/relation") + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, Relation.class), + Encoders.bean(Relation.class)) + .filter( + (FilterFunction) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY) && + !rel.getDataInfo().getDeletedbyinference()); + + organization.joinWith(relation, organization.col("id").equalTo(relation.col("target")), "left") + .map((MapFunction, String>) t2 -> t2._2().getSource(), Encoders.STRING() ) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(workingPath); + + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java index f9dad9fcf..a72093156 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java @@ -5,9 +5,9 @@ package eu.dnetlib.dhp.oa.graph.clean; */ import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob; import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest; import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -25,7 +25,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.List; public class CleanCountryTest { From 5968ec018db3e9292d8be92552199483791d4ed5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Jul 2022 16:48:38 +0200 Subject: [PATCH 3/5] [Clean Country] modified workflow and added param file --- .../country/GetDatasourceFromCountry.java | 2 +- .../dhp/oa/graph/clean/oozie_app/workflow.xml | 175 +++++++++++++++++- .../graph/input_clean_country_parameters.json | 6 + .../input_datasource_country_parameters.json | 26 +++ 4 files changed, 204 insertions(+), 5 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_datasource_country_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java index cdca96107..c255c445f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java @@ -38,7 +38,7 @@ public class GetDatasourceFromCountry implements Serializable { .toString( GetDatasourceFromCountry.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/input_datasource_country_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml index 0cf6cdd05..2ba0a7ad7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml @@ -14,8 +14,8 @@ the address of the lookUp service - shouldCleanContext - true if the context have to be cleaned + shouldClean + true if the operation of deletion of not needed values from the results have to be performed contextId @@ -30,6 +30,22 @@ It is the constrint to be verified. This time is hardcoded as gcube and it is searched for in the title. If title starts with gcube than the context sobigdata will be removed by the result if present + + verifyCountryParam + 10.17632;10.5061 + It is the constraints to be verified. This time is hardcoded as the starting doi from mendeley and dryad and it is searched for in + the pid value. If the pid value starts with one of the two prefixes, then the country may be removed + + + country + NL + It is the country to be removed from the set of countries if it is present with provenance propagation. The country will not be removed if in one of the isntances there is a datasource with country `country` + + + collectedfrom + NARCIS + the only datasource for which the country NL will be removed from the country list + sparkDriverMemory @@ -296,7 +312,7 @@ - ${wf:conf('shouldCleanContext') eq true} + ${wf:conf('shouldClean') eq true} @@ -416,7 +432,158 @@ - + + + + + + yarn + cluster + Clean publications context + eu.dnetlib.dhp.oa.graph.clean.country.GetDatasourceFromCountry + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${graphOutputPath} + --workingPath${workingDir}/working/hostedby + --country${country} + + + + + + + + + + + + + + + yarn + cluster + Clean publications counmtry + eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${graphOutputPath}/publication + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication + --workingPath${workingDir}/working/publication + --country${country} + --verifyParam${verifyCountryParam} + --datasourcePath${workingDir}/working/hostedby + --collectedfrom${collectedfrom} + + + + + + + + yarn + cluster + Clean datasets Country + eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${graphOutputPath}/dataset + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset + --workingPath${workingDir}/working/dataset + --country${country} + --verifyParam${verifyCountryParam} + --datasourcePath${workingDir}/working/hostedby + --collectedfrom${collectedfrom} + + + + + + + + yarn + cluster + Clean otherresearchproducts country + eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${graphOutputPath}/otherresearchproduct + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --workingPath${workingDir}/working/otherresearchproduct + --country${country} + --verifyParam${verifyCountryParam} + --datasourcePath${workingDir}/working/hostedby + --collectedfrom${collectedfrom} + + + + + + + + yarn + cluster + Clean softwares country + eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --inputPath${graphOutputPath}/software + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software + --workingPath${workingDir}/working/software + --country${country} + --verifyParam${verifyCountryParam} + --datasourcePath${workingDir}/working/hostedby + --collectedfrom${collectedfrom} + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json index e46f7b6da..318fb22f8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json @@ -39,5 +39,11 @@ "paramLongName": "collectedfrom", "paramDescription": "the collectedfrom value for which we should apply the cleaning", "paramRequired": true + }, + { + "paramName": "hb", + "paramLongName": "hostedBy", + "paramDescription": "the set of datasources having the specified country in the graph searched for in the hostedby of the results", + "paramRequired": true } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_datasource_country_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_datasource_country_parameters.json new file mode 100644 index 000000000..e0aa60328 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/input_datasource_country_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "in", + "paramLongName": "inputPath", + "paramDescription": "the path to the graph data dump to read", + "paramRequired": true + }, + { + "paramName": "wp", + "paramLongName": "workingPath", + "paramDescription": "the path to store the output graph", + "paramRequired": true + }, + { + "paramName": "c", + "paramLongName": "country", + "paramDescription": "the id of the context to be removed", + "paramRequired": true + } +] From 62d2138806a9740085174e819b361ee19640366f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 8 Aug 2022 14:10:47 +0200 Subject: [PATCH 4/5] [Clean Context] changed a bit the logic. Added the check not to have result hosted by a datasource of type institutional repository from NL. Added also the check that the country should have been included in the result via propagation for it to be removed --- .../clean/country/CleanCountrySparkJob.java | 231 ++++++++++-------- .../country/GetDatasourceFromCountry.java | 139 ++++++----- .../dhp/oa/graph/clean/CleanCountryTest.java | 212 ++++++++-------- .../eu/dnetlib/dhp/oa/graph/clean/hostedBy | 0 .../clean/publication_clean_country.json | 3 +- 5 files changed, 315 insertions(+), 270 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/hostedBy diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java index 1af1aafc0..3ba84a317 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java @@ -1,15 +1,15 @@ + package eu.dnetlib.dhp.oa.graph.clean.country; -/** - * @author miriam.baglioni - * @Date 20/07/22 - */ +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import javax.swing.text.html.Option; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -21,128 +21,147 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.Serializable; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +/** + * @author miriam.baglioni + * @Date 20/07/22 + */ +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob; +import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class CleanCountrySparkJob implements Serializable { - private static final Logger log = LoggerFactory.getLogger(CleanCountrySparkJob.class); + private static final Logger log = LoggerFactory.getLogger(CleanCountrySparkJob.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - CleanContextSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json")); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); + String jsonConfiguration = IOUtils + .toString( + CleanContextSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/input_clean_country_parameters.json")); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputPath = parser.get("inputPath"); - log.info("inputPath: {}", inputPath); + String inputPath = parser.get("inputPath"); + log.info("inputPath: {}", inputPath); - String workingPath = parser.get("workingPath"); - log.info("workingPath: {}", workingPath); + String workingPath = parser.get("workingPath"); + log.info("workingPath: {}", workingPath); - String datasourcePath = parser.get("datasourcePath"); - log.info("datasourcePath: {}", datasourcePath); + String datasourcePath = parser.get("hostedBy"); + log.info("datasourcePath: {}", datasourcePath); - String country = parser.get("country"); - log.info("country: {}", country); + String country = parser.get("country"); + log.info("country: {}", country); - String[] verifyParam = parser.get("verifyParam").split(";"); - log.info("verifyParam: {}", verifyParam); + String[] verifyParam = parser.get("verifyParam").split(";"); + log.info("verifyParam: {}", verifyParam); - String collectedfrom = parser.get("collectedfrom"); - log.info("collectedfrom: {}", collectedfrom); + String collectedfrom = parser.get("collectedfrom"); + log.info("collectedfrom: {}", collectedfrom); - String graphTableClassName = parser.get("graphTableClassName"); - log.info("graphTableClassName: {}", graphTableClassName); + String graphTableClassName = parser.get("graphTableClassName"); + log.info("graphTableClassName: {}", graphTableClassName); - Class entityClazz = (Class) Class.forName(graphTableClassName); + Class entityClazz = (Class) Class.forName(graphTableClassName); - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { - cleanCountry(spark, country, verifyParam, inputPath, entityClazz, workingPath,collectedfrom, datasourcePath); - }); - } + cleanCountry( + spark, country, verifyParam, inputPath, entityClazz, workingPath, collectedfrom, datasourcePath); + }); + } - private static void cleanCountry(SparkSession spark, String country, String[] verifyParam, - String inputPath, Class entityClazz, String workingPath, String collectedfrom, String datasourcePath) { + private static void cleanCountry(SparkSession spark, String country, String[] verifyParam, + String inputPath, Class entityClazz, String workingPath, String collectedfrom, String datasourcePath) { - List hostedBy = spark.read().textFile(datasourcePath) - // .filter((FilterFunction) ds -> !ds.equals(collectedfrom)) - .collectAsList(); + List hostedBy = spark + .read() + .textFile(datasourcePath) + // .filter((FilterFunction) ds -> !ds.equals(collectedfrom)) + .collectAsList(); - Dataset res = spark - .read() - .textFile(inputPath) - .map( - (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), - Encoders.bean(entityClazz)) - ; + Dataset res = spark + .read() + .textFile(inputPath) + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), + Encoders.bean(entityClazz)); - res.map((MapFunction) r -> { - if(r.getInstance().stream().anyMatch(i -> hostedBy.contains(i.getHostedby().getKey())) || - !r.getCollectedfrom().stream().anyMatch(cf -> cf.getValue().equals(collectedfrom))){ - return r; - } + res.map((MapFunction) r -> { + if (r.getInstance().stream().anyMatch(i -> hostedBy.contains(i.getHostedby().getKey())) || + !r.getCollectedfrom().stream().anyMatch(cf -> cf.getValue().equals(collectedfrom))) { + return r; + } - if(r.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals("doi") && pidInParam(p.getValue(), verifyParam)) - && r.getCountry().stream().anyMatch(c -> c.getClassid().equals(country) && c.getDataInfo().getInferenceprovenance().equals("propagation"))) - { r - .setCountry( - r - .getCountry() - .stream() - .filter( - c -> !c.getClassid() - .equalsIgnoreCase(country)) - .collect(Collectors.toList())); + if (r + .getPid() + .stream() + .anyMatch(p -> p.getQualifier().getClassid().equals("doi") && pidInParam(p.getValue(), verifyParam))) { + r + .setCountry( + r + .getCountry() + .stream() + .filter( + c -> toTakeCountry(c, country)) + .collect(Collectors.toList())); - } + } - return r; - }, Encoders.bean(entityClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(workingPath); + return r; + }, Encoders.bean(entityClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(workingPath); - spark - .read() - .textFile(workingPath) - .map( - (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), - Encoders.bean(entityClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(inputPath); - } + spark + .read() + .textFile(workingPath) + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, entityClazz), + Encoders.bean(entityClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath); + } - private static boolean pidInParam(String value, String[] verifyParam) { - for (String s : verifyParam ) - if (value.startsWith(s)) - return true; - return false; - } + private static boolean pidInParam(String value, String[] verifyParam) { + for (String s : verifyParam) + if (value.startsWith(s)) + return true; + return false; + } + + private static boolean toTakeCountry(Country c, String country) { + // If dataInfo is not set, or dataInfo.inferenceprovenance is not set or not present then it cannot be + // inserted via propagation + if (!Optional.ofNullable(c.getDataInfo()).isPresent()) + return true; + if (!Optional.ofNullable(c.getDataInfo().getInferenceprovenance()).isPresent()) + return true; + return !(c + .getClassid() + .equalsIgnoreCase(country) && + c.getDataInfo().getInferenceprovenance().equals("propagation")); + } } - diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java index c255c445f..dd5af6998 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java @@ -1,10 +1,12 @@ + package eu.dnetlib.dhp.oa.graph.clean.country; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.List; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -15,85 +17,92 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; import scala.Tuple2; -import java.io.Serializable; -import java.util.List; -import java.util.Optional; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - /** * @author miriam.baglioni * @Date 22/07/22 */ public class GetDatasourceFromCountry implements Serializable { - private static final Logger log = LoggerFactory.getLogger(GetDatasourceFromCountry.class); + private static final Logger log = LoggerFactory.getLogger(GetDatasourceFromCountry.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - GetDatasourceFromCountry.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/input_datasource_country_parameters.json")); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); + String jsonConfiguration = IOUtils + .toString( + GetDatasourceFromCountry.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/input_datasource_country_parameters.json")); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputPath = parser.get("inputPath"); - log.info("inputPath: {}", inputPath); + String inputPath = parser.get("inputPath"); + log.info("inputPath: {}", inputPath); - String workingPath = parser.get("workingPath"); - log.info("workingPath: {}", workingPath); + String workingPath = parser.get("workingPath"); + log.info("workingPath: {}", workingPath); - String country = parser.get("country"); - log.info("country: {}", country); + String country = parser.get("country"); + log.info("country: {}", country); - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { - getDatasourceFromCountry(spark, country, inputPath, workingPath); - }); - } + getDatasourceFromCountry(spark, country, inputPath, workingPath); + }); + } - private static void getDatasourceFromCountry(SparkSession spark, String country, String inputPath, String workingPath) { + private static void getDatasourceFromCountry(SparkSession spark, String country, String inputPath, + String workingPath) { - Dataset organization = spark.read().textFile(inputPath + "/organization") - .map( - (MapFunction) value -> OBJECT_MAPPER.readValue(value, Organization.class), - Encoders.bean(Organization.class)) - .filter( - (FilterFunction) o -> !o.getDataInfo().getDeletedbyinference() && - o.getCountry().getClassid().length() > 0 && - o.getCountry().getClassid().equals(country));; + Dataset organization = spark + .read() + .textFile(inputPath + "/organization") + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, Organization.class), + Encoders.bean(Organization.class)) + .filter( + (FilterFunction) o -> !o.getDataInfo().getDeletedbyinference() && + o.getCountry().getClassid().length() > 0 && + o.getCountry().getClassid().equals(country)); + ; + // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass + Dataset relation = spark + .read() + .textFile(inputPath + "/relation") + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, Relation.class), + Encoders.bean(Relation.class)) + .filter( + (FilterFunction) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY) && + !rel.getDataInfo().getDeletedbyinference()); - // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass - Dataset relation = spark.read().textFile( inputPath + "/relation") - .map( - (MapFunction) value -> OBJECT_MAPPER.readValue(value, Relation.class), - Encoders.bean(Relation.class)) - .filter( - (FilterFunction) rel -> rel.getRelClass().equalsIgnoreCase(ModelConstants.IS_PROVIDED_BY) && - !rel.getDataInfo().getDeletedbyinference()); + organization + .joinWith(relation, organization.col("id").equalTo(relation.col("target")), "left") + .map((MapFunction, String>) t2 -> t2._2().getSource(), Encoders.STRING()) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(workingPath); - organization.joinWith(relation, organization.col("id").equalTo(relation.col("target")), "left") - .map((MapFunction, String>) t2 -> t2._2().getSource(), Encoders.STRING() ) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(workingPath); - - } + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java index a72093156..c9f846570 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleanCountryTest.java @@ -1,13 +1,10 @@ -package eu.dnetlib.dhp.oa.graph.clean; -/** - * @author miriam.baglioni - * @Date 20/07/22 - */ -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob; -import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest; -import eu.dnetlib.dhp.schema.oaf.Publication; +package eu.dnetlib.dhp.oa.graph.clean; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -22,113 +19,132 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; +/** + * @author miriam.baglioni + * @Date 20/07/22 + */ +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.graph.clean.country.CleanCountrySparkJob; +import eu.dnetlib.dhp.schema.oaf.Publication; public class CleanCountryTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static SparkSession spark; + private static SparkSession spark; - private static Path workingDir; + private static Path workingDir; - private static final Logger log = LoggerFactory.getLogger(CleanContextTest.class); + private static final Logger log = LoggerFactory.getLogger(CleanContextTest.class); - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(CleanCountryTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); - SparkConf conf = new SparkConf(); - conf.setAppName(DumpJobTest.class.getSimpleName()); + SparkConf conf = new SparkConf(); + conf.setAppName(CleanCountryTest.class.getSimpleName()); - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - spark = SparkSession - .builder() - .appName(DumpJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } + spark = SparkSession + .builder() + .appName(CleanCountryTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } - @Test - public void testResultClean() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/clean/publication_clean_country.json") - .getPath(); - final String prefix = "gcube "; + @Test + public void testResultClean() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/clean/publication_clean_country.json") + .getPath(); - spark - .read() - .textFile(sourcePath) - .map( - (MapFunction) r -> OBJECT_MAPPER.readValue(r, Publication.class), - Encoders.bean(Publication.class)) - .write() - .json(workingDir.toString() + "/publication"); + spark + .read() + .textFile(sourcePath) + .map( + (MapFunction) r -> OBJECT_MAPPER.readValue(r, Publication.class), + Encoders.bean(Publication.class)) + .write() + .json(workingDir.toString() + "/publication"); - CleanCountrySparkJob.main(new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--inputPath", workingDir.toString() + "/publication", - "-graphTableClassName", Publication.class.getCanonicalName(), - "-workingPath", workingDir.toString() + "/working", - "-country", "NL", - "-verifyParam", "10.17632", - "-collectedfrom", "NARCIS" - }); + CleanCountrySparkJob.main(new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--inputPath", workingDir.toString() + "/publication", + "-graphTableClassName", Publication.class.getCanonicalName(), + "-workingPath", workingDir.toString() + "/working", + "-country", "NL", + "-verifyParam", "10.17632", + "-collectedfrom", "NARCIS", + "-hostedBy", getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/clean/hostedBy") + .getPath() + }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/publication") - .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/publication") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); - Assertions.assertEquals(7, tmp.count()); + Assertions.assertEquals(8, tmp.count()); - // original result with NL country and doi starting with Mendely prefix, but not collectedfrom NARCIS - Assertions - .assertEquals( - 1, - tmp - .filter(p -> p.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")) - .collect() - .get(0) - .getCountry() - .size()); + // original result with NL country and doi starting with Mendely prefix, but not collectedfrom NARCIS + Assertions + .assertEquals( + 1, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")) + .collect() + .get(0) + .getCountry() + .size()); - // original result with NL country and pid not starting with Mendely prefix - Assertions - .assertEquals( - 1, - tmp - .filter(p -> p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")) - .collect() - .get(0) - .getCountry() - .size()); + // original result with NL country and pid not starting with Mendely prefix + Assertions + .assertEquals( + 1, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")) + .collect() + .get(0) + .getCountry() + .size()); - // original result with NL country and doi starting with Mendely prefix and collectedfrom NARCIS - Assertions - .assertEquals( - 0, - tmp - .filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")) - .collect() - .get(0) - .getCountry() - .size()); - } + // original result with NL country and doi starting with Mendely prefix and collectedfrom NARCIS but not + // inserted with propagation + Assertions + .assertEquals( + 1, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")) + .collect() + .get(0) + .getCountry() + .size()); + + // original result with NL country and doi starting with Mendely prefix and collectedfrom NARCIS inserted with + // propagation + Assertions + .assertEquals( + 0, + tmp + .filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6ag")) + .collect() + .get(0) + .getCountry() + .size()); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/hostedBy b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/hostedBy new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_country.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_country.json index ebceecff9..4916eee73 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_country.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/publication_clean_country.json @@ -4,4 +4,5 @@ {"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:40.568Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282670417,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:41:34Z","harvestDate":"2020-05-25T11:40:05.974Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550052278","metadataNamespace":""}},"originalId":["DansKnawCris::3c9f068ddc930360bec6925488a9a97f"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None GCUBE"}],"journal":null} {"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T18:11:57.737Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met lit. opg"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4669a378a73661417182c208e6fdab53","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282758835,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:36:00Z","harvestDate":"2020-05-25T11:30:47.199Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800007467","metadataNamespace":""}},"originalId":["DansKnawCris::4669a378a73661417182c208e6fdab53"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Vlaardingen"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gCube RAAP-rapport, ISSN 0925-6229 2089"}],"journal":null} {"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T18:18:18.049Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met literatuuropgave"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283010899,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:48:12Z","harvestDate":"2020-05-25T11:32:46.363Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800014509","metadataNamespace":""}},"originalId":["DansKnawCris::4a9152e80f860eab99072e921d74a0ff"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proefsleuvenonderzoek aan de Bredasebaan 8 te Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gcube Archeodienst rapport, ISSN 1877-2900 565"}],"journal":null} -{"author":[{"affiliation":null,"fullname":"Schubart, A.","name":"A.","pid":[],"rank":1,"surname":"Schubart"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8"},"dateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"dateofcollection":"2019-02-17T01:33:44Z","dateoftransformation":null,"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"n/a"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"dateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.pdf","http://dx.doi.org/10.1524/phil.1866.24.14.561"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":null,"value":"cc0"},"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2578.35"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["https://zenodo.org/record/1634826/files/article.pdf"]},{"accessright":null,"collectedfrom":{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","https://academic.microsoft.com/#/detail/2601803436"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1866-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://zenodo.org/record/1634826"]}],"language":null,"lastupdatetimestamp":1591290751234,"oaiprovenance":null,"originalId":["10.1524/phil.1866.24.14.561","2601803436","od______2659::f9ba286bcf8429160b58072028052325"],"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1524/phil.1866.24.14.561"}],"publisher":{"dataInfo":null,"value":"Walter de Gruyter GmbH"},"relevantdate":[{"dataInfo":null,"qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-03-26T10:09:50Z"},{"dataInfo":null,"qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1866-01-01"}],"resourcetype":{"classid":"Other","classname":"Other","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":null,"value":"Crossref"},{"dataInfo":null,"value":null}],"subject":[{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Theology"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"History"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Art history"}],"title":[{"dataInfo":null,"qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"XIV. Die wörter άγαλμα, είχών, ξόανον, ανδρuις und verwandte, in ihren verschiedenen beziehungen. Nach Pausanias"},{"dataInfo":null,"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gcube xiv die worter άγαλμα eίχών ξόανον ανδρuις und verwandte in ihren verschiedenen beziehungen nach pausanias"}],"journal":{"conferencedate":null,"conferenceplace":null,"dataInfo":null,"edition":null,"ep":null,"iss":null,"issnLinking":null,"issnOnline":"2196-7008","issnPrinted":"0031-7985","name":"Philologus","sp":null,"vol":"24"}} \ No newline at end of file +{"author":[{"affiliation":null,"fullname":"Schubart, A.","name":"A.","pid":[],"rank":1,"surname":"Schubart"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8"},"dateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"dateofcollection":"2019-02-17T01:33:44Z","dateoftransformation":null,"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"n/a"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"dateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.pdf","http://dx.doi.org/10.1524/phil.1866.24.14.561"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":null,"value":"cc0"},"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2578.35"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["https://zenodo.org/record/1634826/files/article.pdf"]},{"accessright":null,"collectedfrom":{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","https://academic.microsoft.com/#/detail/2601803436"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1866-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://zenodo.org/record/1634826"]}],"language":null,"lastupdatetimestamp":1591290751234,"oaiprovenance":null,"originalId":["10.1524/phil.1866.24.14.561","2601803436","od______2659::f9ba286bcf8429160b58072028052325"],"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1524/phil.1866.24.14.561"}],"publisher":{"dataInfo":null,"value":"Walter de Gruyter GmbH"},"relevantdate":[{"dataInfo":null,"qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-03-26T10:09:50Z"},{"dataInfo":null,"qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1866-01-01"}],"resourcetype":{"classid":"Other","classname":"Other","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":null,"value":"Crossref"},{"dataInfo":null,"value":null}],"subject":[{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Theology"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"History"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Art history"}],"title":[{"dataInfo":null,"qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"XIV. Die wörter άγαλμα, είχών, ξόανον, ανδρuις und verwandte, in ihren verschiedenen beziehungen. Nach Pausanias"},{"dataInfo":null,"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"gcube xiv die worter άγαλμα eίχών ξόανον ανδρuις und verwandte in ihren verschiedenen beziehungen nach pausanias"}],"journal":{"conferencedate":null,"conferenceplace":null,"dataInfo":null,"edition":null,"ep":null,"iss":null,"issnLinking":null,"issnOnline":"2196-7008","issnPrinted":"0031-7985","name":"Philologus","sp":null,"vol":"24"}} +{"author":[{"affiliation":[],"fullname":"van Someren, Christian","name":"Christian","pid":[],"rank":1,"surname":"van Someren"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"NARCIS"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"sobigdata::projects::2"}],"contributor":[],"country":[{"classid": "NL", "classname": "DE", "schemeid": "dnet:countries", "schemename": "dnet:countries","dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"}}],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:32.27Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Over het Energieakkoord. In het energieakkoord voor duurzame groei is afgesproken dat in 2020 14 procent van de opwek hernieuwbaar moet zijn en in 2023 16 procent. De doelstelling is een uitdagende opgave waarbij de eerste vraag is: \"Hoeveel hernieuwbare energie wordt er op dit moment opgewekt in Nederland?\" Deze website geeft antwoord op de vraag voor de actueel opgewekte windenergie, zonne-energie en biogas."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6ag","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"NARCIS"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["http://energieopwek.nl/"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282663379,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-05-08T17:46:55Z","harvestDate":"2020-05-25T15:30:24.079Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/hbo:oai:hbokennisbank.nl:hanzepure:oai:research.hanze.nl:publications/813a5dfa-4fd0-44c4-8cbf-310324dc724d","metadataNamespace":""}},"originalId":["DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/daMendeley"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:hs:18-813a5dfa-4fd0-44c4-8cbf-310324dc724d"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0038","classname":"0038","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"energieproductie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Management"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Monitoring"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Policy and Law"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Energie interventies en gedrag"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"publieke ondersteuning en communicatie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Professional practice & society"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"subtitle","classname":"subtitle","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"GCUBE opwek"}],"journal":null} From 7dbdd4a0fe9832273790a777b7bc54895b35df93 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 10 Aug 2022 15:13:10 +0200 Subject: [PATCH 5/5] [Clean Country]changes related to https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/241#issuecomment-6766 --- .../dhp/oa/graph/clean/country/CleanCountrySparkJob.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java index 3ba84a317..30991e9d7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/CleanCountrySparkJob.java @@ -10,6 +10,7 @@ import java.util.stream.Collectors; import javax.swing.text.html.Option; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -94,7 +95,6 @@ public class CleanCountrySparkJob implements Serializable { List hostedBy = spark .read() .textFile(datasourcePath) - // .filter((FilterFunction) ds -> !ds.equals(collectedfrom)) .collectAsList(); Dataset res = spark @@ -113,7 +113,8 @@ public class CleanCountrySparkJob implements Serializable { if (r .getPid() .stream() - .anyMatch(p -> p.getQualifier().getClassid().equals("doi") && pidInParam(p.getValue(), verifyParam))) { + .anyMatch(p -> p.getQualifier().getClassid() + .equals(PidType.doi) && pidInParam(p.getValue(), verifyParam))) { r .setCountry( r