From e87b790a609835a0666b527f42e5e0af8331b8fc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 22 Jun 2023 16:54:13 +0200 Subject: [PATCH] - --- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 6 +- .../oa/graph/dump/csv/SparkDumpResults.java | 28 +- .../SparkSelectResultsAndDumpRelations.java | 241 ++++++++++++++++++ .../oa/graph/dump/csv/oozie_app/workflow.xml | 2 +- .../serafeim/oozie_app/config-default.xml | 30 +++ .../dump/serafeim/oozie_app/workflow.xml | 102 ++++++++ .../dhp/oa/graph/dump/csv/DumpResultTest.java | 109 ++++---- .../graph/dump/csv/MoveOnSingleDirTest.java | 6 +- .../dhp/oa/graph/dump/csv/input/publication | 3 +- .../dhp/oa/graph/dump/csv/input/relation | 3 +- .../dump/csv/working/publication/result/part0 | 3 +- .../dump/csv/working/resultIds/part-00000 | 1 + pom.xml | 2 +- 13 files changed, 466 insertions(+), 70 deletions(-) create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/serafeim/SparkSelectResultsAndDumpRelations.java create mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/config-default.xml create mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/workflow.xml diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index 685af91..71c10be 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -82,9 +82,9 @@ public class SendToZenodoHDFS implements Serializable { if (!pString.endsWith("_SUCCESS")) { String name = pString.substring(pString.lastIndexOf("/") + 1); - FSDataInputStream inputStream = fileSystem.open(p); - zenodoApiClient.uploadIS(inputStream, name); - + try (FSDataInputStream inputStream = fileSystem.open(p)) { + zenodoApiClient.uploadIS(inputStream, name); + } } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkDumpResults.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkDumpResults.java index 9fd10f1..f94ad8f 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkDumpResults.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/SparkDumpResults.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.graph.dump.csv; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static org.apache.commons.lang3.StringUtils.remove; import static org.apache.commons.lang3.StringUtils.split; import java.io.Serializable; @@ -87,6 +88,7 @@ public class SparkDumpResults implements Serializable { Class inputClazz, String resultType, String workingPath) { Dataset resultIds = spark.read().textFile(workingPath + "/resultIds"); + // resultIds.foreach((ForeachFunction) r -> System.out.println(r)); Dataset results = Utils .readPath(spark, inputPath + "/" + resultType, inputClazz) .filter( @@ -108,8 +110,6 @@ public class SparkDumpResults implements Serializable { Encoders.bean(CSVResult.class)) .write() .option("compression", "gzip") -// .option("header", "true") -// .option("delimiter", Constants.SEP) .mode(SaveMode.Overwrite) .json(workingPath + "/" + resultType + "/result"); @@ -125,8 +125,6 @@ public class SparkDumpResults implements Serializable { .filter(Objects::nonNull) .write() .option("compression", "gzip") -// .option("header", "true") -// .option("delimiter", Constants.SEP) .mode(SaveMode.Overwrite) .json(workingPath + "/" + resultType + "/result_pid"); @@ -186,8 +184,6 @@ public class SparkDumpResults implements Serializable { Encoders.bean(CSVRelResAut.class)) .write() .option("compression", "gzip") -// .option("header", "true") -// .option("delimiter", Constants.SEP) .mode(SaveMode.Overwrite) .json(workingPath + "/" + resultType + "/result_author"); @@ -199,8 +195,6 @@ public class SparkDumpResults implements Serializable { Encoders.bean(CSVAuthor.class)) .write() .option("compression", "gzip") -// .option("header", "true") -// .option("delimiter", Constants.SEP) .mode(SaveMode.Overwrite) .json(workingPath + "/" + resultType + "/author"); @@ -264,7 +258,7 @@ public class SparkDumpResults implements Serializable { private static String getFieldValue(Field input) { if (input != null && StringUtils.isNotEmpty(input.getValue())) { - return input.getValue(); + return removeBreaks(input.getValue()); } else { return ""; } @@ -283,7 +277,7 @@ public class SparkDumpResults implements Serializable { if (Optional.ofNullable(r.getSubject()).isPresent()) ret.setKeywords(String.join(", ", r.getSubject().stream().map(s -> { if (StringUtils.isNotEmpty(s.getValue())) - return s.getValue().toLowerCase(); + return removeBreaks(s.getValue().toLowerCase()); else return null; }).filter(Objects::nonNull).distinct().collect(Collectors.toList()))); @@ -311,7 +305,7 @@ public class SparkDumpResults implements Serializable { return ""; for (Field abs : description) { if (StringUtils.isNotEmpty(abs.getValue())) { - return abs.getValue(); + return removeBreaks(abs.getValue()); } } return ""; @@ -322,14 +316,22 @@ public class SparkDumpResults implements Serializable { for (StructuredProperty title : titles) { if (StringUtils.isEmpty(firstTitle)) { if (StringUtils.isNotEmpty(title.getValue())) - firstTitle = title.getValue(); + firstTitle = removeBreaks(title.getValue()); } if (title.getQualifier().getClassid().equals(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid())) { if (StringUtils.isNotEmpty(title.getValue())) - return title.getValue(); + return removeBreaks(title.getValue()); } } + if (firstTitle != null) { + return removeBreaks(firstTitle); + } return ""; } + private static String removeBreaks(String input) { + return input.replace("\n", " ").replace("\t", " ").replace("\r", " "); + + } + } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/serafeim/SparkSelectResultsAndDumpRelations.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/serafeim/SparkSelectResultsAndDumpRelations.java new file mode 100644 index 0000000..1f31c3c --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/serafeim/SparkSelectResultsAndDumpRelations.java @@ -0,0 +1,241 @@ + +package eu.dnetlib.dhp.oa.graph.dump.serafeim; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import scala.Tuple2; + +/** + * @author miriam.baglioni + * @Date 04/05/23 + */ +//STEP 2 +public class SparkSelectResultsAndDumpRelations implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(SparkSelectResultsAndDumpRelations.class); + private static String RESULT_COMMUNITY_TABLE = "/result_community"; + private static String COMMUNITY_RESULT_IDS = "/communityResultIds"; + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkSelectResultsAndDumpRelations.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + final String workingPath = parser.get("workingPath"); + + List communityList = null; + Optional communities = Optional.ofNullable(parser.get("communities")); + if (communities.isPresent()) { + communityList = Arrays.asList(communities.get().split(";")); + } + + SparkConf conf = new SparkConf(); + + List finalCommunityList = communityList; + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + run(spark, inputPath, outputPath, workingPath, finalCommunityList); + + }); + + } + + private static void run(SparkSession spark, String inputPath, String outputPath, + String workingPath, + List communityList) { + + // select the result ids related to the set of communities considered + writeCommunityRelatedIds( + spark, inputPath, Publication.class, communityList, workingPath, "publication"); + writeCommunityRelatedIds( + spark, inputPath, Dataset.class, communityList, workingPath, "dataset"); + writeCommunityRelatedIds( + spark, inputPath, Software.class, communityList, workingPath, "software"); + writeCommunityRelatedIds( + spark, inputPath, OtherResearchProduct.class, communityList, + workingPath, "otherresearchproduct"); + + // select the relations with semantics cites + org.apache.spark.sql.Dataset relations = Utils + .readPath(spark, inputPath + "/relation", Relation.class) + .filter( + (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && + r.getRelClass().equals(ModelConstants.CITES)); + + // select the relations having as source one of the results related to the + // communities + org.apache.spark.sql.Dataset communityResultIds = spark + .read() + .textFile(workingPath + COMMUNITY_RESULT_IDS) + .distinct(); + + Utils + .readPath(spark, inputPath + "/publication", Publication.class) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() + && !p.getDataInfo().getInvisible()) + .map((MapFunction) p -> p.getId(), Encoders.STRING()) + .union( + Utils + .readPath(spark, inputPath + "/dataset", Dataset.class) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() + && !p.getDataInfo().getInvisible()) + .map((MapFunction) p -> p.getId(), Encoders.STRING())) + .union( + Utils + .readPath(spark, inputPath + "/software", Software.class) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() + && !p.getDataInfo().getInvisible()) + .map((MapFunction) p -> p.getId(), Encoders.STRING())) + .union( + Utils + .readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() + && !p.getDataInfo().getInvisible()) + .map((MapFunction) p -> p.getId(), Encoders.STRING())) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(workingPath + "/resultIds"); + + org.apache.spark.sql.Dataset resultIds = spark.read().textFile(workingPath + "/resultIds"); + + org.apache.spark.sql.Dataset oksource = communityResultIds + .joinWith(relations, communityResultIds.col("value").equalTo(relations.col("source"))) + .map( + (MapFunction, Relation>) t2 -> t2._2(), + Encoders.bean(Relation.class)); + oksource + .joinWith(resultIds, oksource.col("target").equalTo(resultIds.col("value"))) + .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath + "/relation"); + + writeNodes( + spark, inputPath + "/publication", Publication.class, outputPath + "/publication", + outputPath + "/relation", workingPath); + writeNodes( + spark, inputPath + "/dataset", Dataset.class, outputPath + "/dataset", outputPath + "/relation", + workingPath); + writeNodes( + spark, inputPath + "/software", Software.class, outputPath + "/software", outputPath + "/relation", + workingPath); + writeNodes( + spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class, + outputPath + "/otherresearchproduct", outputPath + "/relation", workingPath); + + } + + private static void writeNodes(SparkSession spark, String inputPath, Class clazz, + String outputPath, String relationPath, String workingPath) { + org.apache.spark.sql.Dataset citingRelations = Utils.readPath(spark, relationPath, Relation.class); + org.apache.spark.sql.Dataset result = Utils + .readPath(spark, inputPath, clazz) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() && + !p.getDataInfo().getInvisible()); + + // take the distinct result id for source and target of the relations + citingRelations + .flatMap( + (FlatMapFunction) r -> Arrays + .asList(r.getSource(), r.getTarget()) + .iterator(), + Encoders.STRING()) + .distinct() + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .text(workingPath + "/relationIds"); + + org.apache.spark.sql.Dataset relationIds = spark.read().textFile(workingPath + "/relationIds"); + + relationIds + .joinWith(result, relationIds.col("value").equalTo(result.col("id"))) + .map((MapFunction, R>) t2 -> t2._2(), Encoders.bean(clazz)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath); + } + + private static void writeCommunityRelatedIds(SparkSession spark, String inputPath, + Class clazz, List communityList, String outputPath, String resultType) { + org.apache.spark.sql.Dataset results = Utils + .readPath(spark, inputPath + "/" + resultType, clazz) + .filter( + (FilterFunction) p -> !p.getDataInfo().getDeletedbyinference() && + !p.getDataInfo().getInvisible() && + isRelatedToCommunities(p, communityList)); + results + .map((MapFunction) Result::getId, Encoders.STRING()) + .write() + .option("compression", "gzip") + .mode(SaveMode.Append) + .text(outputPath + COMMUNITY_RESULT_IDS); + +// results +// // .repartition(10000) +// .write() +// .option("compression", "gzip") +// .mode(SaveMode.Append) +// .json(outputPath + "/" + resultType); + + } + + private static boolean isRelatedToCommunities(R p, List communityList) { + return p + .getContext() + .stream() + .anyMatch( + c -> communityList.contains(c.getId()) || + (c.getId().contains("::") + && communityList.contains(c.getId().substring(0, c.getId().indexOf("::"))))); + } + +} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml index 42fa50a..543de1c 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml @@ -65,7 +65,7 @@ - + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/config-default.xml new file mode 100644 index 0000000..d262cb6 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/workflow.xml new file mode 100644 index 0000000..dc9ead6 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/workflow.xml @@ -0,0 +1,102 @@ + + + + sourcePath + the source path + + + outputPath + the output path + + + communities + the communities whose products should be dumped + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + yarn + cluster + select results ids connected to communities and dump relation + eu.dnetlib.dhp.oa.graph.dump.serafeim.SparkSelectResultsAndDumpRelations + dump-${projectVersion}.jar + + --executor-memory=10G + --executor-cores=3 + --driver-memory=10G + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath} + --workingPath${workingDir} + --outputPath${outputPath} + --communities${communities} + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + \ No newline at end of file diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpResultTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpResultTest.java index 9ef6ea1..4f3c4f9 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpResultTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpResultTest.java @@ -13,7 +13,10 @@ import java.util.Optional; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.dom4j.Document; @@ -30,8 +33,12 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.csv.model.CSVResult; import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.utils.DHPUtils; +import scala.Function1; /** * @author miriam.baglioni @@ -96,7 +103,7 @@ public class DumpResultTest { SparkDumpResults.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/output", + "-workingPath", workingDir.toString() + "/working", "-resultType", "publication", "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", @@ -105,61 +112,69 @@ public class DumpResultTest { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - Dataset tmp = spark - .read() - .option("header", "true") - .option("delimiter", Constants.SEP) - .csv(workingDir.toString() + "/working/publication/result"); + Dataset tmp = Utils + .readPath(spark, workingDir.toString() + "/working/publication/result", CSVResult.class); - Assertions.assertEquals(3, tmp.count()); - Row row = tmp - .where("id = '50|DansKnawCris::0224aae28af558f21768dbc6439c7a95'") + tmp.show(false); + + Assertions.assertEquals(4, tmp.count()); + CSVResult row = tmp + .filter( + (FilterFunction) r -> r.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")) .first(); - Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAs("accessright")); - Assertions.assertEquals("FI", row.getAs("country")); - Assertions.assertEquals("Lit.opg., bijl.", row.getAs("description")); - Assertions.assertEquals(3, split(row.getAs("keywords"), ", ").length); - Assertions.assertTrue(row.getAs("keywords").toString().contains("archeologie")); - Assertions.assertTrue(row.getAs("keywords").toString().contains("prospectie")); - Assertions.assertTrue(row.getAs("keywords").toString().contains("archaeology")); - Assertions.assertEquals("nl", row.getAs("language")); - Assertions.assertEquals("2007-01-01", row.getAs("publication_date")); - Assertions.assertEquals("FakePublisher1", row.getAs("publisher")); + Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAccessright()); + Assertions.assertEquals("FI", row.getCountry()); + Assertions.assertEquals("Lit.opg., bijl.", row.getDescription()); + Assertions.assertEquals(3, split(row.getKeywords(), ", ").length); + Assertions.assertTrue(row.getKeywords().toString().contains("archeologie")); + Assertions.assertTrue(row.getKeywords().toString().contains("prospectie")); + Assertions.assertTrue(row.getKeywords().toString().contains("archaeology")); + Assertions.assertEquals("nl", row.getLanguage()); + Assertions.assertEquals("2007-01-01", row.getPublication_date()); + Assertions.assertEquals("FakePublisher1", row.getPublisher()); Assertions .assertEquals( "Inventariserend veldonderzoek d.m.v. boringen (karterende fase) : Raadhuisstraat te Dirkshorn, gemeente Harenkarspel", - row.getAs("title")); - Assertions.assertEquals("publication", row.getAs("type")); + row.getTitle()); + Assertions.assertEquals("publication", row.getType()); row = tmp - .where("id = '50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9'") + .filter( + (FilterFunction) r -> r.getId().equals("50|doi_________::715fec7723208e6f17e855c204656e2f")) .first(); - Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAs("accessright")); - Assertions.assertEquals(2, split(row.getAs("country"), ", ").length); - Assertions.assertNull(row.getAs("description")); - Assertions.assertEquals(2, split(row.getAs("keywords"), ", ").length); - Assertions.assertTrue(row.getAs("keywords").toString().contains("archeologie")); - Assertions.assertTrue(row.getAs("keywords").toString().contains("archaeology")); - Assertions.assertEquals("UNKNOWN", row.getAs("language")); - Assertions.assertNull(row.getAs("publication_date")); - Assertions.assertNull(row.getAs("publisher")); - Assertions.assertEquals("None", row.getAs("title")); - Assertions.assertEquals("publication", row.getAs("type")); - row = tmp - .where("id = '50|DansKnawCris::26780065282e607306372abd0d808245'") - .first(); - Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAs("accessright")); - Assertions.assertNull(row.getAs("country")); - Assertions.assertNull(row.getAs("description")); - Assertions.assertEquals(2, split(row.getAs("keywords"), ", ").length); - Assertions.assertTrue(row.getAs("keywords").toString().contains("archeologie")); - Assertions.assertTrue(row.getAs("keywords").toString().contains("archaeology")); - Assertions.assertEquals("UNKNOWN", row.getAs("language")); - Assertions.assertNull(row.getAs("publication_date")); - Assertions.assertNull(row.getAs("publisher")); - Assertions.assertEquals("None", row.getAs("title")); - Assertions.assertEquals("publication", row.getAs("type")); + System.out.println(row.getPublisher()); + String a = row.getPublisher().replace("\\n", " "); + System.out.println(a); +// row = tmp +// .where("id = '50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9'") +// .first(); +// Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAs("accessright")); +// Assertions.assertEquals(2, split(row.getAs("country"), ", ").length); +// Assertions.assertNull(row.getAs("description")); +// Assertions.assertEquals(2, split(row.getAs("keywords"), ", ").length); +// Assertions.assertTrue(row.getAs("keywords").toString().contains("archeologie")); +// Assertions.assertTrue(row.getAs("keywords").toString().contains("archaeology")); +// Assertions.assertEquals("UNKNOWN", row.getAs("language")); +// Assertions.assertNull(row.getAs("publication_date")); +// Assertions.assertNull(row.getAs("publisher")); +// Assertions.assertEquals("None", row.getAs("title")); +// Assertions.assertEquals("publication", row.getAs("type")); +// +// row = tmp +// .where("id = '50|DansKnawCris::26780065282e607306372abd0d808245'") +// .first(); +// Assertions.assertEquals(ModelConstants.OPEN_ACCESS_RIGHT().getClassid(), row.getAs("accessright")); +// Assertions.assertNull(row.getAs("country")); +// Assertions.assertNull(row.getAs("description")); +// Assertions.assertEquals(2, split(row.getAs("keywords"), ", ").length); +// Assertions.assertTrue(row.getAs("keywords").toString().contains("archeologie")); +// Assertions.assertTrue(row.getAs("keywords").toString().contains("archaeology")); +// Assertions.assertEquals("UNKNOWN", row.getAs("language")); +// Assertions.assertNull(row.getAs("publication_date")); +// Assertions.assertNull(row.getAs("publisher")); +// Assertions.assertEquals("None", row.getAs("title")); +// Assertions.assertEquals("publication", row.getAs("type")); } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/MoveOnSingleDirTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/MoveOnSingleDirTest.java index dd8efca..279ba40 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/MoveOnSingleDirTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/csv/MoveOnSingleDirTest.java @@ -98,12 +98,14 @@ public class MoveOnSingleDirTest { .option("delimiter", Constants.SEP) .csv(workingDir.toString() + "/output/result"); - Assertions.assertEquals(21, tmp.count()); + Assertions.assertEquals(22, tmp.count()); Assertions.assertEquals(12, tmp.filter("type == 'dataset'").count()); Assertions.assertEquals(4, tmp.filter("type == 'other'").count()); - Assertions.assertEquals(4, tmp.filter("type == 'publication'").count()); + Assertions.assertEquals(5, tmp.filter("type == 'publication'").count()); Assertions.assertEquals(1, tmp.filter("type == 'software'").count()); + tmp.filter("type == 'publication'").show(false); + Assertions .assertEquals( 8, spark diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/publication b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/publication index 2b003e7..9ccfc32 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/publication +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/publication @@ -1,4 +1,5 @@ {"author":[{"affiliation":[],"fullname":"Alrasheed, Maryam","name":"Maryam","pid":[],"rank":1,"surname":"Alrasheed"},{"affiliation":[],"fullname":"Blondin, Michael","name":"Michael","pid":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0003-2914-2734"}],"rank":1,"surname":"Blondin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"enermaps"}, {"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"ni"},{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[{"classid":"FI","classname":"Finland","dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"country:instrepos","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"schemeid":"dnet:countries","schemename":"dnet:countries"}],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T16:14:18.452Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Lit.opg., bijl."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::0224aae28af558f21768dbc6439c7a95","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282676557,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:51:24Z","harvestDate":"2020-05-25T11:33:13.427Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550013110","metadataNamespace":""}},"originalId":["DansKnawCris::0224aae28af558f21768dbc6439c7a95"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1023/fakedoi"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"FakePublisher1"},"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"prospectie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Inventariserend veldonderzoek d.m.v. boringen (karterende fase) : Raadhuisstraat te Dirkshorn, gemeente Harenkarspel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Synthegra Archeologie Rapportenreeks P0502381"}],"journal":null} {"author":[{"affiliation":[],"fullname":"Blondin, Michael","name":"Michael","pid":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0003-2914-2734"}],"rank":1,"surname":"Blondin"},{"affiliation":[],"fullname":"Raskin, Mikhail","name":"Mikhail","pid":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6660-5673"}],"rank":2,"surname":"Raskin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"enermaps"}],"contributor":[],"country":[{"classid":"IT","classname":"Finland","dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"country:instrepos","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"schemeid":"dnet:countries","schemename":"dnet:countries"},{"classid":"FI","classname":"Finland","dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"country:instrepos","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.85"},"schemeid":"dnet:countries","schemename":"dnet:countries"}],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:03:57.761Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283087415,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:58:39Z","harvestDate":"2020-05-25T11:34:38.707Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800020324","metadataNamespace":""}},"originalId":["DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} {"author":[{"affiliation":[],"fullname":"Ward, Mark Daniel","name":"Mark Daniel","pid":[],"rank":1,"surname":"Ward"},{"affiliation":[],"fullname":"Szpankowski, Wojciech","name":"Wojciech","pid":[],"rank":2,"surname":"Szpankowski"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"ni"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:13:23.976Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::26780065282e607306372abd0d808245","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282897527,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:42:33Z","harvestDate":"2020-05-25T11:40:10.845Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550053196","metadataNamespace":""}},"originalId":["DansKnawCris::26780065282e607306372abd0d808245"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} -{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"enermaps"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:13:23.976Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::26780065282e607306372abd0d80fake","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282897527,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:42:33Z","harvestDate":"2020-05-25T11:40:10.845Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550053196","metadataNamespace":""}},"originalId":["DansKnawCris::26780065282e607306372abd0d808245"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} \ No newline at end of file +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"enermaps"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:13:23.976Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::26780065282e607306372abd0d80fake","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282897527,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:42:33Z","harvestDate":"2020-05-25T11:40:10.845Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550053196","metadataNamespace":""}},"originalId":["DansKnawCris::26780065282e607306372abd0d808245"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "resourcetype": {"classid": "0013", "classname": "Part of book or chapter of book", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}, "pid": [{"qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.1090/dimacs/044/20"}], "contributor": [], "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "created", "classname": "created", "schemename": "dnet:dataCite_date", "schemeid": "dnet:dataCite_date"}, "value": "2017-04-27"}, {"qualifier": {"classid": "published-print", "classname": "published-print", "schemename": "dnet:dataCite_date", "schemeid": "dnet:dataCite_date"}, "value": "1998-10-19"}], "collectedfrom": [{"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "id":"50|doi_________::715fec7723208e6f17e855c204656e2f", "subject": [], "dateofacceptance": {"value": "1998-10-19"}, "lastupdatetimestamp": 1675978002598, "author": [{"surname": "Deaton", "fullname": "R. Deaton", "pid": [], "name": "R.", "rank": 1}, {"surname": "Murphy", "fullname": "R. Murphy", "pid": [], "name": "R.", "rank": 2}, {"surname": "Garzon", "fullname": "M. Garzon", "pid": [], "name": "M.", "rank": 3}, {"surname": "Franceschetti", "fullname": "D. Franceschetti", "pid": [], "name": "D.", "rank": 4}, {"surname": "Stevens", "fullname": "S. Stevens", "pid": [], "name": "S.", "rank": 5}], "instance": [{"refereed": {"classid": "0000", "classname": "UNKNOWN", "schemename": "dnet:review_levels", "schemeid": "dnet:review_levels"}, "collectedfrom": {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, "hostedby": {"dataInfo": {"invisible": false, "deletedbyinference": false}, "value": "Unknown Repository", "key": "10|openaire____::55045bd2a65019fd8e6741a755395c8c"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "dateofacceptance": {"value": "1998-10-19"}, "url": ["https://doi.org/10.1090/dimacs/044/20"], "measures": [{"id": "influence", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "1.9184702E-8", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "popularity", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "5.79069E-9", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "influence_alt", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "51", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "popularity_alt", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "0.8491071", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "impulse", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "2", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}], "pid": [{"qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.1090/dimacs/044/20"}], "instancetype": {"classid": "0013", "classname": "Part of book or chapter of book", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}}], "dateofcollection": "2023-02-09T21:26:42Z", "fulltext": [], "description": [], "format": [], "measures": [{"id": "influence", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "1.7008906E-8", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "popularity", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "5.1452145E-9", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "influence_alt", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "51", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "popularity_alt", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "0.50946426", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}, {"id": "impulse", "unit": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "2", "key": "score"}, {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "value": "C", "key": "class"}]}], "coverage": [], "externalReference": [], "publisher": {"value": "American Mathematical\\n Society"}, "eoscifguidelines": [], "language": {"classid": "und", "classname": "Undetermined", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["10.1090/dimacs/044/20", "50|doiboost____::715fec7723208e6f17e855c204656e2f"], "source": [{"value": "Crossref"}], "context": [], "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Good encodings for DNA-based solutions to combinatorial problems"}]} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/relation b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/relation index 1a9a370..9987812 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/relation +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/input/relation @@ -1,4 +1,5 @@ {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::26780065282e607306372abd0d808245","subRelType":"provision","target":"50|DansKnawCris::26780065282e607306372abd0d808246"} {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","subRelType":"provision","target":"50|DansKnawCris::26780065282e607306372abd0d808245"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","subRelType":"provision","target":"50|DansKnawCris::0224aae28af558f21768dbc6439c7a95"} \ No newline at end of file +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","subRelType":"provision","target":"50|DansKnawCris::0224aae28af558f21768dbc6439c7a95"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"Cites","relType":"datasourceOrganization","source":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","subRelType":"provision","target":"50|doi_________::715fec7723208e6f17e855c204656e2f"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result/part0 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result/part0 index c264de7..704eacd 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result/part0 +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/publication/result/part0 @@ -1,4 +1,5 @@ {"accessright":"OPEN","country":"","description":"We describe the CoNLL-2002 shared task: language-independent named entity recognition. We give background information on the data sets and the evaluation method, present a general overview of the systems that have taken part in the task and discuss their performance.","id":"50|doi_dedup___::13b14c741a7b3420591c161f54ed5c80","keywords":"computer science - computation and language, i.2.7, computation and language (cs.cl), fos: computer and information sciences","language":"eng","publication_date":"2002-09-05","publisher":"","title":"Introduction to the CoNLL-2002 Shared Task: Language-Independent Named Entity Recognition","type":"publication"} {"accessright":"OPEN","country":"GB","description":"Following a strategy similar to that used in baker's yeast (Herrgård et al. Nat Biotechnol 26:1155-1160, 2008). A consensus yeast metabolic network obtained from a community approach to systems biology (Herrgård et al. 2008; Dobson et al. BMC Syst Biol 4:145, 2010). Further developments towards a genome-scale metabolic model of yeast (Dobson et al. 2010; Heavner et al. BMC Syst Biol 6:55, 2012). Yeast 5-an expanded reconstruction of the Saccharomyces cerevisiae metabolic network (Heavner et al. 2012) and in Salmonella typhimurium (Thiele et al. BMC Syst Biol 5:8, 2011). A community effort towards a knowledge-base and mathematical model of the human pathogen Salmonellatyphimurium LT2 (Thiele et al. 2011), a recent paper (Thiele et al. Nat Biotechnol 31:419-425, 2013). A community-driven global reconstruction of human metabolism (Thiele et al. 2013) described a much improved 'community consensus' reconstruction of the human metabolic network, called Recon 2, and the authors (that include the present ones) have made it freely available via a database at http://humanmetabolism.org/ and in SBML format at Biomodels (http://identifiers.org/biomodels.db/MODEL1109130000. This short analysis summarises the main findings, and suggests some approaches that will be able to exploit the availability of this model to advantage. © 2013 The Author(s).","id":"50|doi_dedup___::e0392f427fea9a701aa469e6f24bdf93","keywords":"review article, metabolism, modelling, systems biology, networks, metabolic networks, clinical biochemistry, biochemistry, endocrinology, diabetes and metabolism, community approach, operations research, metabolic network, human metabolism, metabolic model, biology, computational biology, sbml, 03 medical and health sciences, 0302 clinical medicine, 0303 health sciences, 030220 oncology & carcinogenesis, 030304 developmental biology, researchinstitutes_networks_beacons/manchester_institute_of_biotechnology, manchester institute of biotechnology","language":"eng","publication_date":"2013-08-01","publisher":"Springer US","title":"An analysis of a ‘community-driven’ reconstruction of the human metabolic network","type":"publication"} {"accessright":"OPEN","country":"","description":"Current machine learning systems operate, almost exclusively, in a statistical, or model-free mode, which entails severe theoretical limits on their power and performance. Such systems cannot reason about interventions and retrospection and, therefore, cannot serve as the basis for strong AI. To achieve human level intelligence, learning machines need the guidance of a model of reality, similar to the ones used in causal inference tasks. To demonstrate the essential role of such models, I will present a summary of seven tasks which are beyond reach of current machine learning systems and which have been accomplished using the tools of causal modeling.","id":"50|doi_dedup___::2436e90941a664931b54b956ade5b77b","keywords":"machine learning (cs.lg), artificial intelligence (cs.ai), machine learning (stat.ml), fos: computer and information sciences, mode (statistics), causal inference, artificial intelligence, business.industry, business, power (physics), computer science, machine learning, computer.software_genre, computer, basis (linear algebra), 03 medical and health sciences, 02 engineering and technology, 0202 electrical engineering, electronic engineering, information engineering, 0301 basic medicine, 020201 artificial intelligence & image processing, 030104 developmental biology, computer science - learning, computer science - artificial intelligence, statistics - machine learning","language":"und","publication_date":"2018-02-02","publisher":"arXiv","title":"Theoretical Impediments to Machine Learning With Seven Sparks from the Causal Revolution","type":"publication"} -{"accessright":"OPEN","country":"","description":"In most natural and engineered systems, a set of entities interact with each other in complicated patterns that can encompass multiple types of relationships, change in time, and include other types of complications. Such systems include multiple subsystems and layers of connectivity, and it is important to take such \"multilayer\" features into account to try to improve our understanding of complex systems. Consequently, it is necessary to generalize \"traditional\" network theory by developing (and validating) a framework and associated tools to study multilayer systems in a comprehensive fashion. The origins of such efforts date back several decades and arose in multiple disciplines, and now the study of multilayer networks has become one of the most important directions in network science. In this paper, we discuss the history of multilayer networks (and related concepts) and review the exploding body of work on such networks. To unify the disparate terminology in the large body of recent work, we discuss a general framework for multilayer networks, construct a dictionary of terminology to relate the numerous existing concepts to each other, and provide a thorough discussion that compares, contrasts, and translates between related notions such as multilayer networks, multiplex networks, interdependent networks, networks of networks, and many others. We also survey and discuss existing data sets that can be represented as multilayer networks. We review attempts to generalize single-layer-network diagnostics to multilayer networks. We also discuss the rapidly expanding research on multilayer-network models and notions like community structure, connected components, tensor decompositions, and various types of dynamical processes on multilayer networks. We conclude with a summary and an outlook.","id":"50|doi_dedup___::c5a574592f2e347f27be49d2c20a5558","keywords":"applied mathematics, computational mathematics, control and optimization, management science and operations research, computer networks and communications, data science, connected component, terminology, complex system, network theory, network science, construct (philosophy), computer science, interdependent networks, set (psychology), 01 natural sciences, 0103 physical sciences, 010306 general physics, 010305 fluids & plasmas, physics - physics and society, computer science - social and information networks, physics and society (physics.soc-ph), social and information networks (cs.si), fos: physical sciences, fos: computer and information sciences","language":"und","publication_date":"2013-09-27","publisher":"Oxford University Press (OUP)","title":"Multilayer networks","type":"publication"} \ No newline at end of file +{"accessright":"OPEN","country":"","description":"In most natural and engineered systems, a set of entities interact with each other in complicated patterns that can encompass multiple types of relationships, change in time, and include other types of complications. Such systems include multiple subsystems and layers of connectivity, and it is important to take such \"multilayer\" features into account to try to improve our understanding of complex systems. Consequently, it is necessary to generalize \"traditional\" network theory by developing (and validating) a framework and associated tools to study multilayer systems in a comprehensive fashion. The origins of such efforts date back several decades and arose in multiple disciplines, and now the study of multilayer networks has become one of the most important directions in network science. In this paper, we discuss the history of multilayer networks (and related concepts) and review the exploding body of work on such networks. To unify the disparate terminology in the large body of recent work, we discuss a general framework for multilayer networks, construct a dictionary of terminology to relate the numerous existing concepts to each other, and provide a thorough discussion that compares, contrasts, and translates between related notions such as multilayer networks, multiplex networks, interdependent networks, networks of networks, and many others. We also survey and discuss existing data sets that can be represented as multilayer networks. We review attempts to generalize single-layer-network diagnostics to multilayer networks. We also discuss the rapidly expanding research on multilayer-network models and notions like community structure, connected components, tensor decompositions, and various types of dynamical processes on multilayer networks. We conclude with a summary and an outlook.","id":"50|doi_dedup___::c5a574592f2e347f27be49d2c20a5558","keywords":"applied mathematics, computational mathematics, control and optimization, management science and operations research, computer networks and communications, data science, connected component, terminology, complex system, network theory, network science, construct (philosophy), computer science, interdependent networks, set (psychology), 01 natural sciences, 0103 physical sciences, 010306 general physics, 010305 fluids & plasmas, physics - physics and society, computer science - social and information networks, physics and society (physics.soc-ph), social and information networks (cs.si), fos: physical sciences, fos: computer and information sciences","language":"und","publication_date":"2013-09-27","publisher":"Oxford University Press (OUP)","title":"Multilayer networks","type":"publication"} +{"accessright":"UNKNOWN","country":"","description":"","id":"50|doi_________::715fec7723208e6f17e855c204656e2f","keywords":"","language":"und","publication_date":"1998-10-19","publisher":"American Mathematical\\n Society","title":"Good encodings for DNA-based solutions to combinatorial problems","type":"publication"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00000 b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00000 index e69de29..23c80f9 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00000 +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/csv/working/resultIds/part-00000 @@ -0,0 +1 @@ +50|doi_________::715fec7723208e6f17e855c204656e2f \ No newline at end of file diff --git a/pom.xml b/pom.xml index 7f650b4..ee4ec11 100644 --- a/pom.xml +++ b/pom.xml @@ -102,7 +102,7 @@ 5.6.1 3.5 11.0.2 - [2.12.1] + [2.13.1-patched] \ No newline at end of file