From f2118d771aee924e47c77c6b603cd4086c90d7ef Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Sep 2021 15:18:05 +0200 Subject: [PATCH 01/60] first steps in the implementation of the integration of opencitations --- .../dnetlib/dhp/common/collection/GetCSV.java | 1 - .../ExtractOpenCitationRefs.java | 63 +++++++++++++ .../opencitations/GetOpenCitationsRefs.java | 90 +++++++++++++++++++ .../opencitations/input_parameters.json | 20 +++++ .../oozie_app/config-default.xml | 58 ++++++++++++ .../opencitations/oozie_app/download.sh | 2 + .../opencitations/oozie_app/workflow.xml | 64 +++++++++++++ .../opencitations_parameters.json | 8 ++ .../eu/dnetlib/dhp/PropagationConstant.java | 11 ++- .../SparkOrcidToResultFromSemRelJob.java | 7 +- ...kResultToCommunityFromOrganizationJob.java | 4 +- ...parkResultToCommunityThroughSemRelJob.java | 4 +- 12 files changed, 322 insertions(+), 10 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ExtractOpenCitationRefs.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/download.sh create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/opencitations_parameters.json diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java index 44e19142cb..9696975cdd 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java @@ -10,7 +10,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import com.fasterxml.jackson.databind.ObjectMapper; -import com.opencsv.bean.CsvToBeanBuilder; public class GetCSV { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ExtractOpenCitationRefs.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ExtractOpenCitationRefs.java new file mode 100644 index 0000000000..58ec37e65a --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ExtractOpenCitationRefs.java @@ -0,0 +1,63 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations; + +import java.io.BufferedOutputStream; +import java.net.URI; +import java.util.zip.GZIPOutputStream; + +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.mortbay.log.Log; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class ExtractOpenCitationRefs { + public static void main(String[] args) throws Exception { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + ExtractOpenCitationRefs.class + .getResourceAsStream( + "/eu/dnetlib/dhp/a/ccionmanager/opencitations/opencitations_parameters.json"))); + parser.parseArgument(args); + final String hdfsServerUri = parser.get("hdfsServerUri"); + final String workingPath = hdfsServerUri.concat(parser.get("workingPath")); + final String outputPath = parser.get("outputPath"); + final String opencitationFile = parser.get("opencitationFile"); + + Path hdfsreadpath = new Path(workingPath.concat("/").concat(opencitationFile)); + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", workingPath); + conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); + conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); + FileSystem fs = FileSystem.get(URI.create(workingPath), conf); + FSDataInputStream crossrefFileStream = fs.open(hdfsreadpath); + try (TarArchiveInputStream tais = new TarArchiveInputStream( + new GzipCompressorInputStream(crossrefFileStream))) { + TarArchiveEntry entry = null; + while ((entry = tais.getNextTarEntry()) != null) { + if (!entry.isDirectory()) { + try ( + FSDataOutputStream out = fs + .create(new Path(outputPath.concat(entry.getName()).concat(".gz"))); + GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { + + IOUtils.copy(tais, gzipOs); + + } + + } + } + } + Log.info("Crossref dump reading completed"); + + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java new file mode 100644 index 0000000000..ea3bdf9b36 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java @@ -0,0 +1,90 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations; + +import java.io.*; +import java.io.Serializable; +import java.util.Objects; +import java.util.zip.GZIPOutputStream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetOpenCitationsRefs implements Serializable { + private static final Logger log = LoggerFactory.getLogger(GetOpenCitationsRefs.class); + + public static void main(final String[] args) throws IOException, ParseException { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + GetOpenCitationsRefs.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json")))); + + parser.parseArgument(args); + + final String inputFile = parser.get("inputFile"); + log.info("inputFile {}", inputFile); + + final String workingPath = parser.get("workingPath"); + log.info("workingPath {}", workingPath); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + + new GetOpenCitationsRefs().doExtract(inputFile, workingPath, fileSystem); + } + + private void doExtract(String inputFile, String workingPath, FileSystem fileSystem) + throws IOException { + + final Path path = new Path(inputFile); + + FSDataInputStream oc_zip = fileSystem.open(path); + + int count = 1; + try (ZipInputStream zis = new ZipInputStream(oc_zip)) { + ZipEntry entry = null; + while ((entry = zis.getNextEntry()) != null) { + + if (!entry.isDirectory()) { + String fileName = entry.getName(); + fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count; + count++; + try ( + FSDataOutputStream out = fileSystem + .create(new Path(workingPath + "/COCI/" + fileName + ".gz")); + GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { + + IOUtils.copy(zis, gzipOs); + + } + } + + } + + } + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json new file mode 100644 index 0000000000..4910ad11d7 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/input_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName": "if", + "paramLongName": "inputFile", + "paramDescription": "the zipped opencitations file", + "paramRequired": true + }, + { + "paramName": "wp", + "paramLongName": "workingPath", + "paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName": "hnn", + "paramLongName": "hdfsNameNode", + "paramDescription": "the hdfs name node", + "paramRequired": true + } +] diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/config-default.xml new file mode 100644 index 0000000000..a1755f329b --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + oozie.launcher.mapreduce.user.classpath.first + true + + + sparkExecutorNumber + 4 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/download.sh b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/download.sh new file mode 100644 index 0000000000..54f66287c5 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/download.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget -i $1 \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml new file mode 100644 index 0000000000..90d5e9eee9 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -0,0 +1,64 @@ + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + ${crossrefdumptoken} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs + --hdfsNameNode${nameNode} + --inputFile${inputFile} + --workingPath${workingDir}/OpenCitations + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/opencitations_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/opencitations_parameters.json new file mode 100644 index 0000000000..258d6816e9 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/opencitations_parameters.json @@ -0,0 +1,8 @@ +[ + {"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the default work path", "paramRequired": true}, + {"paramName":"f", "paramLongName":"opencitationFile", "paramDescription": "the name of the file", "paramRequired": true}, + {"paramName":"issm", "paramLongName":"isSparkSessionManaged", "paramDescription": "the name of the activities orcid file", "paramRequired": false}, + {"paramName":"o", "paramLongName":"outputPath", "paramDescription": "the name of the activities orcid file", "paramRequired": true} + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 0d7c74475e..23e97a97a8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -69,7 +69,7 @@ public class PropagationConstant { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_COUNTRY_INSTREPO_CLASS_ID, PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS)); + ModelConstants.DNET_PROVENANCE_ACTIONS)); return nc; } @@ -84,7 +84,8 @@ public class PropagationConstant { return di; } - public static Qualifier getQualifier(String inference_class_id, String inference_class_name, String qualifierSchema) { + public static Qualifier getQualifier(String inference_class_id, String inference_class_name, + String qualifierSchema) { Qualifier pa = new Qualifier(); pa.setClassid(inference_class_id); pa.setClassname(inference_class_name); @@ -108,7 +109,11 @@ public class PropagationConstant { r.setRelClass(rel_class); r.setRelType(rel_type); r.setSubRelType(subrel_type); - r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name, ModelConstants.DNET_PROVENANCE_ACTIONS)); + r + .setDataInfo( + getDataInfo( + inference_provenance, inference_class_id, inference_class_name, + ModelConstants.DNET_PROVENANCE_ACTIONS)); return r; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 68949b9004..a38b4da2e8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -173,14 +173,17 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); + p + .setQualifier( + getQualifier( + ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); p .setDataInfo( getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS)); + ModelConstants.DNET_PROVENANCE_ACTIONS)); Optional> authorPid = Optional.ofNullable(author.getPid()); if (authorPid.isPresent()) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 1289ff644f..50df08f8c8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -10,7 +10,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -22,6 +21,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; @@ -130,7 +130,7 @@ public class SparkResultToCommunityFromOrganizationJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS))); + ModelConstants.DNET_PROVENANCE_ACTIONS))); propagatedContexts.add(newContext); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 7f76ead94b..f31a262307 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -20,6 +19,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import scala.Tuple2; @@ -126,7 +126,7 @@ public class SparkResultToCommunityThroughSemRelJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS))); + ModelConstants.DNET_PROVENANCE_ACTIONS))); return newContext; } return null; From 5ec69889db0f07d247ce423da1c5efe09961e9b4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 27 Sep 2021 16:02:06 +0200 Subject: [PATCH 02/60] OpenCitations: creation of AS from OC --- .../dnetlib/dhp/common/collection/GetCSV.java | 1 + .../CreateActionSetSparkJob.java | 178 +++++++++++ .../opencitations/CreateRelationsJson.java | 176 ++++++++++ .../ExtractOpenCitationRefs.java | 63 ---- .../opencitations/GetOpenCitationsRefs.java | 13 +- .../opencitations/OpenCitationModel.java | 5 + .../opencitations/as_parameters.json | 25 ++ .../opencitations/oozie_app/download.sh | 2 +- .../opencitations/oozie_app/workflow.xml | 45 ++- .../CreateOpenCitationsASTest.java | 301 ++++++++++++++++++ .../opencitations/inputFiles/input1 | 8 + .../opencitations/inputFiles/input2 | 8 + .../opencitations/inputFiles/input3 | 9 + pom.xml | 2 +- 14 files changed, 757 insertions(+), 79 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateRelationsJson.java delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ExtractOpenCitationRefs.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/OpenCitationModel.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3 diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java index 9696975cdd..44e19142cb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java @@ -10,6 +10,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; public class GetCSV { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java new file mode 100644 index 0000000000..9486a74ce2 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -0,0 +1,178 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.io.Serializable; +import java.util.*; + +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import scala.Tuple2; + +public class CreateActionSetSparkJob implements Serializable { + public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations"; + public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations"; + private static final String ID_PREFIX = "50|doi_________::"; + private static final String TRUST = "0.91"; + + private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(final String[] args) throws IOException, ParseException { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + CreateActionSetSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json")))); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}", inputPath.toString()); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); + + final boolean shouldDuplicateRels = Boolean.valueOf(parser.get("shouldDuplicateRels")); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + extractContent(spark, inputPath, outputPath, shouldDuplicateRels); + }); + + } + + private static void extractContent(SparkSession spark, String inputPath, String outputPath, + boolean shouldDuplicateRels) { + spark + .sqlContext() + .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) + .flatMap( + (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), + Encoders.bean(Relation.class)) + .filter((FilterFunction) value -> value != null) + .toJavaRDD() + .map(p -> new AtomicAction(p.getClass(), p)) + .mapToPair( + aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), + new Text(OBJECT_MAPPER.writeValueAsString(aa)))) + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); + + } + + private static List createRelation(String value, boolean duplicate) { + String[] line = value.split(","); + if (!line[1].startsWith("10.")) { + return new ArrayList<>(); + } + List relationList = new ArrayList<>(); + + String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[1])); + final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[2])); + + relationList + .addAll( + getRelations( + citing, + cited)); + + if (duplicate && line[1].endsWith(".refs")) { + citing = ID_PREFIX + IdentifierFactory + .md5(CleaningFunctions.normalizePidValue("doi", line[1].substring(0, line[1].indexOf(".refs")))); + relationList.addAll(getRelations(citing, cited)); + } + + return relationList; + } + + private static Collection getRelations(String citing, String cited) { + + return Arrays + .asList( + getRelation(citing, cited, ModelConstants.CITES), + getRelation(cited, citing, ModelConstants.IS_CITED_BY)); + } + + public static Relation getRelation( + String source, + String target, + String relclass) { + Relation r = new Relation(); + r.setCollectedfrom(getCollectedFrom()); + r.setSource(source); + r.setTarget(target); + r.setRelClass(relclass); + r.setRelType(ModelConstants.RESULT_RESULT); + r.setSubRelType(ModelConstants.CITATION); + r + .setDataInfo( + getDataInfo()); + return r; + } + + public static List getCollectedFrom() { + KeyValue kv = new KeyValue(); + kv.setKey(ModelConstants.OPENOCITATIONS_ID); + kv.setValue(ModelConstants.OPENOCITATIONS_NAME); + + return Arrays.asList(kv); + } + + public static DataInfo getDataInfo() { + DataInfo di = new DataInfo(); + di.setInferred(false); + di.setDeletedbyinference(false); + di.setTrust(TRUST); + + di + .setProvenanceaction( + getQualifier(OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, ModelConstants.DNET_PROVENANCE_ACTIONS)); + return di; + } + + public static Qualifier getQualifier(String class_id, String class_name, + String qualifierSchema) { + Qualifier pa = new Qualifier(); + pa.setClassid(class_id); + pa.setClassname(class_name); + pa.setSchemeid(qualifierSchema); + pa.setSchemename(qualifierSchema); + return pa; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateRelationsJson.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateRelationsJson.java new file mode 100644 index 0000000000..4996a30898 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateRelationsJson.java @@ -0,0 +1,176 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.io.Serializable; +import java.util.*; + +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import scala.Tuple2; + +public class CreateRelationsJson implements Serializable { + public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations"; + public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations"; + private static final String ID_PREFIX = "50|doi_________::"; + private static final String TRUST = "0.91"; + + private static final Logger log = LoggerFactory.getLogger(CreateRelationsJson.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(final String[] args) throws IOException, ParseException { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + CreateRelationsJson.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json")))); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}", inputPath.toString()); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + extractContent(spark, inputPath, outputPath); + }); + + } + + private static void extractContent(SparkSession spark, String inputPath, String outputPath) { + spark + .sqlContext() + .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) + .flatMap( + (FlatMapFunction) value -> createRelation(value).iterator(), + Encoders.bean(Relation.class)) + .filter((FilterFunction) value -> value != null) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); + + } + + private static List createRelation(String value) { + String[] line = value.split(","); + if (!line[1].startsWith("10.")) { + return new ArrayList<>(); + } + List relationList = new ArrayList<>(); + + String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[1])); + final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[2])); + + relationList + .addAll( + getRelations( + citing, + cited)); + + if (line[1].endsWith(".refs")) { + citing = ID_PREFIX + IdentifierFactory + .md5(CleaningFunctions.normalizePidValue("doi", line[1].substring(0, line[1].indexOf(".refs")))); + relationList.addAll(getRelations(citing, cited)); + } + + return relationList; + } + + private static Collection getRelations(String citing, String cited) { + + return Arrays + .asList( + getRelation(citing, cited, ModelConstants.CITES), + getRelation(cited, citing, ModelConstants.IS_CITED_BY)); + } + + public static Relation getRelation( + String source, + String target, + String relclass) { + Relation r = new Relation(); + r.setCollectedfrom(getCollectedFrom()); + r.setSource(source); + r.setTarget(target); + r.setRelClass(relclass); + r.setRelType(ModelConstants.RESULT_RESULT); + r.setSubRelType(ModelConstants.CITATION); + r + .setDataInfo( + getDataInfo()); + return r; + } + + public static List getCollectedFrom() { + KeyValue kv = new KeyValue(); + kv.setKey(ModelConstants.OPENOCITATIONS_ID); + kv.setValue(ModelConstants.OPENOCITATIONS_NAME); + + return Arrays.asList(kv); + } + + public static DataInfo getDataInfo() { + DataInfo di = new DataInfo(); + di.setInferred(false); + di.setDeletedbyinference(false); + di.setTrust(TRUST); + + di + .setProvenanceaction( + getQualifier(OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, ModelConstants.DNET_PROVENANCE_ACTIONS)); + return di; + } + + public static Qualifier getQualifier(String class_id, String class_name, + String qualifierSchema) { + Qualifier pa = new Qualifier(); + pa.setClassid(class_id); + pa.setClassname(class_name); + pa.setSchemeid(qualifierSchema); + pa.setSchemename(qualifierSchema); + return pa; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ExtractOpenCitationRefs.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ExtractOpenCitationRefs.java deleted file mode 100644 index 58ec37e65a..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/ExtractOpenCitationRefs.java +++ /dev/null @@ -1,63 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.opencitations; - -import java.io.BufferedOutputStream; -import java.net.URI; -import java.util.zip.GZIPOutputStream; - -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.mortbay.log.Log; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -public class ExtractOpenCitationRefs { - public static void main(String[] args) throws Exception { - - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - ExtractOpenCitationRefs.class - .getResourceAsStream( - "/eu/dnetlib/dhp/a/ccionmanager/opencitations/opencitations_parameters.json"))); - parser.parseArgument(args); - final String hdfsServerUri = parser.get("hdfsServerUri"); - final String workingPath = hdfsServerUri.concat(parser.get("workingPath")); - final String outputPath = parser.get("outputPath"); - final String opencitationFile = parser.get("opencitationFile"); - - Path hdfsreadpath = new Path(workingPath.concat("/").concat(opencitationFile)); - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", workingPath); - conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); - conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); - FileSystem fs = FileSystem.get(URI.create(workingPath), conf); - FSDataInputStream crossrefFileStream = fs.open(hdfsreadpath); - try (TarArchiveInputStream tais = new TarArchiveInputStream( - new GzipCompressorInputStream(crossrefFileStream))) { - TarArchiveEntry entry = null; - while ((entry = tais.getNextTarEntry()) != null) { - if (!entry.isDirectory()) { - try ( - FSDataOutputStream out = fs - .create(new Path(outputPath.concat(entry.getName()).concat(".gz"))); - GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { - - IOUtils.copy(tais, gzipOs); - - } - - } - } - } - Log.info("Crossref dump reading completed"); - - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java index ea3bdf9b36..3530c9980e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/GetOpenCitationsRefs.java @@ -36,8 +36,8 @@ public class GetOpenCitationsRefs implements Serializable { parser.parseArgument(args); - final String inputFile = parser.get("inputFile"); - log.info("inputFile {}", inputFile); + final String[] inputFile = parser.get("inputFile").split(";"); + log.info("inputFile {}", inputFile.toString()); final String workingPath = parser.get("workingPath"); log.info("workingPath {}", workingPath); @@ -45,14 +45,17 @@ public class GetOpenCitationsRefs implements Serializable { final String hdfsNameNode = parser.get("hdfsNameNode"); log.info("hdfsNameNode {}", hdfsNameNode); - - Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); - new GetOpenCitationsRefs().doExtract(inputFile, workingPath, fileSystem); + GetOpenCitationsRefs ocr = new GetOpenCitationsRefs(); + + for (String file : inputFile) { + ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem); + } + } private void doExtract(String inputFile, String workingPath, FileSystem fileSystem) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/OpenCitationModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/OpenCitationModel.java new file mode 100644 index 0000000000..2da96084e3 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/OpenCitationModel.java @@ -0,0 +1,5 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations; + +public class OpenCitationModel { +} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json new file mode 100644 index 0000000000..308e020262 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json @@ -0,0 +1,25 @@ +[ + { + "paramName": "ip", + "paramLongName": "inputPath", + "paramDescription": "the zipped opencitations file", + "paramRequired": true + }, + { + "paramName": "op", + "paramLongName": "outputPath", + "paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the hdfs name node", + "paramRequired": false + }, { + "paramName": "sdr", + "paramLongName": "shouldDuplicateRels", + "paramDescription": "the hdfs name node", + "paramRequired": false +} +] diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/download.sh b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/download.sh index 54f66287c5..7a34f3c4ef 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/download.sh +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/download.sh @@ -1,2 +1,2 @@ #!/bin/bash -wget -i $1 \ No newline at end of file +for file in $(echo $1 | tr ";" "\n"); do curl -L $(echo $file | cut -d '@' -f 1 ) | hdfs dfs -put - $2/$(echo $file | cut -d '@' -f 2) ; done; \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml index 90d5e9eee9..d052791a36 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/oozie_app/workflow.xml @@ -20,9 +20,16 @@ + + + + ${wf:conf('resumeFrom') eq 'DownloadDump'} + ${wf:conf('resumeFrom') eq 'ExtractContent'} + + + - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -37,15 +44,13 @@ download.sh - ${url} - ${crossrefDumpPath} - ${crossrefdumpfilename} - ${crossrefdumptoken} + ${filelist} + ${workingPath}/Original HADOOP_USER_NAME=${wf:user()} download.sh - + @@ -53,12 +58,34 @@ eu.dnetlib.dhp.actionmanager.opencitations.GetOpenCitationsRefs --hdfsNameNode${nameNode} --inputFile${inputFile} - --workingPath${workingDir}/OpenCitations + --workingPath${workingPath} - + - + + + yarn + cluster + Produces the AS for OC + eu.dnetlib.dhp.actionmanager.opencitations.CreateActionSetSparkJob + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --inputPath${workingPath}/COCI + --outputPath${outputPath} + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java new file mode 100644 index 0000000000..f3ceaa1ecd --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java @@ -0,0 +1,301 @@ + +package eu.dnetlib.dhp.actionmanager.opencitations; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.io.Text; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; + +public class CreateOpenCitationsASTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(CreateOpenCitationsASTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(CreateOpenCitationsASTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(CreateOpenCitationsASTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(CreateOpenCitationsASTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void testNumberofRelations() throws Exception { + + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + .getPath(); + + CreateActionSetSparkJob + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + inputPath, + "-outputPath", + workingDir.toString() + "/actionSet" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Relation) aa.getPayload())); + + assertEquals(60, tmp.count()); + + tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + + } + + @Test + void testRelationsCollectedFrom() throws Exception { + + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + .getPath(); + + CreateActionSetSparkJob + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + inputPath, + "-outputPath", + workingDir.toString() + "/actionSet" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Relation) aa.getPayload())); + + tmp.foreach(r -> { + assertEquals(ModelConstants.OPENOCITATIONS_NAME, r.getCollectedfrom().get(0).getValue()); + assertEquals(ModelConstants.OPENOCITATIONS_ID, r.getCollectedfrom().get(0).getKey()); + }); + + } + + @Test + void testRelationsDataInfo() throws Exception { + + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + .getPath(); + + CreateActionSetSparkJob + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + inputPath, + "-outputPath", + workingDir.toString() + "/actionSet" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Relation) aa.getPayload())); + + tmp.foreach(r -> { + assertEquals(false, r.getDataInfo().getInferred()); + assertEquals(false, r.getDataInfo().getDeletedbyinference()); + assertEquals("0.91", r.getDataInfo().getTrust()); + assertEquals( + CreateActionSetSparkJob.OPENCITATIONS_CLASSID, r.getDataInfo().getProvenanceaction().getClassid()); + assertEquals( + CreateActionSetSparkJob.OPENCITATIONS_CLASSNAME, r.getDataInfo().getProvenanceaction().getClassname()); + assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, r.getDataInfo().getProvenanceaction().getSchemeid()); + assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, r.getDataInfo().getProvenanceaction().getSchemename()); + }); + + } + + @Test + void testRelationsSemantics() throws Exception { + + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + .getPath(); + + CreateActionSetSparkJob + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + inputPath, + "-outputPath", + workingDir.toString() + "/actionSet" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Relation) aa.getPayload())); + + tmp.foreach(r -> { + assertEquals("citation", r.getSubRelType()); + assertEquals("resultResult", r.getRelType()); + }); + assertEquals(30, tmp.filter(r -> r.getRelClass().equals("Cites")).count()); + assertEquals(30, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count()); + + } + + @Test + void testRelationsSourceTargetPrefix() throws Exception { + + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + .getPath(); + + CreateActionSetSparkJob + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + inputPath, + "-outputPath", + workingDir.toString() + "/actionSet" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Relation) aa.getPayload())); + + tmp.foreach(r -> { + assertEquals("50|doi_________::", r.getSource().substring(0, 17)); + assertEquals("50|doi_________::", r.getTarget().substring(0, 17)); + }); + + } + + @Test + void testRelationsSourceTargetCouple() throws Exception { + final String doi1 = "50|doi_________::" + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-015-3684-x")); + final String doi2 = "50|doi_________::" + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/j.1551-2916.2008.02408.x")); + final String doi3 = "50|doi_________::" + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-014-2114-9")); + final String doi4 = "50|doi_________::" + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/j.ceramint.2013.09.069")); + final String doi5 = "50|doi_________::" + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-009-9913-4")); + final String doi6 = "50|doi_________::" + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/0038-1098(72)90370-5")); + + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + .getPath(); + + CreateActionSetSparkJob + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + inputPath, + "-outputPath", + workingDir.toString() + "/actionSet" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Relation) aa.getPayload())); + + JavaRDD check = tmp.filter(r -> r.getSource().equals(doi1) || r.getTarget().equals(doi1)); + + assertEquals(10, check.count()); + + check.foreach(r -> { + if (r.getSource().equals(doi2) || r.getSource().equals(doi3) || r.getSource().equals(doi4) || + r.getSource().equals(doi5) || r.getSource().equals(doi6)) { + assertEquals(ModelConstants.IS_CITED_BY, r.getRelClass()); + assertEquals(doi1, r.getTarget()); + } + }); + + assertEquals(5, check.filter(r -> r.getSource().equals(doi1)).count()); + check.filter(r -> r.getSource().equals(doi1)).foreach(r -> assertEquals(ModelConstants.CITES, r.getRelClass())); + + } +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 new file mode 100644 index 0000000000..d93d6fd999 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1 @@ -0,0 +1,8 @@ +oci,citing,cited,creation,timespan,journal_sc,author_sc +02001000007362801000805046300010563030608046333-0200101010136193701050501630209010637020000083700020400083733,10.1007/s10854-015-3684-x,10.1111/j.1551-2916.2008.02408.x,2015-09-01,P7Y2M,no,no +02001000007362801000805046300010563030608046333-02001000007362801000805046300010463020101046309,10.1007/s10854-015-3684-x,10.1007/s10854-014-2114-9,2015-09-01,P1Y2M4D,yes,no +02001000007362801000805046300010563030608046333-020010001063619371214271022182329370200010337000937000609,10.1007/s10854-015-3684-x,10.1016/j.ceramint.2013.09.069,2015-09-01,P1Y6M,no,no +02001000007362801000805046300010563030608046333-02001000007362801000805046300000963090901036304,10.1007/s10854-015-3684-x,10.1007/s10854-009-9913-4,2015-09-01,P6Y3M10D,yes,no +02001000007362801000805046300010563030608046333-02001000106360000030863010009085807025909000307006305,10.1007/s10854-015-3684-x,10.1016/0038-1098(72)90370-5,2015-09-01,P43Y8M,no,no +02001000007362801000805046300010563030608056309-02001000106361937281010370200010437000937000308,10.1007/s10854-015-3685-9,10.1016/j.saa.2014.09.038,2015-09-03,P0Y7M,no,no +02001000007362801000805046300010563030608056309-0200100010636193722102912171027370200010537000437000106,10.1007/s10854-015-3685-9,10.1016/j.matchar.2015.04.016,2015-09-03,P0Y2M,no,no \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 new file mode 100644 index 0000000000..14ee8b3543 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2 @@ -0,0 +1,8 @@ +oci,citing,cited,creation,timespan,journal_sc,author_sc +02001000308362804010509076300010963000003086301-0200100020936020001003227000009010004,10.1038/s41597-019-0038-1,10.1029/2010wr009104,2019-04-15,P8Y1M,no,no +02001000308362804010509076300010963000003086301-0200100010636280103060463080105025800015900000006006303,10.1038/s41597-019-0038-1,10.1016/s1364-8152(01)00060-3,2019-04-15,P17Y3M,no,no +02001000308362804010509076300010963000003086301-02001000007362800000407076300010063000401066333,10.1038/s41597-019-0038-1,10.1007/s00477-010-0416-x,2019-04-15,P8Y9M6D,no,no +02001000308362804010509076300010963000003086301-02001000007362800000700046300010363000905016308,10.1038/s41597-019-0038-1,10.1007/s00704-013-0951-8,2019-04-15,P5Y9M23D,no,no +02001000308362804010509076300010963000003086301-02001000002361924123705070707,10.1038/s41597-019-0038-1,10.1002/joc.5777,2019-04-15,P0Y8M1D,no,no +02001000308362804010509076300010963000003086301-02005010904361714282863020263040504076302000108,10.1038/s41597-019-0038-1,10.5194/hess-22-4547-2018,2019-04-15,P0Y7M18D,no,no +02001000308362804010509076300010963000003086301-02001000002361924123703050404,10.1038/s41597-019-0038-1,10.1002/joc.3544,2019-04-15,P6Y9M6D,no,no \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3 new file mode 100644 index 0000000000..0611929d5b --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3 @@ -0,0 +1,9 @@ +oci,citing,cited,creation,timespan,journal_sc,author_sc +0200100000236090708010101090307000202023727141528-020050302063600040000010307,10.1002/9781119370222.refs,10.5326/0400137,2020-06-22,P16Y3M,no,no +0200100000236090708010101090307000202023727141528-0200101010136193701050302630905003337020000073700000301093733,10.1002/9781119370222.refs,10.1111/j.1532-950x.2007.00319.x,2020-06-22,P12Y8M,no,no +0200100000236090708010101090307000202023727141528-0200101010136312830370102030509,10.1002/9781119370222.refs,10.1111/vsu.12359,2020-06-22,P4Y10M29D,no,no +0200100000236090708010101090307000202023727141528-020050302063600030900020904,10.1002/9781119370222.refs,10.5326/0390294,2020-06-22,P17Y1M,no,no +0200100000236090708010101090307000202023727141528-020050302063600040200030701,10.1002/9781119370222.refs,10.5326/0420371,2020-06-22,P13Y9M,no,no +0200100000236090708010101090307000202023727141528-0200101010136193701050302630905003337020001033701020000003733,10.1002/9781119370222.refs,10.1111/j.1532-950x.2013.12000.x,2020-06-22,P7Y2M,no,no +0200100000236090708010101090307000202023727141528-020010008003600000408000106093702000006370306070200,10.1002/9781119370222.refs,10.1080/00480169.2006.36720,2020-06-22,P13Y6M,no,no +0200100000236090708010101090307000202023727141528-0200101010136193701070501630008010337020000063700000003033733,10.1002/9781119370222.refs,10.1111/j.1751-0813.2006.00033.x,2020-06-22,P13Y8M,no,no \ No newline at end of file diff --git a/pom.xml b/pom.xml index 61b0ad8733..bd322daae0 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.18] + [2.7.19-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From 326bf63775db8e3ef896a2aa1a091ec6e1d2b0ce Mon Sep 17 00:00:00 2001 From: miconis Date: Wed, 13 Oct 2021 12:24:48 +0200 Subject: [PATCH 03/60] integration of parent child orgs relations --- .../raw/MigrateDbEntitiesApplication.java | 28 +++++++++++++++++++ .../sql/queryParentChildRelsOpenOrgs.sql | 13 +++++++++ 2 files changed, 41 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryParentChildRelsOpenOrgs.sql diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index d78732f9b9..29df17f1bb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -186,6 +186,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i log.info("Processing Openorgs Merge Rels..."); smdbe.execute("queryOpenOrgsSimilarityForProvision.sql", smdbe::processOrgOrgMergeRels); + + log.info("Processing Openorgs Parent/Child Rels..."); + smdbe.execute("queryParentChildRelsOpenOrgs.sql", smdbe::processOrgOrgParentChildRels); break; case openaire_organizations: @@ -689,6 +692,31 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i } } + public List processOrgOrgParentChildRels(final ResultSet rs) { + try { + final DataInfo info = prepareDataInfo(rs); // TODO + + final String orgId1 = createOpenaireId(20, rs.getString("source"), true); + final String orgId2 = createOpenaireId(20, rs.getString("target"), true); + + final List collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); + + final Relation r = new Relation(); + r.setRelType(ORG_ORG_RELTYPE); + r.setSubRelType(ModelConstants.RELATIONSHIP); + r.setRelClass(rs.getString("reltype").equalsIgnoreCase("parent") ? ModelConstants.IS_PARENT_OF : ModelConstants.IS_CHILD_OF); + r.setSource(orgId1); + r.setTarget(orgId2); + r.setCollectedfrom(collectedFrom); + r.setDataInfo(info); + r.setLastupdatetimestamp(lastUpdateTimestamp); + + return Arrays.asList(r); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + public List processOrgOrgSimRels(final ResultSet rs) { try { final DataInfo info = prepareDataInfo(rs); // TODO diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryParentChildRelsOpenOrgs.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryParentChildRelsOpenOrgs.sql new file mode 100644 index 0000000000..388fee3f58 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryParentChildRelsOpenOrgs.sql @@ -0,0 +1,13 @@ +SELECT + id1 AS source, + id2 AS target, + reltype AS type, + false AS inferred, + false AS deletedbyinference, + 0.95 AS trust, + '' AS inferenceprovenance, + 'openaire____::openorgs' AS collectedfromid, + 'OpenOrgs Database' AS collectedfromname, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction +FROM relationships +WHERE reltype = 'Child' OR reltype = 'Parent' \ No newline at end of file From 995c1eddaf6adcf3a3a24136fc948cd939a5e7d0 Mon Sep 17 00:00:00 2001 From: miconis Date: Wed, 13 Oct 2021 17:07:10 +0200 Subject: [PATCH 04/60] minor change --- .../dhp/oa/graph/raw/MigrateDbEntitiesApplication.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 29df17f1bb..00184bc9db 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -699,12 +699,16 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final String orgId1 = createOpenaireId(20, rs.getString("source"), true); final String orgId2 = createOpenaireId(20, rs.getString("target"), true); - final List collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); + final List collectedFrom = listKeyValues( + createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); final Relation r = new Relation(); r.setRelType(ORG_ORG_RELTYPE); r.setSubRelType(ModelConstants.RELATIONSHIP); - r.setRelClass(rs.getString("reltype").equalsIgnoreCase("parent") ? ModelConstants.IS_PARENT_OF : ModelConstants.IS_CHILD_OF); + r + .setRelClass( + rs.getString("reltype").equalsIgnoreCase("parent") ? ModelConstants.IS_PARENT_OF + : ModelConstants.IS_CHILD_OF); r.setSource(orgId1); r.setTarget(orgId2); r.setCollectedfrom(collectedFrom); From 1cc09adfaa118a54bf1977e56853044154027268 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 18 Oct 2021 14:11:27 +0200 Subject: [PATCH 05/60] Opencitations: chenaged the test class to mirror the creation or not of duplicate dois for .refs oc original plus added optional parameter to duplicate the relation --- .../CreateActionSetSparkJob.java | 5 +- .../opencitations/CreateRelationsJson.java | 176 ------------------ .../opencitations/OpenCitationModel.java | 5 - .../CreateOpenCitationsASTest.java | 40 +++- 4 files changed, 41 insertions(+), 185 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateRelationsJson.java delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/OpenCitationModel.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index 9486a74ce2..eeb86a8ff5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -65,7 +65,10 @@ public class CreateActionSetSparkJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath {}", outputPath); - final boolean shouldDuplicateRels = Boolean.valueOf(parser.get("shouldDuplicateRels")); + final boolean shouldDuplicateRels = + Optional.ofNullable(parser.get("shouldDuplicateRels")) + .map(Boolean::valueOf) + .orElse(Boolean.FALSE); SparkConf conf = new SparkConf(); runWithSparkSession( diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateRelationsJson.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateRelationsJson.java deleted file mode 100644 index 4996a30898..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateRelationsJson.java +++ /dev/null @@ -1,176 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.opencitations; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.IOException; -import java.io.Serializable; -import java.util.*; - -import org.apache.commons.cli.ParseException; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.action.AtomicAction; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; -import scala.Tuple2; - -public class CreateRelationsJson implements Serializable { - public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations"; - public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations"; - private static final String ID_PREFIX = "50|doi_________::"; - private static final String TRUST = "0.91"; - - private static final Logger log = LoggerFactory.getLogger(CreateRelationsJson.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - public static void main(final String[] args) throws IOException, ParseException { - - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Objects - .requireNonNull( - CreateRelationsJson.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json")))); - - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}", inputPath.toString()); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}", outputPath); - - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - extractContent(spark, inputPath, outputPath); - }); - - } - - private static void extractContent(SparkSession spark, String inputPath, String outputPath) { - spark - .sqlContext() - .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) - .flatMap( - (FlatMapFunction) value -> createRelation(value).iterator(), - Encoders.bean(Relation.class)) - .filter((FilterFunction) value -> value != null) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); - - } - - private static List createRelation(String value) { - String[] line = value.split(","); - if (!line[1].startsWith("10.")) { - return new ArrayList<>(); - } - List relationList = new ArrayList<>(); - - String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[1])); - final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", line[2])); - - relationList - .addAll( - getRelations( - citing, - cited)); - - if (line[1].endsWith(".refs")) { - citing = ID_PREFIX + IdentifierFactory - .md5(CleaningFunctions.normalizePidValue("doi", line[1].substring(0, line[1].indexOf(".refs")))); - relationList.addAll(getRelations(citing, cited)); - } - - return relationList; - } - - private static Collection getRelations(String citing, String cited) { - - return Arrays - .asList( - getRelation(citing, cited, ModelConstants.CITES), - getRelation(cited, citing, ModelConstants.IS_CITED_BY)); - } - - public static Relation getRelation( - String source, - String target, - String relclass) { - Relation r = new Relation(); - r.setCollectedfrom(getCollectedFrom()); - r.setSource(source); - r.setTarget(target); - r.setRelClass(relclass); - r.setRelType(ModelConstants.RESULT_RESULT); - r.setSubRelType(ModelConstants.CITATION); - r - .setDataInfo( - getDataInfo()); - return r; - } - - public static List getCollectedFrom() { - KeyValue kv = new KeyValue(); - kv.setKey(ModelConstants.OPENOCITATIONS_ID); - kv.setValue(ModelConstants.OPENOCITATIONS_NAME); - - return Arrays.asList(kv); - } - - public static DataInfo getDataInfo() { - DataInfo di = new DataInfo(); - di.setInferred(false); - di.setDeletedbyinference(false); - di.setTrust(TRUST); - - di - .setProvenanceaction( - getQualifier(OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, ModelConstants.DNET_PROVENANCE_ACTIONS)); - return di; - } - - public static Qualifier getQualifier(String class_id, String class_name, - String qualifierSchema) { - Qualifier pa = new Qualifier(); - pa.setClassid(class_id); - pa.setClassname(class_name); - pa.setSchemeid(qualifierSchema); - pa.setSchemename(qualifierSchema); - return pa; - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/OpenCitationModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/OpenCitationModel.java deleted file mode 100644 index 2da96084e3..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/OpenCitationModel.java +++ /dev/null @@ -1,5 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.opencitations; - -public class OpenCitationModel { -} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java index f3ceaa1ecd..7567f855ba 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java @@ -84,6 +84,8 @@ public class CreateOpenCitationsASTest { new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-shouldDuplicateRels", + Boolean.TRUE.toString(), "-inputPath", inputPath, "-outputPath", @@ -99,7 +101,39 @@ public class CreateOpenCitationsASTest { assertEquals(60, tmp.count()); - tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + + } + + @Test + void testNumberofRelations2() throws Exception { + + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + .getPath(); + + CreateActionSetSparkJob + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + inputPath, + "-outputPath", + workingDir.toString() + "/actionSet" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Relation) aa.getPayload())); + + assertEquals(44, tmp.count()); + + // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); } @@ -206,8 +240,8 @@ public class CreateOpenCitationsASTest { assertEquals("citation", r.getSubRelType()); assertEquals("resultResult", r.getRelType()); }); - assertEquals(30, tmp.filter(r -> r.getRelClass().equals("Cites")).count()); - assertEquals(30, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count()); + assertEquals(22, tmp.filter(r -> r.getRelClass().equals("Cites")).count()); + assertEquals(22, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count()); } From 5f780a6ba19233edf20014cb0b81a8246e962384 Mon Sep 17 00:00:00 2001 From: miconis Date: Mon, 18 Oct 2021 23:30:40 +0200 Subject: [PATCH 06/60] bug fix in migrate entities: parameter name was wrong --- .../dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java | 2 +- .../eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 00184bc9db..e453f79186 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -707,7 +707,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i r.setSubRelType(ModelConstants.RELATIONSHIP); r .setRelClass( - rs.getString("reltype").equalsIgnoreCase("parent") ? ModelConstants.IS_PARENT_OF + rs.getString("type").equalsIgnoreCase("parent") ? ModelConstants.IS_PARENT_OF : ModelConstants.IS_CHILD_OF); r.setSource(orgId1); r.setTarget(orgId2); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index 321ca40909..8139ecf7e3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -258,7 +258,7 @@ ${wf:conf('reuseDB') eq false} ${wf:conf('reuseDB') eq true} - + From c01dd0c92524c21bfe8f06649a6d75f942ffbcc7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 20 Oct 2021 13:55:07 +0200 Subject: [PATCH 07/60] registered oaf model classes for the KryoSerializer --- .../eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java index 4cff88d820..25b343c1a2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java @@ -76,6 +76,9 @@ public class CopyHdfsOafApplication extends AbstractMigrationApplication { final Set paths = mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation); final SparkConf conf = new SparkConf(); + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); + runWithSparkSession(conf, isSparkSessionManaged, spark -> processPaths(spark, vocs, hdfsPath, paths)); } From 00b78b9c588e0c36264a3cd128419d91b6dd91c4 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 20 Oct 2021 14:04:45 +0200 Subject: [PATCH 08/60] cleanup: mapping contents in the graph already defined in the OAF graph model doesn't require to be aware of the vocabularies --- .../dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java | 9 +-------- .../dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json | 6 ------ .../dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml | 1 - 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java index 25b343c1a2..c016e21565 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java @@ -67,23 +67,16 @@ public class CopyHdfsOafApplication extends AbstractMigrationApplication { final String hdfsPath = parser.get("hdfsPath"); log.info("hdfsPath: {}", hdfsPath); - final String isLookupUrl = parser.get("isLookupUrl"); - log.info("isLookupUrl: {}", isLookupUrl); - - final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl); - final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService); - final Set paths = mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation); final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ModelSupport.getOafModelClasses()); - runWithSparkSession(conf, isSparkSessionManaged, spark -> processPaths(spark, vocs, hdfsPath, paths)); + runWithSparkSession(conf, isSparkSessionManaged, spark -> processPaths(spark, hdfsPath, paths)); } public static void processPaths(final SparkSession spark, - final VocabularyGroup vocs, final String outputPath, final Set paths) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json index 1e862198f1..1d89017c52 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json @@ -28,11 +28,5 @@ "paramLongName": "mdInterpretation", "paramDescription": "metadata interpretation", "paramRequired": true - }, - { - "paramName": "isu", - "paramLongName": "isLookupUrl", - "paramDescription": "the url of the ISLookupService", - "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index c4cca52f62..137c69ed83 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -569,7 +569,6 @@ --mdFormatOAF --mdLayoutstore --mdInterpretationgraph - --isLookupUrl${isLookupUrl} From 4f8970f8ed73acd2c89c23af6c209d0d4443c59c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 20 Oct 2021 14:14:53 +0200 Subject: [PATCH 09/60] [stats] reducing the step22 wait time --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh index dc19f84b4b..03aa535e10 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash curl --request GET $1/cache/updateCache -sleep 20h \ No newline at end of file +sleep 6h \ No newline at end of file From ae4e99a4715811e193ee7892f679125cd8dbbdaa Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 20 Oct 2021 17:12:08 +0200 Subject: [PATCH 10/60] Adapted workflow of resolution of PID to work into OpenAIRE data workflow - Added relations in both verse on all Scholexplorer datasources --- .../java/eu/dnetlib/dhp/utils/DHPUtils.java | 9 + .../DataciteToOAFTransformation.scala | 11 +- .../GenerateDataciteDatasetSpark.scala | 19 ++- .../dhp/collection/CollectionUtils.scala | 43 +++++ .../bio/SparkTransformBioDatabaseToOAF.scala | 9 +- .../dhp/sx/bio/ebi/SparkEBILinksToOaf.scala | 2 + .../collection/oozie_app/config-default.xml | 23 +++ .../collection/oozie_app/workflow.xml | 52 ++++++ .../datacite/generate_dataset_params.json | 4 +- .../oozie_app/config-default.xml | 23 +++ .../transformation/oozie_app/workflow.xml | 126 ++++++++++++++ .../resolution/SparkResolveRelation.scala | 152 +++++++++++++++++ .../dhp/sx/graph/SparkResolveRelation.scala | 154 ------------------ .../resolution}/oozie_app/config-default.xml | 0 .../graph/resolution}/oozie_app/workflow.xml | 34 +--- .../resolution}/resolve_relations_params.json | 3 +- .../sx/graph/scholix/ScholixGraphTest.scala | 2 +- 17 files changed, 468 insertions(+), 198 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/{sx/graph/resolverelation => oa/graph/resolution}/oozie_app/config-default.xml (100%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/{sx/graph/resolverelation => oa/graph/resolution}/oozie_app/workflow.xml (54%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/{sx/graph => oa/graph/resolution}/resolve_relations_params.json (50%) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java index 6a86f30df7..c53affbadf 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java @@ -10,6 +10,7 @@ import java.util.Properties; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import org.apache.commons.codec.binary.Base64; import org.apache.commons.codec.binary.Base64OutputStream; import org.apache.commons.codec.binary.Hex; @@ -56,6 +57,14 @@ public class DHPUtils { return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId)); } + + public static String generateUnresolvedIdentifier(final String pid, final String pidType) { + + final String cleanedPid = CleaningFunctions.normalizePidValue(pidType, pid); + + return String.format("unresolved::%s::%s", cleanedPid, pidType.toLowerCase().trim()); + } + public static String getJPathString(final String jsonPath, final String json) { try { Object o = JsonPath.read(json, jsonPath); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala index cfdd98d30c..e3729e5b71 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala @@ -325,8 +325,9 @@ object DataciteToOAFTransformation { val grantId = m.matcher(awardUri).replaceAll("$2") val targetId = s"$p${DHPUtils.md5(grantId)}" List( - generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo), - generateRelation(targetId, sourceId, "produces", DATACITE_COLLECTED_FROM, dataInfo) + generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo) +// REMOVED INVERSE RELATION since there is a specific method that should generate later +// generateRelation(targetId, sourceId, "produces", DATACITE_COLLECTED_FROM, dataInfo) ) } else @@ -580,11 +581,11 @@ object DataciteToOAFTransformation { rel.setProperties(List(dateProps).asJava) rel.setSource(id) - rel.setTarget(s"unresolved::${r.relatedIdentifier}::${r.relatedIdentifierType}") + rel.setTarget(DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier,r.relatedIdentifierType)) rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) - rel.getCollectedfrom.asScala.map(c => c.getValue)(collection.breakOut) + rel.getCollectedfrom.asScala.map(c => c.getValue).toList rel - })(collection breakOut) + }).toList } def generateDataInfo(trust: String): DataInfo = { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala index 2cabc78799..65d00c4d1d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala @@ -1,9 +1,13 @@ package eu.dnetlib.dhp.actionmanager.datacite +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup -import eu.dnetlib.dhp.schema.mdstore.MetadataRecord +import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations +import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} @@ -21,7 +25,6 @@ object GenerateDataciteDatasetSpark { parser.parseArgument(args) val master = parser.get("master") val sourcePath = parser.get("sourcePath") - val targetPath = parser.get("targetPath") val exportLinks = "true".equalsIgnoreCase(parser.get("exportLinks")) val isLookupUrl: String = parser.get("isLookupUrl") log.info("isLookupUrl: {}", isLookupUrl) @@ -39,10 +42,22 @@ object GenerateDataciteDatasetSpark { import spark.implicits._ + val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") + val mapper = new ObjectMapper() + val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) + val outputBasePath = cleanedMdStoreVersion.getHdfsPath + + log.info("outputBasePath: {}", outputBasePath) + val targetPath = s"$outputBasePath/$MDSTORE_DATA_PATH" + spark.read.load(sourcePath).as[DataciteType] .filter(d => d.isActive) .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks)) .filter(d => d != null) + .flatMap(i=> fixRelations(i)).filter(i => i != null) .write.mode(SaveMode.Overwrite).save(targetPath) + + val total_items =spark.read.load(targetPath).as[Oaf].count() + writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$total_items", outputBasePath + MDSTORE_SIZE_PATH) } } \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala new file mode 100644 index 0000000000..e212d7e2ad --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -0,0 +1,43 @@ +package eu.dnetlib.dhp.collection + +import eu.dnetlib.dhp.schema.common.ModelSupport +import eu.dnetlib.dhp.schema.oaf.{Oaf, OafEntity, Relation} + +object CollectionUtils { + + /** + * This method in pipeline to the transformation phase, + * generates relations in both verse, typically it should be a phase of flatMap + * + * @param i input OAF + * @return + * If the input OAF is an entity -> List(i) + * If the input OAF is a relation -> List(relation, inverseRelation) + * + */ + + def fixRelations(i: Oaf): List[Oaf] = { + if (i.isInstanceOf[OafEntity]) + return List(i) + else { + val r: Relation = i.asInstanceOf[Relation] + val currentRel = ModelSupport.findRelation(r.getRelClass) + if (currentRel != null) { + + // Cleaning relation + r.setRelType(currentRel.getRelType) + r.setSubRelType(currentRel.getSubReltype) + r.setRelClass(currentRel.getRelClass) + val inverse = new Relation + inverse.setSource(r.getTarget) + inverse.setTarget(r.getSource) + inverse.setRelType(currentRel.getRelType) + inverse.setSubRelType(currentRel.getSubReltype) + inverse.setRelClass(currentRel.getInverseRelClass) + return List(r, inverse) + } + } + List() + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index 7a62437a36..8ae8285e3f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.Oaf import BioDBToOAF.ScholixResolved +import eu.dnetlib.dhp.collection.CollectionUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} @@ -35,13 +36,13 @@ object SparkTransformBioDatabaseToOAF { import spark.implicits._ database.toUpperCase() match { case "UNIPROT" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "PDB" => - spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "SCHOLIX" => - spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).write.mode(SaveMode.Overwrite).save(targetPath) + spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) case "CROSSREF_LINKS" => - spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).write.mode(SaveMode.Overwrite).save(targetPath) + spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null).write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index b19bfc23a5..8da617ca07 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -5,6 +5,7 @@ import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.sx.bio.BioDBToOAF import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem import BioDBToOAF.EBILinkItem +import eu.dnetlib.dhp.collection.CollectionUtils import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -37,6 +38,7 @@ object SparkEBILinksToOaf { ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) + .flatMap(i=> CollectionUtils.fixRelations(i)).filter(i => i != null) .write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/config-default.xml new file mode 100644 index 0000000000..dd3c32c620 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/config-default.xml @@ -0,0 +1,23 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/workflow.xml new file mode 100644 index 0000000000..41a2e22916 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/workflow.xml @@ -0,0 +1,52 @@ + + + + mainPath + the working path of Datacite stores + + + isLookupUrl + The IS lookUp service endopoint + + + blocksize + 100 + The request block size + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn-cluster + cluster + ImportDatacite + eu.dnetlib.dhp.actionmanager.datacite.ImportDatacite + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --targetPath${mainPath}/datacite_update + --dataciteDumpPath${mainPath}/datacite_dump + --namenode${nameNode} + --masteryarn-cluster + --blocksize${blocksize} + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json index 67e7f37dcb..04dc8b942c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json @@ -7,8 +7,8 @@ }, { - "paramName": "t", - "paramLongName": "targetPath", + "paramName": "mo", + "paramLongName": "mdstoreOutputVersion", "paramDescription": "the target mdstore path", "paramRequired": true }, diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/config-default.xml new file mode 100644 index 0000000000..dd3c32c620 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/config-default.xml @@ -0,0 +1,23 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/workflow.xml new file mode 100644 index 0000000000..aeb824a41b --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/workflow.xml @@ -0,0 +1,126 @@ + + + + mainPath + the working path of Datacite stores + + + isLookupUrl + The IS lookUp service endopoint + + + mdStoreOutputId + the identifier of the cleaned MDStore + + + mdStoreManagerURI + the path of the cleaned mdstore + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionNEW_VERSION + --mdStoreID${mdStoreOutputId} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + + + + yarn-cluster + cluster + TransformJob + eu.dnetlib.dhp.actionmanager.datacite.GenerateDataciteDatasetSpark + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${mainPath}/datacite_dump + --mdstoreOutputVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --isLookupUrl${isLookupUrl} + --exportLinkstrue + --masteryarn-cluster + + + + + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionCOMMIT + --namenode${nameNode} + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionREAD_UNLOCK + --mdStoreManagerURI${mdStoreManagerURI} + --readMDStoreId${wf:actionData('BeginRead')['mdStoreReadLockVersion']} + + + + + + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionROLLBACK + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala new file mode 100644 index 0000000000..e87f46b00a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -0,0 +1,152 @@ +package eu.dnetlib.dhp.oa.graph.resolution + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.common.HdfsSupport +import eu.dnetlib.dhp.schema.oaf.{Relation, Result} +import eu.dnetlib.dhp.utils.DHPUtils +import org.apache.commons.io.IOUtils +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.spark.SparkConf +import org.apache.spark.rdd.RDD +import org.apache.spark.sql._ +import org.json4s +import org.json4s.DefaultFormats +import org.json4s.JsonAST.{JField, JObject, JString} +import org.json4s.jackson.JsonMethods.parse +import org.slf4j.{Logger, LoggerFactory} + +object SparkResolveRelation { + def main(args: Array[String]): Unit = { + val log: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphBasePath = parser.get("graphBasePath") + log.info(s"graphBasePath -> $graphBasePath") + val workingPath = parser.get("workingPath") + log.info(s"workingPath -> $workingPath") + + implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + import spark.implicits._ + + + //CLEANING TEMPORARY FOLDER + HdfsSupport.remove(workingPath, spark.sparkContext.hadoopConfiguration) + val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) + fs.mkdirs(new Path(workingPath)) + + extractPidResolvedTableFromJsonRDD(spark, graphBasePath, workingPath) + + val mapper: ObjectMapper = new ObjectMapper() + + val rPid: Dataset[(String, String)] = spark.read.load(s"$workingPath/relationResolvedPid").as[(String, String)] + + val relationDs: Dataset[(String, Relation)] = spark.read.text(s"$graphBasePath/relation").as[String] + .map(s => mapper.readValue(s, classOf[Relation])).as[Relation] + .map(r => (r.getSource.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) + + relationDs.joinWith(rPid, relationDs("_1").equalTo(rPid("_2")), "left").map { + m => + val sourceResolved = m._2 + val currentRelation = m._1._2 + if (sourceResolved != null && sourceResolved._1 != null && sourceResolved._1.nonEmpty) + currentRelation.setSource(sourceResolved._1) + currentRelation + }.write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/relationResolvedSource") + + + val relationSourceResolved: Dataset[(String, Relation)] = spark.read.load(s"$workingPath/relationResolvedSource").as[Relation] + .map(r => (r.getTarget.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) + relationSourceResolved.joinWith(rPid, relationSourceResolved("_1").equalTo(rPid("_2")), "left").map { + m => + val targetResolved = m._2 + val currentRelation = m._1._2 + if (targetResolved != null && targetResolved._1.nonEmpty) + currentRelation.setTarget(targetResolved._1) + currentRelation + }.filter(r => !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/relation_resolved") + + + // TO BE conservative we keep the original relation in the working dir + // and save the relation resolved on the graphBasePath + //In future this two line of code should be removed + + fs.rename(new Path(s"$graphBasePath/relation"), new Path(s"$workingPath/relation")) + + spark.read.load(s"$workingPath/relation_resolved").as[Relation] + .map(r => mapper.writeValueAsString(r)) + .write + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .text(s"$graphBasePath/relation") + } + + + def extractPidsFromRecord(input: String): (String, List[(String, String)]) = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: json4s.JValue = parse(input) + val id: String = (json \ "id").extract[String] + val result: List[(String, String)] = for { + JObject(pids) <- json \\ "instance" \ "pid" + JField("value", JString(pidValue)) <- pids + JField("qualifier", JObject(qualifier)) <- pids + JField("classname", JString(pidType)) <- qualifier + } yield (pidValue, pidType) + + val alternateIds: List[(String, String)] = for { + JObject(pids) <- json \\ "alternateIdentifier" + JField("value", JString(pidValue)) <- pids + JField("qualifier", JObject(qualifier)) <- pids + JField("classname", JString(pidType)) <- qualifier + } yield (pidValue, pidType) + + (id, result ::: alternateIds) + } + + + private def isRelation(input: String): Boolean = { + + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: json4s.JValue = parse(input) + val source = (json \ "source").extractOrElse[String](null) + + source != null + } + + private def extractPidResolvedTableFromJsonRDD(spark: SparkSession, graphPath: String, workingPath: String) = { + import spark.implicits._ + + val d: RDD[(String, String)] = spark.sparkContext.textFile(s"$graphPath/*") + .filter(i => !isRelation(i)) + .map(i => extractPidsFromRecord(i)) + .filter(s => s != null && s._1 != null && s._2 != null && s._2.nonEmpty) + .flatMap { p => + p._2.map(pid => + (p._1, DHPUtils.generateUnresolvedIdentifier(pid._1, pid._2)) + ) + }.filter(r => r._1 != null || r._2 != null) + + spark.createDataset(d) + .groupByKey(_._2) + .reduceGroups((x, y) => if (x._1.startsWith("50|doi") || x._1.startsWith("50|pmid")) x else y) + .map(s => s._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/relationResolvedPid") + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala deleted file mode 100644 index 1b13b81c77..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ /dev/null @@ -1,154 +0,0 @@ -package eu.dnetlib.dhp.sx.graph - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Relation, Result} -import org.apache.commons.io.IOUtils -import org.apache.hadoop.io.compress.GzipCodec -import org.apache.spark.SparkConf -import org.apache.spark.rdd.RDD -import org.apache.spark.sql._ -import org.json4s -import org.json4s.DefaultFormats -import org.json4s.JsonAST.{JField, JObject, JString} -import org.json4s.jackson.JsonMethods.parse -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.JavaConverters._ -object SparkResolveRelation { - def main(args: Array[String]): Unit = { - val log: Logger = LoggerFactory.getLogger(getClass) - val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/resolve_relations_params.json"))) - parser.parseArgument(args) - val spark: SparkSession = - SparkSession - .builder() - .config(conf) - .appName(getClass.getSimpleName) - .master(parser.get("master")).getOrCreate() - - - val relationPath = parser.get("relationPath") - log.info(s"sourcePath -> $relationPath") - val entityPath = parser.get("entityPath") - log.info(s"entityPath -> $entityPath") - val workingPath = parser.get("workingPath") - log.info(s"workingPath -> $workingPath") - - implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) - import spark.implicits._ - - - extractPidResolvedTableFromJsonRDD(spark, entityPath, workingPath) - - val mappper = new ObjectMapper() - - val rPid:Dataset[(String,String)] = spark.read.load(s"$workingPath/relationResolvedPid").as[(String,String)] - - val relationDs:Dataset[(String,Relation)] = spark.read.load(relationPath).as[Relation].map(r => (r.getSource.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) - - relationDs.joinWith(rPid, relationDs("_1").equalTo(rPid("_2")), "left").map{ - m => - val sourceResolved = m._2 - val currentRelation = m._1._2 - if (sourceResolved!=null && sourceResolved._1!=null && sourceResolved._1.nonEmpty) - currentRelation.setSource(sourceResolved._1) - currentRelation - }.write - .mode(SaveMode.Overwrite) - .save(s"$workingPath/relationResolvedSource") - - - val relationSourceResolved:Dataset[(String,Relation)] = spark.read.load(s"$workingPath/relationResolvedSource").as[Relation].map(r => (r.getTarget.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) - relationSourceResolved.joinWith(rPid, relationSourceResolved("_1").equalTo(rPid("_2")), "left").map{ - m => - val targetResolved = m._2 - val currentRelation = m._1._2 - if (targetResolved!=null && targetResolved._1.nonEmpty) - currentRelation.setTarget(targetResolved._1) - currentRelation - }.filter(r => r.getSource.startsWith("50")&& r.getTarget.startsWith("50")) - .write - .mode(SaveMode.Overwrite) - .save(s"$workingPath/relation_resolved") - - spark.read.load(s"$workingPath/relation_resolved").as[Relation] - .map(r => mappper.writeValueAsString(r)) - .rdd.saveAsTextFile(s"$workingPath/relation", classOf[GzipCodec]) - - } - - - def extractPidsFromRecord(input:String):(String,List[(String,String)]) = { - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json: json4s.JValue = parse(input) - val id:String = (json \ "id").extract[String] - val result: List[(String,String)] = for { - JObject(pids) <- json \ "pid" - JField("value", JString(pidValue)) <- pids - JField("qualifier", JObject(qualifier)) <- pids - JField("classname", JString(pidType)) <- qualifier - } yield (pidValue, pidType) - - val alternateIds: List[(String,String)] = for { - JObject(pids) <- json \\ "alternateIdentifier" - JField("value", JString(pidValue)) <- pids - JField("qualifier", JObject(qualifier)) <- pids - JField("classname", JString(pidType)) <- qualifier - } yield (pidValue, pidType) - - (id,result:::alternateIds) - } - - private def extractPidResolvedTableFromJsonRDD(spark: SparkSession, entityPath: String, workingPath: String) = { - import spark.implicits._ - - val d: RDD[(String,String)] = spark.sparkContext.textFile(s"$entityPath/*") - .map(i => extractPidsFromRecord(i)) - .filter(s => s != null && s._1!= null && s._2!=null && s._2.nonEmpty) - .flatMap{ p => - p._2.map(pid => - (p._1, convertPidToDNETIdentifier(pid._1, pid._2)) - ) - }.filter(r =>r._1 != null || r._2 != null) - - spark.createDataset(d) - .groupByKey(_._2) - .reduceGroups((x, y) => if (x._1.startsWith("50|doi") || x._1.startsWith("50|pmid")) x else y) - .map(s => s._2) - .write - .mode(SaveMode.Overwrite) - .save(s"$workingPath/relationResolvedPid") - } - - - /* - This method should be used once we finally convert everythings in Kryo dataset - instead of using rdd of json - */ - private def extractPidResolvedTableFromKryo(spark: SparkSession, entityPath: String, workingPath: String) = { - import spark.implicits._ - implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - val entities: Dataset[Result] = spark.read.load(s"$entityPath/*").as[Result] - entities.flatMap(e => e.getPid.asScala - .map(p => - convertPidToDNETIdentifier(p.getValue, p.getQualifier.getClassid)) - .filter(s => s != null) - .map(s => (s, e.getId)) - ).groupByKey(_._1) - .reduceGroups((x, y) => if (x._2.startsWith("50|doi") || x._2.startsWith("50|pmid")) x else y) - .map(s => s._2) - .write - .mode(SaveMode.Overwrite) - .save(s"$workingPath/relationResolvedPid") - } - - def convertPidToDNETIdentifier(pid:String, pidType: String):String = { - if (pid==null || pid.isEmpty || pidType== null || pidType.isEmpty) - null - else - s"unresolved::${pid.toLowerCase}::${pidType.toLowerCase}" - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/config-default.xml rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml similarity index 54% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/workflow.xml rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 7683ff94cd..e9e1a8edea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -1,44 +1,23 @@ - entityPath - the path of deduplicate Entities + graphBasePath + the path of the graph - - relationPath - the path of relation unresolved - - - targetPath - the path of relation unresolved - - - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - yarn cluster Resolve Relations in raw graph - eu.dnetlib.dhp.sx.graph.SparkResolveRelation + eu.dnetlib.dhp.oa.graph.resolution.SparkResolveRelation dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -51,9 +30,8 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --masteryarn - --relationPath${relationPath} - --workingPath${targetPath} - --entityPath${entityPath} + --graphBasePath${graphBasePath} + --workingPath${workingDir} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolve_relations_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json similarity index 50% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolve_relations_params.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json index f211adb9a4..1fbe206481 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolve_relations_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_relations_params.json @@ -1,6 +1,5 @@ [ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, - {"paramName":"r", "paramLongName":"relationPath", "paramDescription": "the source Path", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the source Path", "paramRequired": true}, - {"paramName":"e", "paramLongName":"entityPath", "paramDescription": "the path of the raw graph", "paramRequired": true} + {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala index 5b7fbe1cf5..bd7e4fd094 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala @@ -1,10 +1,10 @@ package eu.dnetlib.dhp.sx.graph.scholix import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature} +import eu.dnetlib.dhp.oa.graph.resolution.SparkResolveRelation import eu.dnetlib.dhp.schema.oaf.{Relation, Result} import eu.dnetlib.dhp.schema.sx.scholix.Scholix import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary -import eu.dnetlib.dhp.sx.graph.SparkResolveRelation import eu.dnetlib.dhp.sx.graph.bio.pubmed.AbstractVocabularyTest import org.json4s import org.json4s.DefaultFormats From ab3a99d3e9463fce361dc3cd1f2872ddbaefdac0 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 20 Oct 2021 17:19:47 +0200 Subject: [PATCH 11/60] removed old datacite oozie workflow --- .../actionset/oozie_app/config-default.xml | 23 ----- .../datacite/actionset/oozie_app/workflow.xml | 46 ---------- .../datacite/oozie_app/config-default.xml | 23 ----- .../datacite/oozie_app/workflow.xml | 81 ------------------ .../scholix/oozie_app/config-default.xml | 23 ----- .../datacite/scholix/oozie_app/workflow.xml | 84 ------------------- 6 files changed, 280 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/actionset/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/actionset/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/scholix/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/scholix/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/actionset/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/actionset/oozie_app/config-default.xml deleted file mode 100644 index dd3c32c620..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/actionset/oozie_app/config-default.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/actionset/oozie_app/workflow.xml deleted file mode 100644 index 3c58ace7bf..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/actionset/oozie_app/workflow.xml +++ /dev/null @@ -1,46 +0,0 @@ - - - - sourcePath - the working path of Datacite stores - - - outputPath - the path of Datacite ActionSet - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - yarn-cluster - cluster - ExportDataset - eu.dnetlib.dhp.actionmanager.datacite.ExportActionSetJobNode - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --targetPath${outputPath} - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/config-default.xml deleted file mode 100644 index dd3c32c620..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/config-default.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml deleted file mode 100644 index c6332ff7d5..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml +++ /dev/null @@ -1,81 +0,0 @@ - - - - mainPath - the working path of Datacite stores - - - isLookupUrl - The IS lookUp service endopoint - - - blocksize - 100 - The request block size - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - yarn-cluster - cluster - ImportDatacite - eu.dnetlib.dhp.actionmanager.datacite.ImportDatacite - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --targetPath${mainPath}/datacite_update - --dataciteDumpPath${mainPath}/datacite_dump - --namenode${nameNode} - --masteryarn-cluster - --blocksize${blocksize} - - - - - - - - - yarn-cluster - cluster - TransformJob - eu.dnetlib.dhp.actionmanager.datacite.GenerateDataciteDatasetSpark - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${mainPath}/datacite_dump - --targetPath${mainPath}/datacite_oaf - --isLookupUrl${isLookupUrl} - --exportLinksfalse - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/scholix/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/scholix/oozie_app/config-default.xml deleted file mode 100644 index dd3c32c620..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/scholix/oozie_app/config-default.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/scholix/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/scholix/oozie_app/workflow.xml deleted file mode 100644 index 397288c694..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/scholix/oozie_app/workflow.xml +++ /dev/null @@ -1,84 +0,0 @@ - - - - datacitePath - the path of Datacite spark dataset - - - isLookupUrl - The IS lookUp service endopoint - - - crossrefPath - the path of Crossref spark dataset - - - - targetPath - the path of Crossref spark dataset - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - yarn-cluster - cluster - ImportDatacite - eu.dnetlib.dhp.actionmanager.datacite.GenerateDataciteDatasetSpark - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${datacitePath} - --targetPath${targetPath}/datacite_oaf - --isLookupUrl${isLookupUrl} - --exportLinkstrue - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - FilterCrossrefEntities - eu.dnetlib.dhp.actionmanager.datacite.FilterCrossrefEntitiesSpark - dhp-aggregation-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${crossrefPath} - --targetPath${targetPath}/crossref_oaf - --masteryarn-cluster - - - - - - - \ No newline at end of file From aeeebd573b2538e78c81ec8b9680ab5f4b4a794d Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 20 Oct 2021 17:37:42 +0200 Subject: [PATCH 12/60] code refactor renamed datacite package --- .../datacite/ExportActionSetJobNode.scala | 41 ----------------- .../FilterCrossrefEntitiesSpark.scala | 46 ------------------- .../datacite/AbstractRestClient.scala | 8 ++-- .../datacite/DataciteAPIImporter.scala | 4 +- .../DataciteToOAFTransformation.scala | 2 +- .../GenerateDataciteDatasetSpark.scala | 19 ++++---- .../datacite/ImportDatacite.scala | 21 ++++----- .../SparkDownloadUpdateDatacite.scala | 16 +++---- .../collection/oozie_app/config-default.xml | 0 .../collection/oozie_app/workflow.xml | 2 +- .../datacite/datacite_filter | 0 .../datacite/exportDataset_parameters.json | 0 .../datacite/filter_crossref_param.json | 0 .../datacite/generate_dataset_params.json | 0 .../datacite/hostedBy_map.json | 0 .../datacite/import_from_api.json | 0 .../oozie_app/config-default.xml | 0 .../transformation/oozie_app/workflow.xml | 2 +- .../datacite/DataciteToOAFTest.scala | 5 +- 19 files changed, 36 insertions(+), 130 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ExportActionSetJobNode.scala delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/FilterCrossrefEntitiesSpark.scala rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/{actionmanager => }/datacite/AbstractRestClient.scala (90%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/{actionmanager => }/datacite/DataciteAPIImporter.scala (95%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/{actionmanager => }/datacite/DataciteToOAFTransformation.scala (99%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/{actionmanager => }/datacite/GenerateDataciteDatasetSpark.scala (86%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/{actionmanager => }/datacite/ImportDatacite.scala (93%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/{actionmanager => }/datacite/SparkDownloadUpdateDatacite.scala (68%) rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/collection/oozie_app/config-default.xml (100%) rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/collection/oozie_app/workflow.xml (95%) rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/datacite_filter (100%) rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/exportDataset_parameters.json (100%) rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/filter_crossref_param.json (100%) rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/generate_dataset_params.json (100%) rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/hostedBy_map.json (100%) rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/import_from_api.json (100%) rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/transformation/oozie_app/config-default.xml (100%) rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionmanager => }/datacite/transformation/oozie_app/workflow.xml (97%) rename dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/{actionmanager => }/datacite/DataciteToOAFTest.scala (88%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ExportActionSetJobNode.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ExportActionSetJobNode.scala deleted file mode 100644 index 9f0d257359..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ExportActionSetJobNode.scala +++ /dev/null @@ -1,41 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.datacite - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Oaf -import org.apache.hadoop.io.Text -import org.apache.hadoop.io.compress.GzipCodec -import org.apache.hadoop.mapred.SequenceFileOutputFormat -import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.slf4j.{Logger, LoggerFactory} - -import scala.io.Source - -object ExportActionSetJobNode { - - val log: Logger = LoggerFactory.getLogger(ExportActionSetJobNode.getClass) - - def main(args: Array[String]): Unit = { - val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/exportDataset_parameters.json")).mkString) - parser.parseArgument(args) - val master = parser.get("master") - val sourcePath = parser.get("sourcePath") - val targetPath = parser.get("targetPath") - - val spark: SparkSession = SparkSession.builder().config(conf) - .appName(ExportActionSetJobNode.getClass.getSimpleName) - .master(master) - .getOrCreate() - implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val tEncoder:Encoder[(String,String)] = Encoders.tuple(Encoders.STRING,Encoders.STRING) - - spark.read.load(sourcePath).as[Oaf] - .map(o =>DataciteToOAFTransformation.toActionSet(o)) - .filter(o => o!= null) - .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec]) - - - } - -} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/FilterCrossrefEntitiesSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/FilterCrossrefEntitiesSpark.scala deleted file mode 100644 index 5860c50aca..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/FilterCrossrefEntitiesSpark.scala +++ /dev/null @@ -1,46 +0,0 @@ -package eu.dnetlib.dhp.actionmanager.datacite - -import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup -import eu.dnetlib.dhp.schema.mdstore.MetadataRecord -import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} -import eu.dnetlib.dhp.utils.ISLookupClientFactory -import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.slf4j.{Logger, LoggerFactory} - -import scala.io.Source - -object FilterCrossrefEntitiesSpark { - - val log: Logger = LoggerFactory.getLogger(getClass.getClass) - - def main(args: Array[String]): Unit = { - val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/filter_crossref_param.json")).mkString) - parser.parseArgument(args) - val master = parser.get("master") - val sourcePath = parser.get("sourcePath") - log.info("sourcePath: {}", sourcePath) - val targetPath = parser.get("targetPath") - log.info("targetPath: {}", targetPath) - - - - val spark: SparkSession = SparkSession.builder().config(conf) - .appName(getClass.getSimpleName) - .master(master) - .getOrCreate() - - - - implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val resEncoder: Encoder[Result] = Encoders.kryo[Result] - - val d:Dataset[Oaf]= spark.read.load(sourcePath).as[Oaf] - - d.filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]).write.mode(SaveMode.Overwrite).save(targetPath) - - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala similarity index 90% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala index bae41b218b..6a9b8e3e54 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/AbstractRestClient.scala @@ -1,12 +1,10 @@ -package eu.dnetlib.dhp.actionmanager.datacite +package eu.dnetlib.dhp.datacite import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig -import org.apache.http.client.methods.{HttpGet, HttpPost, HttpRequestBase, HttpUriRequest} +import org.apache.http.client.methods.{HttpGet, HttpPost, HttpUriRequest} import org.apache.http.entity.StringEntity -import org.apache.http.impl.client.{HttpClientBuilder, HttpClients} - -import java.io.IOException +import org.apache.http.impl.client.HttpClientBuilder abstract class AbstractRestClient extends Iterator[String] { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteAPIImporter.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala similarity index 95% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteAPIImporter.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala index 36ec9e8c33..7ec44a6ff0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteAPIImporter.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteAPIImporter.scala @@ -1,7 +1,7 @@ -package eu.dnetlib.dhp.actionmanager.datacite +package eu.dnetlib.dhp.datacite -import org.json4s.{DefaultFormats, JValue} import org.json4s.jackson.JsonMethods.{compact, parse, render} +import org.json4s.{DefaultFormats, JValue} class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until:Long = -1) extends AbstractRestClient { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala similarity index 99% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index e3729e5b71..6ce4920edf 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.actionmanager.datacite +package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala similarity index 86% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala index 65d00c4d1d..a63627d1c7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/GenerateDataciteDatasetSpark.scala @@ -1,10 +1,11 @@ -package eu.dnetlib.dhp.actionmanager.datacite +package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations +import eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH +import eu.dnetlib.dhp.common.Constants.MDSTORE_SIZE_PATH +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord} import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile @@ -21,7 +22,7 @@ object GenerateDataciteDatasetSpark { def main(args: Array[String]): Unit = { val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json")).mkString) + val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/datacite/generate_dataset_params.json")).mkString) parser.parseArgument(args) val master = parser.get("master") val sourcePath = parser.get("sourcePath") @@ -36,12 +37,12 @@ object GenerateDataciteDatasetSpark { .master(master) .getOrCreate() + import spark.implicits._ + implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord] implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] - import spark.implicits._ - val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") val mapper = new ObjectMapper() val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) @@ -54,10 +55,10 @@ object GenerateDataciteDatasetSpark { .filter(d => d.isActive) .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks)) .filter(d => d != null) - .flatMap(i=> fixRelations(i)).filter(i => i != null) + .flatMap(i => fixRelations(i)).filter(i => i != null) .write.mode(SaveMode.Overwrite).save(targetPath) - val total_items =spark.read.load(targetPath).as[Oaf].count() + val total_items = spark.read.load(targetPath).as[Oaf].count() writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$total_items", outputBasePath + MDSTORE_SIZE_PATH) } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala index 2b73d29559..018b4958ad 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/ImportDatacite.scala @@ -1,6 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.datacite +package eu.dnetlib.dhp.datacite -import eu.dnetlib.dhp.actionmanager.datacite.DataciteToOAFTransformation.df_it import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path} @@ -9,14 +8,14 @@ import org.apache.hadoop.io.{IntWritable, SequenceFile, Text} import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.functions.max import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession} import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse -import org.apache.spark.sql.functions.max import org.slf4j.{Logger, LoggerFactory} -import java.time.format.DateTimeFormatter._ -import java.time.{LocalDate, LocalDateTime, ZoneOffset} +import java.time.format.DateTimeFormatter.ISO_DATE_TIME +import java.time.{LocalDateTime, ZoneOffset} import scala.io.Source object ImportDatacite { @@ -138,11 +137,11 @@ object ImportDatacite { } } - private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs:Int): Long = { - var from:Long = timestamp * 1000 - val delta:Long = 100000000L + private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs: Int): Long = { + var from: Long = timestamp * 1000 + val delta: Long = 100000000L var client: DataciteAPIImporter = null - val now :Long =System.currentTimeMillis() + val now: Long = System.currentTimeMillis() var i = 0 try { val writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(hdfsTargetPath), SequenceFile.Writer.keyClass(classOf[IntWritable]), SequenceFile.Writer.valueClass(classOf[Text])) @@ -168,7 +167,7 @@ object ImportDatacite { start = System.currentTimeMillis } } - println(s"updating from value: $from -> ${from+delta}") + println(s"updating from value: $from -> ${from + delta}") from = from + delta } } catch { @@ -183,4 +182,4 @@ object ImportDatacite { i } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala similarity index 68% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala index 0b459fcf68..d46e5423d3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/datacite/SparkDownloadUpdateDatacite.scala @@ -1,18 +1,14 @@ -package eu.dnetlib.dhp.actionmanager.datacite - +package eu.dnetlib.dhp.datacite import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.LocalFileSystem -import org.apache.hadoop.hdfs.DistributedFileSystem import org.apache.spark.SparkConf -import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.apache.spark.sql.functions.max +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.slf4j.{Logger, LoggerFactory} import java.text.SimpleDateFormat -import java.util.{Date, Locale} +import java.util.Locale import scala.io.Source object SparkDownloadUpdateDatacite { @@ -21,7 +17,7 @@ object SparkDownloadUpdateDatacite { def main(args: Array[String]): Unit = { val conf = new SparkConf - val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json")).mkString) + val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/datacite/generate_dataset_params.json")).mkString) parser.parseArgument(args) val master = parser.get("master") val sourcePath = parser.get("sourcePath") @@ -42,9 +38,9 @@ object SparkDownloadUpdateDatacite { import spark.implicits._ - val maxDate:String = spark.read.load(workingPath).as[Oaf].filter(s => s.isInstanceOf[Result]).map(r => r.asInstanceOf[Result].getDateofcollection).select(max("value")).first().getString(0) + val maxDate: String = spark.read.load(workingPath).as[Oaf].filter(s => s.isInstanceOf[Result]).map(r => r.asInstanceOf[Result].getDateofcollection).select(max("value")).first().getString(0) val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) - val string_to_date =ISO8601FORMAT.parse(maxDate) + val string_to_date = ISO8601FORMAT.parse(maxDate) val ts = string_to_date.getTime diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/collection/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/config-default.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/collection/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/collection/oozie_app/workflow.xml similarity index 95% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/workflow.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/collection/oozie_app/workflow.xml index 41a2e22916..6989eed66e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/collection/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/collection/oozie_app/workflow.xml @@ -28,7 +28,7 @@ yarn-cluster cluster ImportDatacite - eu.dnetlib.dhp.actionmanager.datacite.ImportDatacite + eu.dnetlib.dhp.datacite.ImportDatacite dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/datacite_filter b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/datacite_filter similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/datacite_filter rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/datacite_filter diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/exportDataset_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/exportDataset_parameters.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/exportDataset_parameters.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/exportDataset_parameters.json diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/filter_crossref_param.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/filter_crossref_param.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/filter_crossref_param.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/filter_crossref_param.json diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/generate_dataset_params.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/generate_dataset_params.json diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/hostedBy_map.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/hostedBy_map.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/hostedBy_map.json diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/import_from_api.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/import_from_api.json diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/transformation/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/config-default.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/transformation/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/transformation/oozie_app/workflow.xml similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/workflow.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/transformation/oozie_app/workflow.xml index aeb824a41b..d5bae7305d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/transformation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/datacite/transformation/oozie_app/workflow.xml @@ -47,7 +47,7 @@ yarn-cluster cluster TransformJob - eu.dnetlib.dhp.actionmanager.datacite.GenerateDataciteDatasetSpark + eu.dnetlib.dhp.datacite.GenerateDataciteDatasetSpark dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala similarity index 88% rename from dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala rename to dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index a795a910da..f21e9eab1c 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -1,8 +1,7 @@ -package eu.dnetlib.dhp.actionmanager.datacite +package eu.dnetlib.dhp.datacite -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.databind.SerializationFeature +import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.Oaf import org.junit.jupiter.api.extension.ExtendWith From ac36aa7d1c9324b1b018a71eeb3b4f94b05165a6 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 21 Oct 2021 11:35:02 +0200 Subject: [PATCH 13/60] fixed wrong Encoding during a map phase --- .../eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java index c016e21565..77971a5a43 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java @@ -102,7 +102,7 @@ public class CopyHdfsOafApplication extends AbstractMigrationApplication { for (Map.Entry e : ModelSupport.oafTypes.entrySet()) { oaf .filter((FilterFunction) o -> o.getClass().getSimpleName().toLowerCase().equals(e.getKey())) - .map((MapFunction) OBJECT_MAPPER::writeValueAsString, Encoders.bean(e.getValue())) + .map((MapFunction) OBJECT_MAPPER::writeValueAsString, Encoders.STRING()) .write() .option("compression", "gzip") .mode(SaveMode.Append) From 3702fe478d2082cd28b1f27bf0d094600d2e8b9b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 21 Oct 2021 12:05:02 +0200 Subject: [PATCH 14/60] cleanup --- .../oa/graph/raw/CopyHdfsOafApplication.java | 20 +------------------ 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java index 77971a5a43..792264e18b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java @@ -110,23 +110,5 @@ public class CopyHdfsOafApplication extends AbstractMigrationApplication { } } } - - private static Relation getInverse(Relation rel, VocabularyGroup vocs) { - final Relation inverse = new Relation(); - - inverse.setProperties(rel.getProperties()); - inverse.setValidated(rel.getValidated()); - inverse.setValidationDate(rel.getValidationDate()); - inverse.setCollectedfrom(rel.getCollectedfrom()); - inverse.setDataInfo(rel.getDataInfo()); - inverse.setLastupdatetimestamp(rel.getLastupdatetimestamp()); - - inverse.setSource(rel.getTarget()); - inverse.setTarget(rel.getSource()); - inverse.setRelType(rel.getRelType()); - inverse.setSubRelType(rel.getSubRelType()); - - return inverse; - } - + } From d147295c2f9b531b8da7973436694c0354c79ba9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 21 Oct 2021 14:15:57 +0200 Subject: [PATCH 15/60] avoiding java.io.NotSerializableException: java.util.HashMap --- .../dhp/oa/graph/raw/CopyHdfsOafApplication.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java index 792264e18b..31ebcbc6e5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java @@ -31,6 +31,7 @@ import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import scala.Tuple2; public class CopyHdfsOafApplication extends AbstractMigrationApplication { @@ -73,10 +74,13 @@ public class CopyHdfsOafApplication extends AbstractMigrationApplication { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ModelSupport.getOafModelClasses()); - runWithSparkSession(conf, isSparkSessionManaged, spark -> processPaths(spark, hdfsPath, paths)); + final List oafTypes = Lists.newArrayList(ModelSupport.oafTypes.keySet()); + + runWithSparkSession(conf, isSparkSessionManaged, spark -> processPaths(spark, oafTypes, hdfsPath, paths)); } public static void processPaths(final SparkSession spark, + final List oafTypes, final String outputPath, final Set paths) { @@ -99,16 +103,16 @@ public class CopyHdfsOafApplication extends AbstractMigrationApplication { .as(Encoders.kryo(Oaf.class)); // dispatch each entity type individually in the respective graph subdirectory in append mode - for (Map.Entry e : ModelSupport.oafTypes.entrySet()) { + for (String type : oafTypes) { oaf - .filter((FilterFunction) o -> o.getClass().getSimpleName().toLowerCase().equals(e.getKey())) + .filter((FilterFunction) o -> o.getClass().getSimpleName().toLowerCase().equals(type)) .map((MapFunction) OBJECT_MAPPER::writeValueAsString, Encoders.STRING()) .write() .option("compression", "gzip") .mode(SaveMode.Append) - .text(outputPath + "/" + e.getKey()); + .text(outputPath + "/" + type); } } } - + } From 6b34ba737eae62a3c8c61dc279fbfdd17233aa18 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 21 Oct 2021 14:16:18 +0200 Subject: [PATCH 16/60] minor --- .../src/main/java/eu/dnetlib/dhp/common/HdfsSupport.java | 2 +- dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/HdfsSupport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/HdfsSupport.java index 0b2cd571fa..654fdd5ac8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/HdfsSupport.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/HdfsSupport.java @@ -28,7 +28,7 @@ public class HdfsSupport { * @param configuration Configuration of hadoop env */ public static boolean exists(String path, Configuration configuration) { - logger.info("Removing path: {}", path); + logger.info("Checking existence for path: {}", path); return rethrowAsRuntimeException( () -> { Path f = new Path(path); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java index c53affbadf..4fee87ed7e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java @@ -10,7 +10,6 @@ import java.util.Properties; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import org.apache.commons.codec.binary.Base64; import org.apache.commons.codec.binary.Base64OutputStream; import org.apache.commons.codec.binary.Hex; @@ -27,6 +26,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; import com.jayway.jsonpath.JsonPath; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import net.minidev.json.JSONArray; import scala.collection.JavaConverters; import scala.collection.Seq; @@ -57,7 +57,6 @@ public class DHPUtils { return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId)); } - public static String generateUnresolvedIdentifier(final String pid, final String pidType) { final String cleanedPid = CleaningFunctions.normalizePidValue(pidType, pid); From 034304b33af2b8fdec46cafc8f3c81f4fb704c2e Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 26 Oct 2021 09:40:47 +0200 Subject: [PATCH 17/60] conflict resolved on merge --- .../java/eu/dnetlib/dhp/utils/DHPUtils.java | 82 ++++++++---- .../dhp/sx/bio/ebi/oozie_app/workflow.xml | 4 +- .../dhp/sx/bio/pubmed/oozie_app/workflow.xml | 2 +- .../oa/graph/raw/CopyHdfsOafApplication.java | 118 ------------------ .../raw/CopyHdfsOafSparkApplication.scala | 74 +++++++++++ .../common/AbstractMigrationApplication.java | 23 +--- .../oa/graph/copy_hdfs_oaf_parameters.json | 6 + .../oa/graph/raw_all/oozie_app/workflow.xml | 3 +- 8 files changed, 147 insertions(+), 165 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java index 4fee87ed7e..66e4cb780f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java @@ -1,36 +1,35 @@ package eu.dnetlib.dhp.utils; -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.security.MessageDigest; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.codec.binary.Base64OutputStream; -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.SaveMode; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; import com.jayway.jsonpath.JsonPath; - +import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import net.minidev.json.JSONArray; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.SaveMode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import scala.collection.JavaConverters; import scala.collection.Seq; +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.*; +import java.util.stream.Collectors; + public class DHPUtils { private static final Logger log = LoggerFactory.getLogger(DHPUtils.class); @@ -53,6 +52,45 @@ public class DHPUtils { } } + /** + * Retrieves from the metadata store manager application the list of paths associated with mdstores characterized + * by he given format, layout, interpretation + * @param mdstoreManagerUrl the URL of the mdstore manager service + * @param format the mdstore format + * @param layout the mdstore layout + * @param interpretation the mdstore interpretation + * @param includeEmpty include Empty mdstores + * @return the set of hdfs paths + * @throws IOException in case of HTTP communication issues + */ + public static Set mdstorePaths(final String mdstoreManagerUrl, + final String format, + final String layout, + final String interpretation, + boolean includeEmpty) throws IOException { + final String url = mdstoreManagerUrl + "/mdstores/"; + final ObjectMapper objectMapper = new ObjectMapper(); + + final HttpGet req = new HttpGet(url); + + try (final CloseableHttpClient client = HttpClients.createDefault()) { + try (final CloseableHttpResponse response = client.execute(req)) { + final String json = IOUtils.toString(response.getEntity().getContent()); + final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class); + return Arrays + .stream(mdstores) + .filter(md -> md.getFormat().equalsIgnoreCase(format)) + .filter(md -> md.getLayout().equalsIgnoreCase(layout)) + .filter(md -> md.getInterpretation().equalsIgnoreCase(interpretation)) + .filter(md -> StringUtils.isNotBlank(md.getHdfsPath())) + .filter(md -> StringUtils.isNotBlank(md.getCurrentVersion())) + .filter(md -> includeEmpty || md.getSize() > 0) + .map(md -> md.getHdfsPath() + "/" + md.getCurrentVersion() + "/store") + .collect(Collectors.toSet()); + } + } + } + public static String generateIdentifier(final String originalId, final String nsPrefix) { return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId)); } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml index 73b1a3b608..4b47ae38e4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml @@ -52,7 +52,7 @@ yarn-cluster cluster Incremental Download EBI Links - eu.dnetllib.dhp.sx.bio.ebi.SparkDownloadEBILinks + eu.dnetlib.dhp.sx.bio.ebi.SparkDownloadEBILinks dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -85,7 +85,7 @@ yarn-cluster cluster Create OAF DataSet - eu.dnetllib.dhp.sx.bio.ebi.SparkEBILinksToOaf + eu.dnetlib.dhp.sx.bio.ebi.SparkEBILinksToOaf dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml index 21fd2d1535..8915a090bd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml @@ -30,7 +30,7 @@ yarn cluster Convert Baseline to OAF Dataset - eu.dnetllib.dhp.sx.bio.ebi.SparkCreateBaselineDataFrame + eu.dnetlib.dhp.sx.bio.ebi.SparkCreateBaselineDataFrame dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java deleted file mode 100644 index 31ebcbc6e5..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java +++ /dev/null @@ -1,118 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.raw; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.util.*; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.clearspring.analytics.util.Lists; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import scala.Tuple2; - -public class CopyHdfsOafApplication extends AbstractMigrationApplication { - - private static final Logger log = LoggerFactory.getLogger(CopyHdfsOafApplication.class); - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - CopyHdfsOafApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json"))); - parser.parseArgument(args); - - final Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String mdstoreManagerUrl = parser.get("mdstoreManagerUrl"); - log.info("mdstoreManagerUrl: {}", mdstoreManagerUrl); - - final String mdFormat = parser.get("mdFormat"); - log.info("mdFormat: {}", mdFormat); - - final String mdLayout = parser.get("mdLayout"); - log.info("mdLayout: {}", mdLayout); - - final String mdInterpretation = parser.get("mdInterpretation"); - log.info("mdInterpretation: {}", mdInterpretation); - - final String hdfsPath = parser.get("hdfsPath"); - log.info("hdfsPath: {}", hdfsPath); - - final Set paths = mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation); - - final SparkConf conf = new SparkConf(); - conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - conf.registerKryoClasses(ModelSupport.getOafModelClasses()); - - final List oafTypes = Lists.newArrayList(ModelSupport.oafTypes.keySet()); - - runWithSparkSession(conf, isSparkSessionManaged, spark -> processPaths(spark, oafTypes, hdfsPath, paths)); - } - - public static void processPaths(final SparkSession spark, - final List oafTypes, - final String outputPath, - final Set paths) { - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - log.info("Found {} mdstores", paths.size()); - paths.forEach(log::info); - - final String[] validPaths = paths - .stream() - .filter(p -> HdfsSupport.exists(p, sc.hadoopConfiguration())) - .toArray(String[]::new); - log.info("Non empty mdstores {}", validPaths.length); - - if (validPaths.length > 0) { - // load the dataset - Dataset oaf = spark - .read() - .load(validPaths) - .as(Encoders.kryo(Oaf.class)); - - // dispatch each entity type individually in the respective graph subdirectory in append mode - for (String type : oafTypes) { - oaf - .filter((FilterFunction) o -> o.getClass().getSimpleName().toLowerCase().equals(type)) - .map((MapFunction) OBJECT_MAPPER::writeValueAsString, Encoders.STRING()) - .write() - .option("compression", "gzip") - .mode(SaveMode.Append) - .text(outputPath + "/" + type); - } - } - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala new file mode 100644 index 0000000000..c7ad1890de --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -0,0 +1,74 @@ +package eu.dnetlib.dhp.oa.graph.raw + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.common.HdfsSupport +import eu.dnetlib.dhp.schema.common.ModelSupport +import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo +import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.utils.DHPUtils +import org.apache.commons.io.IOUtils +import org.apache.commons.lang3.StringUtils +import org.apache.http.client.methods.HttpGet +import org.apache.http.impl.client.HttpClients +import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} +import org.slf4j.LoggerFactory + +import scala.collection.JavaConverters._ +import scala.io.Source + +object CopyHdfsOafSparkApplication { + + def main(args: Array[String]): Unit = { + val log = LoggerFactory.getLogger(getClass) + val conf = new SparkConf() + val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json")).mkString) + parser.parseArgument(args) + + val spark = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + val sc: SparkContext = spark.sparkContext + + val mdstoreManagerUrl = parser.get("mdstoreManagerUrl") + log.info("mdstoreManagerUrl: {}", mdstoreManagerUrl) + + val mdFormat = parser.get("mdFormat") + log.info("mdFormat: {}", mdFormat) + + val mdLayout = parser.get("mdLayout") + log.info("mdLayout: {}", mdLayout) + + val mdInterpretation = parser.get("mdInterpretation") + log.info("mdInterpretation: {}", mdInterpretation) + + val hdfsPath = parser.get("hdfsPath") + log.info("hdfsPath: {}", hdfsPath) + + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + + val paths = DHPUtils.mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation, true).asScala + + val validPaths: List[String] = paths.filter(p => HdfsSupport.exists(p, sc.hadoopConfiguration)).toList + + if (validPaths.nonEmpty) { + val oaf = spark.read.load(validPaths: _*).as[Oaf] + val mapper = new ObjectMapper() + val l =ModelSupport.oafTypes.entrySet.asScala.map(e => e.getKey).toList + l.foreach( + e => + oaf.filter(o => o.getClass.getSimpleName.equalsIgnoreCase(e)) + .map(s => mapper.writeValueAsString(s))(Encoders.STRING) + .write + .option("compression", "gzip") + .mode(SaveMode.Append) + .text(s"$hdfsPath/${e}") + ) + } + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java index 8cd495e08a..cba64899b5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java @@ -26,6 +26,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo; import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.utils.DHPUtils; public class AbstractMigrationApplication implements Closeable { @@ -71,27 +72,7 @@ public class AbstractMigrationApplication implements Closeable { final String format, final String layout, final String interpretation) throws IOException { - final String url = mdstoreManagerUrl + "/mdstores/"; - final ObjectMapper objectMapper = new ObjectMapper(); - - final HttpGet req = new HttpGet(url); - - try (final CloseableHttpClient client = HttpClients.createDefault()) { - try (final CloseableHttpResponse response = client.execute(req)) { - final String json = IOUtils.toString(response.getEntity().getContent()); - final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class); - return Arrays - .stream(mdstores) - .filter(md -> md.getFormat().equalsIgnoreCase(format)) - .filter(md -> md.getLayout().equalsIgnoreCase(layout)) - .filter(md -> md.getInterpretation().equalsIgnoreCase(interpretation)) - .filter(md -> StringUtils.isNotBlank(md.getHdfsPath())) - .filter(md -> StringUtils.isNotBlank(md.getCurrentVersion())) - .filter(md -> md.getSize() > 0) - .map(md -> md.getHdfsPath() + "/" + md.getCurrentVersion() + "/store") - .collect(Collectors.toSet()); - } - } + return DHPUtils.mdstorePaths(mdstoreManagerUrl, format, layout, interpretation, false); } private Configuration getConf() { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json index 1d89017c52..d1b16b09a6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json @@ -23,6 +23,12 @@ "paramDescription": "metadata layout", "paramRequired": true }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "should be yarn or local", + "paramRequired": true + }, { "paramName": "i", "paramLongName": "mdInterpretation", diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index 137c69ed83..307e262672 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -553,7 +553,7 @@ yarn cluster ImportOAF_hdfs_graph - eu.dnetlib.dhp.oa.graph.raw.CopyHdfsOafApplication + eu.dnetlib.dhp.oa.graph.raw.CopyHdfsOafSparkApplication dhp-graph-mapper-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} @@ -568,6 +568,7 @@ --mdstoreManagerUrl${mdstoreManagerUrl} --mdFormatOAF --mdLayoutstore + --masteryarn --mdInterpretationgraph From aafdffa6b3c3d423a1b9a0085b4d6540bb723e13 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 26 Oct 2021 09:45:46 +0200 Subject: [PATCH 18/60] resolved conflict --- .../java/eu/dnetlib/dhp/utils/DHPUtils.java | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java index 66e4cb780f..5a59bc0dfe 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java @@ -1,12 +1,12 @@ package eu.dnetlib.dhp.utils; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Maps; -import com.jayway.jsonpath.JsonPath; -import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; -import net.minidev.json.JSONArray; +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.*; +import java.util.stream.Collectors; + import org.apache.commons.codec.binary.Hex; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -21,15 +21,17 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.SaveMode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Maps; +import com.jayway.jsonpath.JsonPath; + +import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import net.minidev.json.JSONArray; import scala.collection.JavaConverters; import scala.collection.Seq; -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.security.MessageDigest; -import java.util.*; -import java.util.stream.Collectors; - public class DHPUtils { private static final Logger log = LoggerFactory.getLogger(DHPUtils.class); From 4acfa8fa2e317874caabe209f1d6c685b36d9a66 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 26 Oct 2021 17:51:20 +0200 Subject: [PATCH 19/60] Scholexplorer Datasource Aggregation: - Added collectedfrom in the inverse relation generated Relation resolution: - increased number of partitions in workflow.xml - using classid instead of classname to build the pid-dnetId mapping --- .../java/eu/dnetlib/dhp/collection/CollectionUtils.scala | 6 ++++++ .../dhp/oa/graph/resolution/SparkResolveRelation.scala | 4 ++-- .../dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala index e212d7e2ad..11ecfd6cb6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectionUtils.scala @@ -34,6 +34,12 @@ object CollectionUtils { inverse.setRelType(currentRel.getRelType) inverse.setSubRelType(currentRel.getSubReltype) inverse.setRelClass(currentRel.getInverseRelClass) + inverse.setCollectedfrom(r.getCollectedfrom) + inverse.setDataInfo(r.getDataInfo) + inverse.setProperties(r.getProperties) + inverse.setLastupdatetimestamp(r.getLastupdatetimestamp) + inverse.setValidated(r.getValidated) + inverse.setValidationDate(r.getValidationDate) return List(r, inverse) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index e87f46b00a..5ca7d9782d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -104,14 +104,14 @@ object SparkResolveRelation { JObject(pids) <- json \\ "instance" \ "pid" JField("value", JString(pidValue)) <- pids JField("qualifier", JObject(qualifier)) <- pids - JField("classname", JString(pidType)) <- qualifier + JField("classid", JString(pidType)) <- qualifier } yield (pidValue, pidType) val alternateIds: List[(String, String)] = for { JObject(pids) <- json \\ "alternateIdentifier" JField("value", JString(pidValue)) <- pids JField("qualifier", JObject(qualifier)) <- pids - JField("classname", JString(pidType)) <- qualifier + JField("classid", JString(pidType)) <- qualifier } yield (pidValue, pidType) (id, result ::: alternateIds) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index e9e1a8edea..31cc53ae3c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -24,7 +24,7 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=3000 + --conf spark.sql.shuffle.partitions=8000 --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} From 1be9aa0a5f2d747ad7eb41191956d14a4f3c0943 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 26 Oct 2021 17:52:20 +0200 Subject: [PATCH 20/60] Removed filter of datacite items from the raw graph merging phase, Datacite is not an actionset anymore in beta --- .../dhp/oa/graph/merge/MergeGraphTableSparkJob.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java index ef419a042e..474944260b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java @@ -127,13 +127,6 @@ public class MergeGraphTableSparkJob { } }, Encoders.bean(p_clazz)) .filter((FilterFunction

) Objects::nonNull) - .filter((FilterFunction

) o -> { - HashSet collectedFromNames = Optional - .ofNullable(o.getCollectedfrom()) - .map(c -> c.stream().map(KeyValue::getValue).collect(Collectors.toCollection(HashSet::new))) - .orElse(new HashSet<>()); - return !collectedFromNames.contains("Datacite"); - }) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") From d9cbca83f7911b53190a9f999f5e02a7f00382ac Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 28 Oct 2021 16:13:24 +0200 Subject: [PATCH 21/60] moved filter on next phase --- .../dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index 5ca7d9782d..db93bc43fa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -75,7 +75,7 @@ object SparkResolveRelation { if (targetResolved != null && targetResolved._1.nonEmpty) currentRelation.setTarget(targetResolved._1) currentRelation - }.filter(r => !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")) + } .write .mode(SaveMode.Overwrite) .save(s"$workingPath/relation_resolved") @@ -88,6 +88,7 @@ object SparkResolveRelation { fs.rename(new Path(s"$graphBasePath/relation"), new Path(s"$workingPath/relation")) spark.read.load(s"$workingPath/relation_resolved").as[Relation] + .filter(r => !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")) .map(r => mapper.writeValueAsString(r)) .write .option("compression", "gzip") From 1225ba0b9223ae6049ea5c80772dcf72407a01b8 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 28 Oct 2021 16:18:17 +0200 Subject: [PATCH 22/60] [resolution] increasing number of partitions to avoid OOM --- .../eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 31cc53ae3c..52f0e6e9d0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -24,7 +24,7 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=8000 + --conf spark.sql.shuffle.partitions=15000 --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} From 7bd224f051598cf0b9370e5b33f6862477026de1 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 2 Nov 2021 15:58:15 +0100 Subject: [PATCH 23/60] implement first version of scholexplorer integration for the generation of final graph --- .../dhp/sx/graph/SparkConvertRDDtoDataset.scala | 2 +- .../eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala | 10 +++++++--- .../eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala | 2 +- .../dhp/sx/graph/finalGraph/oozie_app/workflow.xml | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index cb41d6134c..141b7b0736 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -59,7 +59,7 @@ object SparkConvertRDDtoDataset { log.info("Converting Relation") - val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation").map(s => mapper.readValue(s, classOf[Relation])) + val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation").map(s => mapper.readValue(s, classOf[Relation])).filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala index 0a7fc18fb6..e4fcd27824 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala @@ -51,10 +51,14 @@ object SparkCreateScholix { relationDS.joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left") .map { input: ((String, Relation), (String, ScholixSummary)) => - val rel: Relation = input._1._2 - val source: ScholixSummary = input._2._2 - (rel.getTarget, ScholixUtils.scholixFromSource(rel, source)) + if (input._1!= null && input._2!= null) { + val rel: Relation = input._1._2 + val source: ScholixSummary = input._2._2 + (rel.getTarget, ScholixUtils.scholixFromSource(rel, source)) + } + else null }(Encoders.tuple(Encoders.STRING, scholixEncoder)) + .filter(r => r!= null) .write.mode(SaveMode.Overwrite).save(s"$targetPath/scholix_from_source") val scholixSource: Dataset[(String, Scholix)] = spark.read.load(s"$targetPath/scholix_from_source").as[(String, Scholix)](Encoders.tuple(Encoders.STRING, scholixEncoder)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index 4dafd4fa36..93c554e048 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -289,7 +289,7 @@ object ScholixUtils { if (r.getInstance() == null || r.getInstance().isEmpty) return List() r.getInstance().asScala.filter(i => i.getUrl!= null && !i.getUrl.isEmpty) - + .filter(i => i.getPid!= null && i.getUrl != null) .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList)) .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2)).distinct.toList } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml index d8eb1fc80b..17996c82c0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml @@ -54,7 +54,7 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=5000 + --conf spark.sql.shuffle.partitions=20000 --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} @@ -79,7 +79,7 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=6000 + --conf spark.sql.shuffle.partitions=20000 --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} From 2480e590d1c5b440c336febc51fa3f53270e3dbf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 14:25:23 +0100 Subject: [PATCH 24/60] [DOIBoost - Mapping] changed the type on which to map dissertation from Crossref: from 006 Doctoral thesis to 0044 Thesis since dissertation could be either Doctoral or master thesis --- .../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 25f0ff381f..dc9e18fde5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -70,7 +70,7 @@ case object Crossref2Oaf { "reference-book" -> "0002 Book", "monograph" -> "0002 Book", "journal-article" -> "0001 Article", - "dissertation" -> "0006 Doctoral thesis", + "dissertation" -> "0044 Thesis", "other" -> "0038 Other literature type", "peer-review" -> "0015 Review", "proceedings" -> "0004 Conference object", From 779318961cd4e7a1b24990229caf74a1775230ac Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 14:38:52 +0100 Subject: [PATCH 25/60] [DOIBoost - Mapping] removed the url from crossref containing the api.elsevier.com... string in the url --- .../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index dc9e18fde5..53a9e8bd46 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -207,7 +207,7 @@ case object Crossref2Oaf { instance.setDateofacceptance(asField(createdDate.getValue)) } val s: String = (json \ "URL").extract[String] - val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null).distinct + val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && !p.contains("api.elsevier.com...")).distinct if (links.nonEmpty) { instance.setUrl(links.asJava) } From b2bb8d9d7908508f7d7abaf4d0d5d246fb38a42d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 15:44:57 +0100 Subject: [PATCH 26/60] [DOIBoost - Mapping] selecting the url from Crossref containing the doi --- .../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 53a9e8bd46..51637f5bb5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -207,7 +207,7 @@ case object Crossref2Oaf { instance.setDateofacceptance(asField(createdDate.getValue)) } val s: String = (json \ "URL").extract[String] - val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && !p.contains("api.elsevier.com...")).distinct + val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct if (links.nonEmpty) { instance.setUrl(links.asJava) } From 683fe093cff2a76fb42d27c52c5c61831f48c268 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 15:51:26 +0100 Subject: [PATCH 27/60] [DOIBoost - Mapping] Remove the addition of the instance to the MAG publication record --- .../doiboost/mag/SparkProcessMAG.scala | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index ecb389af87..c011cbd208 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -111,22 +111,24 @@ object SparkProcessMAG { .map(item => ConversionUtil.updatePubsWithConferenceInfo(item)) .write .mode(SaveMode.Overwrite) - .save(s"$workingPath/merge_step_2_conference") - - - magPubs= spark.read.load(s"$workingPath/merge_step_2_conference").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] - - val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl] - - - logger.info("Phase 5) enrich publication with URL and Instances") - magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left") - .map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) } - .write.mode(SaveMode.Overwrite) .save(s"$workingPath/merge_step_3") + //no more needed to add the instance to mag records +// magPubs= spark.read.load(s"$workingPath/merge_step_2_conference").as[Publication] +// .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] +// +// val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl] +// +// +// +// logger.info("Phase 5) enrich publication with URL and Instances") +// magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left") +// .map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) } +// .write.mode(SaveMode.Overwrite) +// .save(s"$workingPath/merge_step_3") + + // logger.info("Phase 6) Enrich Publication with description") // val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract] // pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract") From 96769b44816bbebbeee2bf47c03e9079f83ea441 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 16:43:36 +0100 Subject: [PATCH 28/60] [DOIBoost - Mapping] Changed the logic which brought in in the instance urls that should not be there: The urld of the doi in the json is reachable from the root (json/"URL") other urls where added from the links element. Now the mapping from the link element has been removed --- .../dnetlib/doiboost/crossref/Crossref2Oaf.scala | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 51637f5bb5..4c06d283a6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -206,11 +206,16 @@ case object Crossref2Oaf { else { instance.setDateofacceptance(asField(createdDate.getValue)) } - val s: String = (json \ "URL").extract[String] - val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct - if (links.nonEmpty) { - instance.setUrl(links.asJava) - } + val s: List[String] = List((json \ "URL").extract[String]) +// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct +// if (links.nonEmpty) { +// instance.setUrl(links.asJava) +// } + if(s.nonEmpty) + { + instance.setUrl(s.asJava) + } + result.setInstance(List(instance).asJava) //IMPORTANT From d97ea82a2922846a2e03df84992df10cb479287c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 16:45:15 +0100 Subject: [PATCH 29/60] [DOIBoost Mapping] Added test to verify the instance created for Crossref will have just the url related to the doi --- .../crossref/CrossrefMappingTest.scala | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index 75fb3f787a..f6d5e124ea 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -612,4 +612,26 @@ class CrossrefMappingTest { } + @Test + def testMultipleURLs() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("multiple_urls.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertEquals(1, item.getInstance().size()) + assertEquals(1, item.getInstance().get(0).getUrl().size()) + assertEquals("http://dx.doi.org/10.1016/j.jas.2019.105013", item.getInstance().get(0).getUrl().get(0)) + //println(mapper.writeValueAsString(item)) + + } + } From edf55395e97d92d9b8eb7c29184f0664b0ed7345 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 16:49:30 +0100 Subject: [PATCH 30/60] added test resourse --- .../doiboost/crossref/multiple_urls.json | 614 ++++++++++++++++++ 1 file changed, 614 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/multiple_urls.json diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/multiple_urls.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/multiple_urls.json new file mode 100644 index 0000000000..5f90feac41 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/multiple_urls.json @@ -0,0 +1,614 @@ + +{ +"indexed": { +"date-parts": [ +[ +2021, +10, +31 +] +], +"date-time": "2021-10-31T15:48:01Z", +"timestamp": 1635695281393 +}, +"reference-count": 39, +"publisher": "Elsevier BV", +"license": [ +{ +"start": { +"date-parts": [ +[ +2019, +12, +1 +] +], +"date-time": "2019-12-01T00:00:00Z", +"timestamp": 1575158400000 +}, +"content-version": "tdm", +"delay-in-days": 0, +"URL": "https://www.elsevier.com/tdm/userlicense/1.0/" +}, +{ +"start": { +"date-parts": [ +[ +2019, +9, +13 +] +], +"date-time": "2019-09-13T00:00:00Z", +"timestamp": 1568332800000 +}, +"content-version": "vor", +"delay-in-days": 0, +"URL": "http://creativecommons.org/licenses/by/4.0/" +} +], +"funder": [ +{ +"DOI": "10.13039/100001182", +"name": "INSTAP", +"doi-asserted-by": "publisher" +}, +{ +"DOI": "10.13039/100014440", +"name": "Ministry of Science, Innovation and Universities", +"doi-asserted-by": "publisher", +"award": [ +"RYC-2016-19637" +] +}, +{ +"DOI": "10.13039/100010661", +"name": "European Union’s Horizon 2020", +"doi-asserted-by": "publisher", +"award": [ +"746446" +] +} +], +"content-domain": { +"domain": [ +"elsevier.com", +"sciencedirect.com" +], +"crossmark-restriction": true +}, +"short-container-title": [ +"Journal of Archaeological Science" +], +"published-print": { +"date-parts": [ +[ +2019, +12 +] +] +}, +"DOI": "10.1016/j.jas.2019.105013", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2019, +9, +25 +] +], +"date-time": "2019-09-25T20:05:08Z", +"timestamp": 1569441908000 +}, +"page": "105013", +"update-policy": "http://dx.doi.org/10.1016/elsevier_cm_policy", +"source": "Crossref", +"is-referenced-by-count": 21, +"title": [ +"A brave new world for archaeological survey: Automated machine learning-based potsherd detection using high-resolution drone imagery" +], +"prefix": "10.1016", +"volume": "112", +"author": [ +{ +"given": "H.A.", +"family": "Orengo", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"given": "A.", +"family": "Garcia-Molsosa", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "78", +"reference": [ +{ +"key": "10.1016/j.jas.2019.105013_bib1", +"doi-asserted-by": "crossref", +"first-page": "85", +"DOI": "10.1080/17538947.2016.1250829", +"article-title": "Remote sensing heritage in a petabyte-scale: satellite data and heritage Earth Engine© applications", +"volume": "10", +"author": "Agapiou", +"year": "2017", +"journal-title": "Int. J. Digit. Earth" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib2", +"series-title": "Extracting Meaning from Ploughsoil Assemblages", +"first-page": "1", +"article-title": "Extracting meaning from ploughsoil assemblages: assessments of the past, strategies for the future", +"author": "Alcock", +"year": "2000" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib3", +"series-title": "Side-by-Side Survey. Comparative Regional Studies in the Mediterranean World", +"first-page": "1", +"article-title": "Introduction", +"author": "Alcock", +"year": "2004" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib4", +"doi-asserted-by": "crossref", +"first-page": "93", +"DOI": "10.1111/j.1538-4632.1995.tb00338.x", +"article-title": "Local indicators of spatial association—LISA", +"volume": "27", +"author": "Anselin", +"year": "1995", +"journal-title": "Geogr. Anal." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib5", +"series-title": "Archaeological Survey", +"author": "Banning", +"year": "2002" +}, +{ +"issue": "1/2", +"key": "10.1016/j.jas.2019.105013_bib6", +"doi-asserted-by": "crossref", +"first-page": "123", +"DOI": "10.2307/3181488", +"article-title": "GIS, archaeological survey and landscape archaeology on the island of Kythera, Greece", +"volume": "29", +"author": "Bevan", +"year": "2004", +"journal-title": "J. Field Archaeol." +}, +{ +"issue": "1", +"key": "10.1016/j.jas.2019.105013_bib8", +"doi-asserted-by": "crossref", +"first-page": "5", +"DOI": "10.1023/A:1010933404324", +"article-title": "Random forests", +"volume": "45", +"author": "Breiman", +"year": "2001", +"journal-title": "Mach. Learn." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib9", +"series-title": "Sampling in Contemporary British Archaeology", +"author": "Cherry", +"year": "1978" +}, +{ +"issue": "3", +"key": "10.1016/j.jas.2019.105013_bib10", +"doi-asserted-by": "crossref", +"first-page": "273", +"DOI": "10.1016/0734-189X(84)90197-X", +"article-title": "Segmentation of a high-resolution urban scene using texture operators", +"volume": "25", +"author": "Conners", +"year": "1984", +"journal-title": "Comput. Vis. Graph Image Process" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib11", +"first-page": "31", +"article-title": "Old land surfaces and modern ploughsoil: implications of recent work at Maxey, Cambridgeshire", +"volume": "2", +"author": "Crowther", +"year": "1983", +"journal-title": "Scott. Archaeol. Rev." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib12", +"series-title": "Settlement Pattern Studies in the Americas: Fifty Years since Virú", +"first-page": "203", +"article-title": "Conclusions: the settlement pattern concept from an Americanist perspective", +"author": "Fish", +"year": "1999" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib13", +"doi-asserted-by": "crossref", +"first-page": "21", +"DOI": "10.3390/geosciences9010021", +"article-title": "Remote sensing and historical morphodynamics of alluvial plains. The 1909 indus flood and the city of Dera Gazhi Khan (province of Punjab, Pakistan)", +"volume": "9", +"author": "Garcia", +"year": "2019", +"journal-title": "Geosciences" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib14", +"unstructured": "Georgiadis, M.; Garcia-Molsosa, A.; Orengo, H.A.; Kefalidou, E. and Kallintzi, K. In Preparation. APAX Project 2015-2018: A Preliminary Report. (Hesperia)." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib15", +"series-title": "Geographical Information Systems and Landscape Archaeology", +"first-page": "35", +"article-title": "Regional survey and GIS: the boeotia project", +"author": "Gillings", +"year": "1999" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib16", +"doi-asserted-by": "crossref", +"first-page": "18", +"DOI": "10.1016/j.rse.2017.06.031", +"article-title": "Google Earth engine: planetary-scale geospatial analysis for everyone", +"volume": "202", +"author": "Gorelick", +"year": "2017", +"journal-title": "Remote Sens. Environ." +}, +{ +"issue": "107", +"key": "10.1016/j.jas.2019.105013_bib17", +"doi-asserted-by": "crossref", +"first-page": "177", +"DOI": "10.1111/j.0031-868X.2004.00278.x", +"article-title": "Photogrammetric reconstruction of the great buddha of Bamiyan, Afghanistan", +"volume": "19", +"author": "Grün", +"year": "2004", +"journal-title": "Photogramm. Rec." +}, +{ +"issue": "6", +"key": "10.1016/j.jas.2019.105013_bib18", +"doi-asserted-by": "crossref", +"first-page": "610", +"DOI": "10.1109/TSMC.1973.4309314", +"article-title": "Textural features for image classification", +"author": "Haralick", +"year": "1973", +"journal-title": "IEEE Trans. Syst., Man, Cybernet., SMC-3" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib19", +"doi-asserted-by": "crossref", +"first-page": "76", +"DOI": "10.1558/jmea.v14i1.76", +"article-title": "Excavating to excess? Implications of the last decade of archaeology in Israel", +"volume": "14", +"author": "Kletter", +"year": "2001", +"journal-title": "J. Mediterr. Archaeol." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib20", +"first-page": "299", +"article-title": "Testing Google Earth Engine for the automatic identification and vectorization of archaeological features: a case study from Faynan, Jordan", +"volume": "15", +"author": "Liss", +"year": "2017", +"journal-title": "J. Archaeol. Sci.: Report" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib21", +"series-title": "Geographical Information Systems and Landscape Archaeology", +"first-page": "55", +"article-title": "Towards a methodology for modelling surface survey data: the sangro valley project", +"author": "Lock", +"year": "1999" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib22", +"series-title": "Extracting Meaning from Ploughsoil Assemblages", +"first-page": "5", +"article-title": "Methods of collection recording and quantification", +"author": "Mattingly", +"year": "2000" +}, +{ +"issue": "14", +"key": "10.1016/j.jas.2019.105013_bib23", +"doi-asserted-by": "crossref", +"first-page": "E778", +"DOI": "10.1073/pnas.1115472109", +"article-title": "Mapping patterns of long-term settlement in Northern Mesopotamia at a large scale", +"volume": "109", +"author": "Menze", +"year": "2012", +"journal-title": "Proc. Natl. Acad. Sci." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib24", +"doi-asserted-by": "crossref", +"first-page": "80", +"DOI": "10.1016/j.jas.2015.04.002", +"article-title": "A supervised machine-learning approach towards geochemical predictive modelling in archaeology", +"volume": "59", +"author": "Oonk", +"year": "2015", +"journal-title": "J. Archaeol. Sci." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib25", +"doi-asserted-by": "crossref", +"first-page": "49", +"DOI": "10.1016/j.isprsjprs.2012.07.005", +"article-title": "Combining terrestrial stereophotogrammetry, DGPS and GIS-based 3D voxel modelling in the volumetric recording of archaeological features", +"volume": "76", +"author": "Orengo", +"year": "2013", +"journal-title": "ISPRS J. Photogrammetry Remote Sens." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib26", +"doi-asserted-by": "crossref", +"first-page": "100", +"DOI": "10.1016/j.jas.2015.10.008", +"article-title": "Photogrammetric re-discovery of the Eastern Thessalian hidden long-term landscapes", +"volume": "64", +"author": "Orengo", +"year": "2015", +"journal-title": "J. Archaeol. Sci." +}, +{ +"issue": "3", +"key": "10.1016/j.jas.2019.105013_bib27", +"doi-asserted-by": "crossref", +"first-page": "479", +"DOI": "10.3764/aja.122.3.0479", +"article-title": "Towards a definition of Minoan agro-pastoral landscapes: results of the survey at Palaikastro (Crete)", +"volume": "122", +"author": "Orengo", +"year": "2018", +"journal-title": "Am. J. Archaeol." +}, +{ +"issue": "7", +"key": "10.1016/j.jas.2019.105013_bib28", +"doi-asserted-by": "crossref", +"first-page": "735", +"DOI": "10.3390/rs9070735", +"article-title": "Large-scale, multi-temporal remote sensing of palaeo-river networks: a case study from Northwest India and its implications for the Indus civilisation", +"volume": "9", +"author": "Orengo", +"year": "2017", +"journal-title": "Remote Sens." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib29", +"doi-asserted-by": "crossref", +"first-page": "1361", +"DOI": "10.1002/esp.4317", +"article-title": "Multi-scale relief model (MSRM): a new algorithm for the visualization of subtle topographic change of variable size in digital elevation models", +"volume": "43", +"author": "Orengo", +"year": "2018", +"journal-title": "Earth Surf. Process. Landforms" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib30", +"series-title": "Submitted to Proceedings of the National Academy of Sciences", +"article-title": "Living on the edge of the desert: automated detection of archaeological mounds in Cholistan (Pakistan) using machine learning classification of multi-sensor multi-temporal satellite data", +"author": "Orengo", +"year": "2019" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib31", +"first-page": "154", +"article-title": "How many trees in a random forest?", +"volume": "vol. 7376", +"author": "Oshiro", +"year": "2012" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib32", +"article-title": "Decision-making in modern surveys", +"volume": "ume 1", +"author": "Plog", +"year": "1978" +}, +{ +"issue": "4", +"key": "10.1016/j.jas.2019.105013_bib33", +"doi-asserted-by": "crossref", +"first-page": "100", +"DOI": "10.3390/geosciences7040100", +"article-title": "From above and on the ground: geospatial methods for recording endangered archaeology in the Middle East and north africa", +"volume": "7", +"author": "Rayne", +"year": "2017", +"journal-title": "Geosciences" +}, +{ +"issue": "1", +"key": "10.1016/j.jas.2019.105013_bib34", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.1080/00438243.1978.9979712", +"article-title": "The design of archaeological surveys", +"volume": "10", +"author": "Schiffer", +"year": "1978", +"journal-title": "World Archaeol." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib35", +"series-title": "Experiments in the Collection and Analysis of Archaeological Survey Data: the East Hampshire Survey", +"author": "Shennan", +"year": "1985" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib36", +"doi-asserted-by": "crossref", +"first-page": "1066", +"DOI": "10.1016/j.culher.2016.06.006", +"article-title": "Drones over Mediterranean landscapes. The potential of small UAV's (drones) for site detection and heritage management in archaeological survey projects: a case study from Le Pianelle in the Tappino Valley, Molise (Italy)", +"volume": "22", +"author": "Stek", +"year": "2016", +"journal-title": "J. Cult. Herit." +}, +{ +"key": "10.1016/j.jas.2019.105013_bib37", +"series-title": "Side-by-Side Survey. Comparative Regional Studies in the Mediterranean World", +"first-page": "65", +"article-title": "Side-by-side and back to front: exploring intra-regional latitudinal and longitudinal comparability in survey data. Three case studies from Metaponto, southern Italy", +"author": "Thomson", +"year": "2004" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib38", +"series-title": "Digital Discovery. Exploring New Frontiers in Human Heritage. Computer Applications and Quantitative Methods in Archaeology", +"article-title": "Computer vision and machine learning for archaeology", +"author": "van der Maaten", +"year": "2007" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib39", +"doi-asserted-by": "crossref", +"first-page": "1114", +"DOI": "10.1111/j.1475-4754.2012.00667.x", +"article-title": "Computer vision-based orthophoto mapping of complex archaeological sites: the ancient quarry of Pitaranha (Portugal-Spain)", +"volume": "54", +"author": "Verhoeven", +"year": "2012", +"journal-title": "Archaeometry" +}, +{ +"key": "10.1016/j.jas.2019.105013_bib40", +"series-title": "A Guide for Salvage Archeology", +"author": "Wendorf", +"year": "1962" +} +], +"container-title": [ +"Journal of Archaeological Science" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "https://api.elsevier.com/content/article/PII:S0305440319301001?httpAccept=text/xml", +"content-type": "text/xml", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "https://api.elsevier.com/content/article/PII:S0305440319301001?httpAccept=text/plain", +"content-type": "text/plain", +"content-version": "vor", +"intended-application": "text-mining" +} +], +"deposited": { +"date-parts": [ +[ +2019, +11, +25 +] +], +"date-time": "2019-11-25T06:46:34Z", +"timestamp": 1574664394000 +}, +"score": 1, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2019, +12 +] +] +}, +"references-count": 39, +"alternative-id": [ +"S0305440319301001" +], +"URL": "http://dx.doi.org/10.1016/j.jas.2019.105013", +"relation": { + +}, +"ISSN": [ +"0305-4403" +], +"issn-type": [ +{ +"value": "0305-4403", +"type": "print" +} +], +"subject": [ +"Archaeology", +"Archaeology" +], +"published": { +"date-parts": [ +[ +2019, +12 +] +] +}, +"assertion": [ +{ +"value": "Elsevier", +"name": "publisher", +"label": "This article is maintained by" +}, +{ +"value": "A brave new world for archaeological survey: Automated machine learning-based potsherd detection using high-resolution drone imagery", +"name": "articletitle", +"label": "Article Title" +}, +{ +"value": "Journal of Archaeological Science", +"name": "journaltitle", +"label": "Journal Title" +}, +{ +"value": "https://doi.org/10.1016/j.jas.2019.105013", +"name": "articlelink", +"label": "CrossRef DOI link to publisher maintained version" +}, +{ +"value": "article", +"name": "content_type", +"label": "Content Type" +}, +{ +"value": "© 2019 The Authors. Published by Elsevier Ltd.", +"name": "copyright", +"label": "Copyright" +} +], +"article-number": "105013" +} From d50057b2d92ab99407461d37490bff19b6608346 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Nov 2021 16:59:37 +0100 Subject: [PATCH 31/60] [DOIBoost Mapping] changed the way to create the url for the instance: we use the crooref guidelines https://doi.org/doi --- .../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 2 +- .../java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 4c06d283a6..1b1c850ba3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -206,7 +206,7 @@ case object Crossref2Oaf { else { instance.setDateofacceptance(asField(createdDate.getValue)) } - val s: List[String] = List((json \ "URL").extract[String]) + val s: List[String] = List("https://doi.org/" + doi) // val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct // if (links.nonEmpty) { // instance.setUrl(links.asJava) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index f6d5e124ea..5ef92cfa47 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -629,7 +629,7 @@ class CrossrefMappingTest { assertEquals(1, item.getInstance().size()) assertEquals(1, item.getInstance().get(0).getUrl().size()) - assertEquals("http://dx.doi.org/10.1016/j.jas.2019.105013", item.getInstance().get(0).getUrl().get(0)) + assertEquals("https://doi.org/10.1016/j.jas.2019.105013", item.getInstance().get(0).getUrl().get(0)) //println(mapper.writeValueAsString(item)) } From de63d29b6f5b6cdc0a45df94f5ade5f9c4c28711 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 4 Nov 2021 16:16:40 +0100 Subject: [PATCH 32/60] [DOIBoost Mapping] Fix to avoid to produce results with null as identifier (probably due to the filtering function in the factory for the creation of the id) --- .../dnetlib/doiboost/mag/SparkProcessMAG.scala | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index c011cbd208..fa3be973de 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -164,12 +164,18 @@ object SparkProcessMAG { .write.mode(SaveMode.Overwrite) .save(s"$workingPath/mag_publication") + spark.read.load(s"$workingPath/mag_publication").as[Publication] + .filter(p => p.getId == null) + .groupByKey(p => p.getId) + .reduceGroups((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) + .map(_._2) + .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") - val s:RDD[Publication] = spark.read.load(s"$workingPath/mag_publication").as[Publication] - .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) - .map(_._2) - - spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") +// val s:RDD[Publication] = spark.read.load(s"$workingPath/mag_publication").as[Publication] +// .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) +// .map(_._2) +// +// spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") } } From df7ee77c7afd7af06f90ebaac447decc665f2428 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 4 Nov 2021 16:24:07 +0100 Subject: [PATCH 33/60] [DOIBoost Mapping] removed not needed comments --- .../doiboost/mag/SparkProcessMAG.scala | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index fa3be973de..016279787d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -114,25 +114,6 @@ object SparkProcessMAG { .save(s"$workingPath/merge_step_3") - //no more needed to add the instance to mag records -// magPubs= spark.read.load(s"$workingPath/merge_step_2_conference").as[Publication] -// .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] -// -// val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl] -// -// -// -// logger.info("Phase 5) enrich publication with URL and Instances") -// magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left") -// .map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) } -// .write.mode(SaveMode.Overwrite) -// .save(s"$workingPath/merge_step_3") - - -// logger.info("Phase 6) Enrich Publication with description") -// val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract] -// pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract") - val paperAbstract = spark.read.load((s"$workingPath/PaperAbstract")).as[MagPaperAbstract] @@ -171,11 +152,7 @@ object SparkProcessMAG { .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") -// val s:RDD[Publication] = spark.read.load(s"$workingPath/mag_publication").as[Publication] -// .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) -// .map(_._2) -// -// spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") + } } From a7d50c499bb069d030e0fbeec1cb4b4986981d98 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 8 Nov 2021 16:44:19 +0100 Subject: [PATCH 34/60] [BypassAS] prepare FOS subject, test and model for FOS and BipFinder scores --- .../DistributeFOSSparkJob.java | 84 +++++ .../dhp/bypassactionset/GetFOSData.java | 79 +++++ .../bypassactionset/model/BipDeserialize.java | 28 ++ .../bypassactionset/model/FOSDataModel.java | 71 ++++ .../dhp/bypassactionset/model/KeyValue.java | 26 ++ .../dhp/bypassactionset/model/Score.java | 30 ++ .../distribute_fos_parameters.json | 21 ++ .../bypassactionset/get_fos_parameters.json | 27 ++ .../dhp/bypassactionset/GetFOSTest.java | 332 ++++++++++++++++++ .../dnetlib/dhp/bypassactionset/fos/fos.json | 38 ++ .../dhp/bypassactionset/fos/h2020_fos_sbs.csv | 38 ++ 11 files changed, 774 insertions(+) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/DistributeFOSSparkJob.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/GetFOSData.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipDeserialize.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/FOSDataModel.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/KeyValue.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/Score.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/h2020_fos_sbs.csv diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/DistributeFOSSparkJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/DistributeFOSSparkJob.java new file mode 100644 index 0000000000..8c2c56be9a --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/DistributeFOSSparkJob.java @@ -0,0 +1,84 @@ +package eu.dnetlib.dhp.bypassactionset; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class DistributeFOSSparkJob implements Serializable { + private static final Logger log = LoggerFactory.getLogger(DistributeFOSSparkJob.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + DistributeFOSSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String sourcePath = parser.get("sourcePath"); + log.info("sourcePath: {}", sourcePath); + + + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + removeOutputDir(spark, outputPath); + distributeFOSdois( + spark, + sourcePath, + + outputPath + ); + }); + } + + private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) { + Dataset fosDataset = readPath(spark, sourcePath, FOSDataModel.class); + + fosDataset.flatMap((FlatMapFunction) v -> { + List fosList = new ArrayList<>(); + final String level1 = v.getLevel1(); + final String level2 = v.getLevel2(); + final String level3 = v.getLevel3(); + Arrays.stream(v.getDoi().split("\u0002")).forEach(d -> fosList.add(FOSDataModel.newInstance(d, level1, level2, level3))); + return fosList.iterator(); + }, Encoders.bean(FOSDataModel.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(outputPath); + } + + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/GetFOSData.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/GetFOSData.java new file mode 100644 index 0000000000..849bf1c6a4 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/GetFOSData.java @@ -0,0 +1,79 @@ +package eu.dnetlib.dhp.bypassactionset; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +import eu.dnetlib.dhp.common.collection.GetCSV; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.*; + +import java.util.Objects; +import java.util.Optional; + +public class GetFOSData implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(GetFOSData.class); + + public static final char DEFAULT_DELIMITER = '\t'; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + GetFOSData.class + .getResourceAsStream( + "/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json")))); + + parser.parseArgument(args); + + //the path where the original fos csv file is stored + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}", inputPath); + + //the path where to put the file as json + final String outputFile = parser.get("outputFile"); + log.info("outputFile {}", outputFile); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + final String classForName = parser.get("classForName"); + log.info("classForName {}", classForName); + + final char delimiter = Optional + .ofNullable(parser.get("delimiter")) + .map(s -> s.charAt(0)) + .orElse(DEFAULT_DELIMITER); + log.info("delimiter {}", delimiter); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + + new GetFOSData().doRewrite(inputPath, outputFile, classForName, delimiter, fileSystem); + + } + + public void doRewrite(String inputPath, String outputFile, String classForName, char delimiter, FileSystem fs) + throws IOException, ClassNotFoundException{ + + //reads the csv and writes it as its json equivalent + try (InputStreamReader reader = new InputStreamReader(fs.open(new Path (inputPath)))) { + GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); + } + + + + } + + + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipDeserialize.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipDeserialize.java new file mode 100644 index 0000000000..a2c7601a88 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipDeserialize.java @@ -0,0 +1,28 @@ + +package eu.dnetlib.dhp.bypassactionset.model; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +/** + * Class that maps the model of the bipFinder! input data. + * Only needed for deserialization purposes + */ + +public class BipDeserialize extends HashMap> implements Serializable { + + public BipDeserialize() { + super(); + } + + public List get(String key) { + + if (super.get(key) == null) { + return new ArrayList<>(); + } + return super.get(key); + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/FOSDataModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/FOSDataModel.java new file mode 100644 index 0000000000..4f80d98dc3 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/FOSDataModel.java @@ -0,0 +1,71 @@ +package eu.dnetlib.dhp.bypassactionset.model; + +import com.opencsv.bean.CsvBindByPosition; + +import java.io.Serializable; + +public class FOSDataModel implements Serializable { + @CsvBindByPosition(position = 1) +// @CsvBindByName(column = "doi") + private String doi; + + @CsvBindByPosition(position = 2) +// @CsvBindByName(column = "level1") + private String level1; + + @CsvBindByPosition(position = 3) +// @CsvBindByName(column = "level2") + private String level2; + + @CsvBindByPosition(position = 4) +// @CsvBindByName(column = "level3") + private String level3; + + + public FOSDataModel() { + + } + + public FOSDataModel(String doi, String level1, String level2, String level3) { + this.doi = doi; + this.level1 = level1; + this.level2 = level2; + this.level3 = level3; + } + + public static FOSDataModel newInstance(String d, String level1, String level2, String level3) { + return new FOSDataModel(d, level1, level2, level3); + } + + public String getDoi() { + return doi; + } + + public void setDoi(String doi) { + this.doi = doi; + } + + public String getLevel1() { + return level1; + } + + public void setLevel1(String level1) { + this.level1 = level1; + } + + public String getLevel2() { + return level2; + } + + public void setLevel2(String level2) { + this.level2 = level2; + } + + public String getLevel3() { + return level3; + } + + public void setLevel3(String level3) { + this.level3 = level3; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/KeyValue.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/KeyValue.java new file mode 100644 index 0000000000..2f3379308f --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/KeyValue.java @@ -0,0 +1,26 @@ + +package eu.dnetlib.dhp.bypassactionset.model; + +import java.io.Serializable; + +public class KeyValue implements Serializable { + + private String key; + private String value; + + public String getKey() { + return key; + } + + public void setKey(String key) { + this.key = key; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/Score.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/Score.java new file mode 100644 index 0000000000..76580fb239 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/Score.java @@ -0,0 +1,30 @@ + +package eu.dnetlib.dhp.bypassactionset.model; + +import java.io.Serializable; +import java.util.List; + +/** + * represents the score in the input file + */ +public class Score implements Serializable { + + private String id; + private List unit; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public List getUnit() { + return unit; + } + + public void setUnit(List unit) { + this.unit = unit; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json new file mode 100644 index 0000000000..17b1a95c0a --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json @@ -0,0 +1,21 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"out", + "paramLongName":"outputPath", + "paramDescription": "the output path", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json new file mode 100644 index 0000000000..8b663e080c --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json @@ -0,0 +1,27 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"out", + "paramLongName":"outputPath", + "paramDescription": "the output path", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "hnn", + "paramLongName": "hdfsNameNode", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java new file mode 100644 index 0000000000..6e05ced648 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java @@ -0,0 +1,332 @@ +package eu.dnetlib.dhp.bypassactionset; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.countrypropagation.CountryPropagationJobTest; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; + +import java.nio.file.Path; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class GetFOSTest { + private static final Logger log = LoggerFactory.getLogger(GetFOSTest.class); + + private static Path workingDir; + private static SparkSession spark; + private static LocalFileSystem fs; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(CountryPropagationJobTest.class.getSimpleName()); + + fs = FileSystem.getLocal(new Configuration()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(GetFOSTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(CountryPropagationJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + + @Test + void getFOSFileTest() throws CollectorException, IOException, ClassNotFoundException { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/fos/h2020_fos_sbs.csv") + .getPath(); + final String outputPath = workingDir.toString() + "/fos.json"; + + + + + new GetFOSData() + .doRewrite(sourcePath, outputPath, "eu.dnetlib.dhp.bypassactionset.FOSDataModel", '\t',fs ); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + FOSDataModel fos = new ObjectMapper().readValue(line, FOSDataModel.class); + + System.out.println(new ObjectMapper().writeValueAsString(fos)); + count += 1; + } + + assertEquals(38, count); + + + } + + + @Test + void distributeDoiTest() throws Exception{ + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos.json") + .getPath(); + + DistributeFOSSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + + "-outputPath", workingDir.toString() + "/distribute" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/distribute") + .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); + + + assertEquals(50, tmp.count()); + assertEquals(1, tmp.filter(row -> row.getDoi().equals("10.3390/s18072310")).count()); + assertEquals("engineering and technology", tmp.filter(r -> r.getDoi().equals("10.3390/s18072310")).collect().get(0).getLevel1()); + assertEquals("nano-technology", tmp.filter(r -> r.getDoi().equals("10.3390/s18072310")).collect().get(0).getLevel2()); + assertEquals("nanoscience & nanotechnology", tmp.filter(r -> r.getDoi().equals("10.3390/s18072310")).collect().get(0).getLevel3()); + + assertEquals(1, tmp.filter(row -> row.getDoi().equals("10.1111/1365-2656.12831")).count()); + assertEquals("social sciences", tmp.filter(r -> r.getDoi().equals("10.1111/1365-2656.12831")).collect().get(0).getLevel1()); + assertEquals("psychology and cognitive sciences", tmp.filter(r -> r.getDoi().equals("10.1111/1365-2656.12831")).collect().get(0).getLevel2()); + assertEquals("NULL", tmp.filter(r -> r.getDoi().equals("10.1111/1365-2656.12831")).collect().get(0).getLevel3()); + +// {"doi":"10.1111/1365-2656.12831\u000210.17863/cam.24369","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"} + + } + + /** + * @Test + * void testCountryPropagationSoftware() throws Exception { + * final String sourcePath = getClass() + * .getResource("/eu/dnetlib/dhp/countrypropagation/sample/software") + * .getPath(); + * final String preparedInfoPath = getClass() + * .getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo") + * .getPath(); + * SparkCountryPropagationJob + * .main( + * new String[] { + * "--isSparkSessionManaged", Boolean.FALSE.toString(), + * "--sourcePath", sourcePath, + * "-saveGraph", "true", + * "-resultTableName", Software.class.getCanonicalName(), + * "-outputPath", workingDir.toString() + "/software", + * "-preparedInfoPath", preparedInfoPath + * }); + * + * final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + * + * JavaRDD tmp = sc + * .textFile(workingDir.toString() + "/software") + * .map(item -> OBJECT_MAPPER.readValue(item, Software.class)); + * + * // tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s)); + * + * Assertions.assertEquals(10, tmp.count()); + * + * Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Software.class)); + * + * Assertions.assertEquals(6, verificationDs.filter("size(country) > 0").count()); + * Assertions.assertEquals(3, verificationDs.filter("size(country) = 1").count()); + * Assertions.assertEquals(3, verificationDs.filter("size(country) = 2").count()); + * Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count()); + * + * Dataset countryExploded = verificationDs + * .flatMap( + * (FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) + * .map((MapFunction) Qualifier::getClassid, Encoders.STRING()); + * + * Assertions.assertEquals(9, countryExploded.count()); + * + * Assertions.assertEquals(1, countryExploded.filter("value = 'FR'").count()); + * Assertions.assertEquals(1, countryExploded.filter("value = 'TR'").count()); + * Assertions.assertEquals(2, countryExploded.filter("value = 'IT'").count()); + * Assertions.assertEquals(1, countryExploded.filter("value = 'US'").count()); + * Assertions.assertEquals(1, countryExploded.filter("value = 'MX'").count()); + * Assertions.assertEquals(1, countryExploded.filter("value = 'CH'").count()); + * Assertions.assertEquals(2, countryExploded.filter("value = 'JP'").count()); + * + * Dataset> countryExplodedWithCountryclassid = verificationDs + * .flatMap((FlatMapFunction>) row -> { + * List> prova = new ArrayList<>(); + * List countryList = row.getCountry(); + * countryList + * .forEach( + * c -> prova + * .add( + * new Tuple2<>( + * row.getId(), c.getClassid()))); + * return prova.iterator(); + * }, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + * + * Assertions.assertEquals(9, countryExplodedWithCountryclassid.count()); + * + * countryExplodedWithCountryclassid.show(false); + * Assertions + * .assertEquals( + * 1, + * countryExplodedWithCountryclassid + * .filter( + * "_1 = '50|od______1582::6e7a9b21a2feef45673890432af34244' and _2 = 'FR' ") + * .count()); + * Assertions + * .assertEquals( + * 1, + * countryExplodedWithCountryclassid + * .filter( + * "_1 = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523' and _2 = 'TR' ") + * .count()); + * Assertions + * .assertEquals( + * 2, + * countryExplodedWithCountryclassid + * .filter( + * "_1 = '50|od______1106::2b7ca9726230be8e862be224fd463ac4' and (_2 = 'IT' or _2 = 'MX') ") + * .count()); + * Assertions + * .assertEquals( + * 2, + * countryExplodedWithCountryclassid + * .filter( + * "_1 = '50|od_______935::46a0ad9964171c3dd13373f5427b9a1c' and (_2 = 'IT' or _2 = 'US') ") + * .count()); + * Assertions + * .assertEquals( + * 1, + * countryExplodedWithCountryclassid + * .filter( + * "_1 = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and _2 = 'JP'") + * .count()); + * Assertions + * .assertEquals( + * 2, + * countryExplodedWithCountryclassid + * .filter( + * "_1 = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6' and (_2 = 'CH' or _2 = 'JP') ") + * .count()); + * + * Dataset> countryExplodedWithCountryclassname = verificationDs + * .flatMap( + * (FlatMapFunction>) row -> { + * List> prova = new ArrayList<>(); + * List countryList = row.getCountry(); + * countryList + * .forEach( + * c -> prova + * .add( + * new Tuple2<>( + * row.getId(), + * c.getClassname()))); + * return prova.iterator(); + * }, + * Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + * + * countryExplodedWithCountryclassname.show(false); + * Assertions + * .assertEquals( + * 1, + * countryExplodedWithCountryclassname + * .filter( + * "_1 = '50|od______1582::6e7a9b21a2feef45673890432af34244' and _2 = 'France' ") + * .count()); + * Assertions + * .assertEquals( + * 1, + * countryExplodedWithCountryclassname + * .filter( + * "_1 = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523' and _2 = 'Turkey' ") + * .count()); + * Assertions + * .assertEquals( + * 2, + * countryExplodedWithCountryclassname + * .filter( + * "_1 = '50|od______1106::2b7ca9726230be8e862be224fd463ac4' and (_2 = 'Italy' or _2 = 'Mexico') ") + * .count()); + * Assertions + * .assertEquals( + * 2, + * countryExplodedWithCountryclassname + * .filter( + * "_1 = '50|od_______935::46a0ad9964171c3dd13373f5427b9a1c' and (_2 = 'Italy' or _2 = 'United States') ") + * .count()); + * Assertions + * .assertEquals( + * 1, + * countryExplodedWithCountryclassname + * .filter( + * "_1 = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and _2 = 'Japan' ") + * .count()); + * Assertions + * .assertEquals( + * 2, + * countryExplodedWithCountryclassname + * .filter( + * "_1 = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6' and (_2 = 'Switzerland' or _2 = 'Japan') ") + * .count()); + * + * Dataset> countryExplodedWithCountryProvenance = verificationDs + * .flatMap( + * (FlatMapFunction>) row -> { + * List> prova = new ArrayList<>(); + * List countryList = row.getCountry(); + * countryList + * .forEach( + * c -> prova + * .add( + * new Tuple2<>( + * row.getId(), + * c + * .getDataInfo() + * .getInferenceprovenance()))); + * return prova.iterator(); + * }, + * Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + * + * Assertions + * .assertEquals( + * 7, countryExplodedWithCountryProvenance.filter("_2 = 'propagation'").count()); + * } + */ +} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos.json new file mode 100644 index 0000000000..1b46a3d259 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos.json @@ -0,0 +1,38 @@ +{"doi":"10.3390/s18072310","level1":"engineering and technology","level2":"nano-technology","level3":"nanoscience & nanotechnology"} +{"doi":"10.1111/1365-2656.12831\u000210.17863/cam.24369","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"} +{"doi":"10.3929/ethz-b-000187584\u000210.1002/chem.201701644","level1":"natural sciences","level2":"NULL","level3":"NULL"} +{"doi":"10.1080/01913123.2017.1367361","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} +{"doi":"10.1051/e3sconf/20199207011","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"environmental sciences"} +{"doi":"10.1038/onc.2015.333","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} +{"doi":"10.1093/mnras/staa256","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} +{"doi":"10.1016/j.jclepro.2018.07.166","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} +{"doi":"10.1103/physrevlett.125.037403","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} +{"doi":"10.1080/03602532.2017.1316285","level1":"natural sciences","level2":"NULL","level3":"NULL"} +{"doi":"10.1001/jamanetworkopen.2019.1868","level1":"medical and health sciences","level2":"other medical science","level3":"health policy & services"} +{"doi":"10.1128/mra.00874-18","level1":"natural sciences","level2":"biological sciences","level3":"plant biology & botany"} +{"doi":"10.1016/j.nancom.2018.03.001","level1":"engineering and technology","level2":"NULL","level3":"NULL"} +{"doi":"10.1112/topo.12174","level1":"natural sciences","level2":"NULL","level3":"NULL"} +{"doi":"10.12688/wellcomeopenres.15846.1","level1":"medical and health sciences","level2":"health sciences","level3":"NULL"} +{"doi":"10.21468/scipostphys.3.1.001","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} +{"doi":"10.1088/1741-4326/ab6c77","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} +{"doi":"10.1109/tpwrs.2019.2944747","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} +{"doi":"10.1016/j.expthermflusci.2019.109994\u000210.17863/cam.46212","level1":"engineering and technology","level2":"mechanical engineering","level3":"mechanical engineering & transports"} +{"doi":"10.1109/tc.2018.2860012","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"computer hardware & architecture"} +{"doi":"10.1002/mma.6622","level1":"natural sciences","level2":"mathematics","level3":"numerical & computational mathematics"} +{"doi":"10.1051/radiopro/2020020","level1":"natural sciences","level2":"chemical sciences","level3":"NULL"} +{"doi":"10.1007/s12268-019-1003-4","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} +{"doi":"10.3390/cancers12010236","level1":"medical and health sciences","level2":"health sciences","level3":"biochemistry & molecular biology"} +{"doi":"10.6084/m9.figshare.9912614\u000210.6084/m9.figshare.9912614.v1\u000210.1080/00268976.2019.1665199","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"} +{"doi":"10.1175/jpo-d-17-0239.1","level1":"natural sciences","level2":"biological sciences","level3":"marine biology & hydrobiology"} +{"doi":"10.1007/s13218-020-00674-7","level1":"engineering and technology","level2":"industrial biotechnology","level3":"industrial engineering & automation"} +{"doi":"10.1016/j.psyneuen.2016.02.003\u000210.1016/j.psyneuen.2016.02.00310.7892/boris.78886\u000210.7892/boris.78886","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} +{"doi":"10.1109/ted.2018.2813542","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} +{"doi":"10.3989/scimar.04739.25a","level1":"natural sciences","level2":"biological sciences","level3":"NULL"} +{"doi":"10.3390/su12187503","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"NULL"} +{"doi":"10.1016/j.ccell.2018.08.017","level1":"medical and health sciences","level2":"basic medicine","level3":"biochemistry & molecular biology"} +{"doi":"10.1103/physrevresearch.2.023322","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} +{"doi":"10.1039/c8cp03234c","level1":"natural sciences","level2":"NULL","level3":"NULL"} +{"doi":"10.5281/zenodo.3696557\u000210.5281/zenodo.3696556\u000210.1109/jsac.2016.2545384","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"} +{"doi":"10.1038/ng.3667\u000210.1038/ng.3667.\u000210.17615/tct6-4m26\u000210.17863/cam.15649","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} +{"doi":"10.1016/j.jclepro.2019.119065","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} +{"doi":"10.1111/pce.13392","level1":"agricultural and veterinary sciences","level2":"agriculture, forestry, and fisheries","level3":"agronomy & agriculture"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/h2020_fos_sbs.csv b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/h2020_fos_sbs.csv new file mode 100644 index 0000000000..e874353e8f --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/h2020_fos_sbs.csv @@ -0,0 +1,38 @@ +dedup_wf_001::ddcc7a56fa13e49bcc59c6bdd19ad26c 10.3390/s18072310 engineering and technology nano-technology nanoscience & nanotechnology +dedup_wf_001::b76062d56e28224eac56111a4e1e5ecf 10.1111/1365-2656.1283110.17863/cam.24369 social sciences psychology and cognitive sciences NULL +dedup_wf_001::bb752acb8f403a25fa7851a302f7b7ac 10.3929/ethz-b-00018758410.1002/chem.201701644 natural sciences NULL NULL +dedup_wf_001::2f1435a9201ecf5cbbcb12c9b2d971cd 10.1080/01913123.2017.1367361 medical and health sciences clinical medicine oncology & carcinogenesis +dedup_wf_001::fc9e47ec16c67b101724320d4b030514 10.1051/e3sconf/20199207011 natural sciences earth and related environmental sciences environmental sciences +dedup_wf_001::caa1e5b4de387cb31751552f4f0f5d72 10.1038/onc.2015.333 medical and health sciences clinical medicine oncology & carcinogenesis +dedup_wf_001::c2a98df5637d69bf0524eaf40fe6bf11 10.1093/mnras/staa256 natural sciences physical sciences NULL +dedup_wf_001::c221262bdc77cbfd59859a402f0e3991 10.1016/j.jclepro.2018.07.166 engineering and technology other engineering and technologies building & construction +doiboost____::d56d9dc21f317b3e009d5b6c8ea87212 10.1103/physrevlett.125.037403 natural sciences physical sciences nuclear & particles physics +dedup_wf_001::8a7269c8ee6470b2fb4fd384bc389e08 10.1080/03602532.2017.1316285 natural sciences NULL NULL +dedup_wf_001::28342ebbc19833e4e1f4a2b23cf5ee20 10.1001/jamanetworkopen.2019.1868 medical and health sciences other medical science health policy & services +dedup_wf_001::c1e1daf2b55dd9ec8e1c7c7458bbc7bc 10.1128/mra.00874-18 natural sciences biological sciences plant biology & botany +dedup_wf_001::a2ef4a2720c71907180750e5871298ef 10.1016/j.nancom.2018.03.001 engineering and technology NULL NULL +dedup_wf_001::676f46a31519e83a89efcb1c626286fb 10.1112/topo.12174 natural sciences NULL NULL +dedup_wf_001::6f2761642f1e39313388e2c4060657dd 10.12688/wellcomeopenres.15846.1 medical and health sciences health sciences NULL +dedup_wf_001::e414c1dec599521a9635a60de0f6755b 10.21468/scipostphys.3.1.001 natural sciences physical sciences NULL +dedup_wf_001::f3395fe0f330164ea424dc61c86c9a3d 10.1088/1741-4326/ab6c77 natural sciences physical sciences nuclear & particles physics +dedup_wf_001::a4f32a97a783117012f1de11797e73f2 10.1109/tpwrs.2019.2944747 engineering and technology electrical engineering, electronic engineering, information engineering electrical & electronic engineering +dedup_wf_001::313ae1cd083ae1696d12dd1909f97df8 10.1016/j.expthermflusci.2019.10999410.17863/cam.46212 engineering and technology mechanical engineering mechanical engineering & transports +dedup_wf_001::2a300a7d3ca7347791ebcef986bc0682 10.1109/tc.2018.2860012 engineering and technology electrical engineering, electronic engineering, information engineering computer hardware & architecture +doiboost____::5b79bd7bd9f87361b4a4abc3cbb2df75 10.1002/mma.6622 natural sciences mathematics numerical & computational mathematics +dedup_wf_001::6a3f61f217a2519fbaddea1094e3bfc2 10.1051/radiopro/2020020 natural sciences chemical sciences NULL +dedup_wf_001::a3f0430309a639f4234a0e57b10f2dee 10.1007/s12268-019-1003-4 medical and health sciences basic medicine NULL +dedup_wf_001::b6b8a3a1cccbee459cf3343485efdb12 10.3390/cancers12010236 medical and health sciences health sciences biochemistry & molecular biology +dedup_wf_001::dd06ee7974730e7b09a4f03c83b3f9bd 10.6084/m9.figshare.991261410.6084/m9.figshare.9912614.v110.1080/00268976.2019.1665199 natural sciences chemical sciences physical chemistry +dedup_wf_001::027c78bef6f972b5e26dfea55d30fbe3 10.1175/jpo-d-17-0239.1 natural sciences biological sciences marine biology & hydrobiology +dedup_wf_001::43edc179aa9e1fbaf582c5203b18b519 10.1007/s13218-020-00674-7 engineering and technology industrial biotechnology industrial engineering & automation +dedup_wf_001::e7770e11cd6eb514bb52c07b5a8a80f0 10.1016/j.psyneuen.2016.02.00310.1016/j.psyneuen.2016.02.00310.7892/boris.7888610.7892/boris.78886 medical and health sciences basic medicine NULL +dedup_wf_001::80bc15d69bdc589149631f3439dde5aa 10.1109/ted.2018.2813542 engineering and technology electrical engineering, electronic engineering, information engineering electrical & electronic engineering +dedup_wf_001::42c1cfa33e7872944b920cff90f4d99e 10.3989/scimar.04739.25a natural sciences biological sciences NULL +dedup_wf_001::9bacdbbaa9da3658b7243d5de8e3ce14 10.3390/su12187503 natural sciences earth and related environmental sciences NULL +dedup_wf_001::59e43d3527dcfecb6097fbd5740c8950 10.1016/j.ccell.2018.08.017 medical and health sciences basic medicine biochemistry & molecular biology +doiboost____::e024d1b738df3b24bc58fa0228542571 10.1103/physrevresearch.2.023322 natural sciences physical sciences nuclear & particles physics +dedup_wf_001::66e9a3237fa8178886d26d3c2d5b9e66 10.1039/c8cp03234c natural sciences NULL NULL +dedup_wf_001::83737ab4205bae751571bb3b166efa18 10.5281/zenodo.369655710.5281/zenodo.369655610.1109/jsac.2016.2545384 engineering and technology electrical engineering, electronic engineering, information engineering networking & telecommunications +dedup_wf_001::e3f892db413a689e572dd256acad55fe 10.1038/ng.366710.1038/ng.3667.10.17615/tct6-4m2610.17863/cam.15649 medical and health sciences health sciences genetics & heredity +dedup_wf_001::14ba594e8fd081847bc3f50f56335003 10.1016/j.jclepro.2019.119065 engineering and technology other engineering and technologies building & construction +dedup_wf_001::08ac7b33a41bcea2d055ecd8585d632e 10.1111/pce.13392 agricultural and veterinary sciences agriculture, forestry, and fisheries agronomy & agriculture \ No newline at end of file From 6f7ca539c68244a71c88cc770b797f8bdbe5c0e1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 9 Nov 2021 11:25:41 +0100 Subject: [PATCH 35/60] [BypassAS] update of results for bipFinder and FOS --- .../eu/dnetlib/dhp/PropagationConstant.java | 22 ++- .../dhp/bypassactionset/SparkUpdateBip.java | 135 ++++++++++++++++++ .../dhp/bypassactionset/SparkUpdateFOS.java | 121 ++++++++++++++++ .../eu/dnetlib/dhp/bypassactionset/Utils.java | 15 ++ .../bipfinder/PrepareBipFinder.java | 89 ++++++++++++ .../bypassactionset/{ => fos}/GetFOSData.java | 2 +- .../PrepareFOSSparkJob.java} | 18 ++- .../dhp/bypassactionset/model/BipScore.java | 30 ++++ .../dhp/bypassactionset/GetFOSTest.java | 27 ++-- 9 files changed, 443 insertions(+), 16 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateBip.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateFOS.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java rename dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/{ => fos}/GetFOSData.java (98%) rename dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/{DistributeFOSSparkJob.java => fos/PrepareFOSSparkJob.java} (82%) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipScore.java diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 23e97a97a8..a6562789d2 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -25,6 +25,19 @@ public class PropagationConstant { private PropagationConstant() { } + + + public static final String UPDATE_DATA_INFO_TYPE = "update"; + public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos"; + public static final String UPDATE_SUBJECT_FOS_CLASS_NAME = "Update of results with FOS subjects"; + public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip"; + public static final String UPDATE_MEASURE_BIP_CLASS_NAME = "Update of results with BipFinder! measures"; + public static final String FOS_CLASS_ID = "fos"; + public static final String FOS_CLASS_NAME = "Subject from fos classification"; + + + + public static final String INSTITUTIONAL_REPO_TYPE = "pubsrepository::institutional"; public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; @@ -75,10 +88,17 @@ public class PropagationConstant { public static DataInfo getDataInfo( String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema) { + + return getDataInfo(inference_provenance, inference_class_id, inference_class_name, qualifierSchema, "0.85"); + } + + public static DataInfo getDataInfo( + String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema, + String trust) { DataInfo di = new DataInfo(); di.setInferred(true); di.setDeletedbyinference(false); - di.setTrust("0.85"); + di.setTrust(trust); di.setInferenceprovenance(inference_provenance); di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name, qualifierSchema)); return di; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateBip.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateBip.java new file mode 100644 index 0000000000..737aac0bcd --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateBip.java @@ -0,0 +1,135 @@ + +package eu.dnetlib.dhp.bypassactionset; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bypassactionset.model.BipScore; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + +import java.io.Serializable; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +/** + * created the Atomic Action for each tipe of results + */ +public class SparkUpdateBip implements Serializable { + + + private static final Logger log = LoggerFactory.getLogger(SparkUpdateBip.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + SparkUpdateBip.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}: ", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); + + final String bipScorePath = parser.get("bipScorePath"); + log.info("bipScorePath: {}", bipScorePath); + + final String resultClassName = parser.get("resultTableName"); + log.info("resultTableName: {}", resultClassName); + + Class inputClazz = (Class) Class.forName(resultClassName); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> + updateBipFinder(spark, inputPath, outputPath, bipScorePath, inputClazz) + + ); + } + + private static void updateBipFinder(SparkSession spark, String inputPath, String outputPath, + String bipScorePath, Class inputClazz) { + + Dataset results = readPath(spark, inputPath, inputClazz); + Dataset bipScores = readPath(spark, bipScorePath, BipScore.class); + + results.joinWith(bipScores, results.col("id").equalTo(bipScores.col("id")), "left") + .map((MapFunction, I>) value -> { + if (!Optional.ofNullable(value._2()).isPresent()){ + return value._1(); + } + value._1().setMeasures(getMeasure(value._2())); + return value._1(); + }, Encoders.bean(inputClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(outputPath + "/bip"); + + } + + private static List getMeasure(BipScore value) { + return value + .getScoreList() + .stream() + .map(score -> { + Measure m = new Measure(); + m.setId(score.getId()); + m + .setUnit( + score + .getUnit() + .stream() + .map(unit -> { + KeyValue kv = new KeyValue(); + kv.setValue(unit.getValue()); + kv.setKey(unit.getKey()); + kv.setDataInfo(getDataInfo(UPDATE_DATA_INFO_TYPE, + UPDATE_MEASURE_BIP_CLASS_ID, + UPDATE_MEASURE_BIP_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, "")); + return kv; + }) + .collect(Collectors.toList())); + return m; + }) + .collect(Collectors.toList()); + } + + + + + + + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateFOS.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateFOS.java new file mode 100644 index 0000000000..79d3d6d143 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateFOS.java @@ -0,0 +1,121 @@ +package eu.dnetlib.dhp.bypassactionset; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Measure; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; + +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class SparkUpdateFOS implements Serializable { + private static final Logger log = LoggerFactory.getLogger(SparkUpdateFOS.class); + private final static String NULL = "NULL"; + private final static String DNET_RESULT_SUBJECT = "dnet:result_subject"; + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + SparkUpdateFOS.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}: ", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); + + final String fosPath = parser.get("fosPath"); + log.info("fosPath: {}", fosPath); + + final String resultClassName = parser.get("resultTableName"); + log.info("resultTableName: {}", resultClassName); + + Class inputClazz = (Class) Class.forName(resultClassName); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> + updateFos(spark, inputPath, outputPath, fosPath, inputClazz) + + ); + } + + private static void updateFos(SparkSession spark, String inputPath, String outputPath, + String bipScorePath, Class inputClazz) { + + Dataset results = readPath(spark, inputPath, inputClazz); + Dataset bipScores = readPath(spark, bipScorePath, FOSDataModel.class); + + results.joinWith(bipScores, results.col("id").equalTo(bipScores.col("id")), "left") + .map((MapFunction, I>) value -> { + if (!Optional.ofNullable(value._2()).isPresent()){ + return value._1(); + } + value._1().getSubject().addAll(getSubjects(value._2())); + return value._1(); + }, Encoders.bean(inputClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(outputPath); + + } + + private static List getSubjects(FOSDataModel fos) { + return Arrays.asList(getSubject(fos.getLevel1()), getSubject(fos.getLevel2()), getSubject(fos.getLevel3())) + .stream() + .filter(Objects::nonNull) + .collect(Collectors.toList()); + } + + private static StructuredProperty getSubject(String sbj) { + if (sbj.equals(NULL)) + return null; + StructuredProperty sp = new StructuredProperty(); + sp.setValue(sbj); + sp.setQualifier(getQualifier(FOS_CLASS_ID, FOS_CLASS_NAME, DNET_RESULT_SUBJECT)); + sp.setDataInfo(getDataInfo(UPDATE_DATA_INFO_TYPE, + UPDATE_SUBJECT_FOS_CLASS_ID, + UPDATE_SUBJECT_FOS_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, "")); + return sp; + + } + + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java new file mode 100644 index 0000000000..79ae871f3c --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java @@ -0,0 +1,15 @@ +package eu.dnetlib.dhp.bypassactionset; + +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import org.jetbrains.annotations.NotNull; + +public class Utils { + private static final String ID_PREFIX = "50|doi_________"; + + @NotNull + public static String getIdentifier(String d) { + return ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", d)); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java new file mode 100644 index 0000000000..7c4f5b859d --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java @@ -0,0 +1,89 @@ +package eu.dnetlib.dhp.bypassactionset.bipfinder; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bypassactionset.SparkUpdateBip; +import eu.dnetlib.dhp.bypassactionset.model.BipDeserialize; +import eu.dnetlib.dhp.bypassactionset.model.BipScore; +import eu.dnetlib.dhp.schema.oaf.Result; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.Optional; +import java.util.stream.Collectors; + +import static eu.dnetlib.dhp.bypassactionset.Utils.getIdentifier; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class PrepareBipFinder implements Serializable { + private static final String DOI = "doi"; + private static final Logger log = LoggerFactory.getLogger(SparkUpdateBip.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + SparkUpdateBip.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}: ", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); + + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + + prepareResults(spark, inputPath, outputPath); + }); + } + + private static void prepareResults(SparkSession spark, String inputPath, String outputPath) { + + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD bipDeserializeJavaRDD = sc + .textFile(inputPath) + .map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class)); + + spark + .createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> { + BipScore bs = new BipScore(); + bs.setId(getIdentifier(key)); + bs.setScoreList(entry.get(key)); + return bs; + }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(outputPath); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/GetFOSData.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/GetFOSData.java similarity index 98% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/GetFOSData.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/GetFOSData.java index 849bf1c6a4..dfab8f4095 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/GetFOSData.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/GetFOSData.java @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.bypassactionset; +package eu.dnetlib.dhp.bypassactionset.fos; import eu.dnetlib.dhp.application.ArgumentApplicationParser; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/DistributeFOSSparkJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/PrepareFOSSparkJob.java similarity index 82% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/DistributeFOSSparkJob.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/PrepareFOSSparkJob.java index 8c2c56be9a..eb10ebd13b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/DistributeFOSSparkJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/PrepareFOSSparkJob.java @@ -1,7 +1,9 @@ -package eu.dnetlib.dhp.bypassactionset; +package eu.dnetlib.dhp.bypassactionset.fos; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; @@ -9,6 +11,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -18,16 +21,18 @@ import java.util.Arrays; import java.util.List; import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.bypassactionset.Utils.getIdentifier; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -public class DistributeFOSSparkJob implements Serializable { - private static final Logger log = LoggerFactory.getLogger(DistributeFOSSparkJob.class); +public class PrepareFOSSparkJob implements Serializable { + private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class); + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - DistributeFOSSparkJob.class + PrepareFOSSparkJob.class .getResourceAsStream( "/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json")); @@ -71,7 +76,8 @@ public class DistributeFOSSparkJob implements Serializable { final String level1 = v.getLevel1(); final String level2 = v.getLevel2(); final String level3 = v.getLevel3(); - Arrays.stream(v.getDoi().split("\u0002")).forEach(d -> fosList.add(FOSDataModel.newInstance(d, level1, level2, level3))); + Arrays.stream(v.getDoi().split("\u0002")).forEach(d -> + fosList.add(FOSDataModel.newInstance(getIdentifier(d), level1, level2, level3))); return fosList.iterator(); }, Encoders.bean(FOSDataModel.class)) .write() @@ -81,4 +87,6 @@ public class DistributeFOSSparkJob implements Serializable { } + + } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipScore.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipScore.java new file mode 100644 index 0000000000..fb2687704a --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipScore.java @@ -0,0 +1,30 @@ + +package eu.dnetlib.dhp.bypassactionset.model; + +import java.io.Serializable; +import java.util.List; + +/** + * Rewriting of the bipFinder input data by extracting the identifier of the result (doi) + */ + +public class BipScore implements Serializable { + private String id; // doi + private List scoreList; // unit as given in the inputfile + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public List getScoreList() { + return scoreList; + } + + public void setScoreList(List scoreList) { + this.scoreList = scoreList; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java index 6e05ced648..5202897489 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java @@ -1,9 +1,13 @@ package eu.dnetlib.dhp.bypassactionset; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.bypassactionset.fos.PrepareFOSSparkJob; +import eu.dnetlib.dhp.bypassactionset.fos.GetFOSData; import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.countrypropagation.CountryPropagationJobTest; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -35,6 +39,7 @@ public class GetFOSTest { private static SparkSession spark; private static LocalFileSystem fs; private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final String ID_PREFIX = "50|doi_________"; @BeforeAll public static void beforeAll() throws IOException { @@ -103,7 +108,7 @@ public class GetFOSTest { .getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos.json") .getPath(); - DistributeFOSSparkJob + PrepareFOSSparkJob .main( new String[] { "--isSparkSessionManaged", Boolean.FALSE.toString(), @@ -119,17 +124,21 @@ public class GetFOSTest { .textFile(workingDir.toString() + "/distribute") .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); + String doi1 = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.3390/s18072310")); assertEquals(50, tmp.count()); - assertEquals(1, tmp.filter(row -> row.getDoi().equals("10.3390/s18072310")).count()); - assertEquals("engineering and technology", tmp.filter(r -> r.getDoi().equals("10.3390/s18072310")).collect().get(0).getLevel1()); - assertEquals("nano-technology", tmp.filter(r -> r.getDoi().equals("10.3390/s18072310")).collect().get(0).getLevel2()); - assertEquals("nanoscience & nanotechnology", tmp.filter(r -> r.getDoi().equals("10.3390/s18072310")).collect().get(0).getLevel3()); + assertEquals(1, tmp.filter(row -> row.getDoi().equals(doi1)).count()); + assertEquals("engineering and technology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel1()); + assertEquals("nano-technology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel2()); + assertEquals("nanoscience & nanotechnology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel3()); - assertEquals(1, tmp.filter(row -> row.getDoi().equals("10.1111/1365-2656.12831")).count()); - assertEquals("social sciences", tmp.filter(r -> r.getDoi().equals("10.1111/1365-2656.12831")).collect().get(0).getLevel1()); - assertEquals("psychology and cognitive sciences", tmp.filter(r -> r.getDoi().equals("10.1111/1365-2656.12831")).collect().get(0).getLevel2()); - assertEquals("NULL", tmp.filter(r -> r.getDoi().equals("10.1111/1365-2656.12831")).collect().get(0).getLevel3()); + String doi = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/1365-2656.12831")); + assertEquals(1, tmp.filter(row -> row.getDoi().equals(doi)).count()); + assertEquals("social sciences", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel1()); + assertEquals("psychology and cognitive sciences", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel2()); + assertEquals("NULL", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel3()); // {"doi":"10.1111/1365-2656.12831\u000210.17863/cam.24369","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"} From 6477a40670d18d06b8992a8983f4f00c62220e4b Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 9 Nov 2021 11:27:12 +0100 Subject: [PATCH 36/60] implement filter of openCitation --- .../eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 141b7b0736..4b82fe6455 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -59,7 +59,12 @@ object SparkConvertRDDtoDataset { log.info("Converting Relation") - val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation").map(s => mapper.readValue(s, classOf[Relation])).filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + val relationSemanticFilter = List("cites", "iscitedby","merges", "ismergedin") + + val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation") + .map(s => mapper.readValue(s, classOf[Relation])) + .filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") From 9e214ce0eba8aa6ac2d7c8ce67f61c7494f481e0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 9 Nov 2021 12:07:19 +0100 Subject: [PATCH 37/60] [BypassAS] addition of OC relations --- .../eu/dnetlib/dhp/PropagationConstant.java | 20 ++- .../bipfinder/PrepareBipFinder.java | 1 - .../{ => bipfinder}/SparkUpdateBip.java | 2 +- .../{ => fos}/SparkUpdateFOS.java | 9 +- .../opencitations/GetOpenCitationsRefs.java | 92 ++++++++++ .../opencitations/SparkUpdateOCRels.java | 158 ++++++++++++++++++ 6 files changed, 270 insertions(+), 12 deletions(-) rename dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/{ => bipfinder}/SparkUpdateBip.java (98%) rename dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/{ => fos}/SparkUpdateFOS.java (94%) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index a6562789d2..c9d2495ff4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -25,18 +25,23 @@ public class PropagationConstant { private PropagationConstant() { } - + public static final String DOI = "doi"; + public static final String REF_DOI = ".refs"; public static final String UPDATE_DATA_INFO_TYPE = "update"; public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos"; public static final String UPDATE_SUBJECT_FOS_CLASS_NAME = "Update of results with FOS subjects"; public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip"; public static final String UPDATE_MEASURE_BIP_CLASS_NAME = "Update of results with BipFinder! measures"; - public static final String FOS_CLASS_ID = "fos"; + public static final String FOS_CLASS_ID = "FOS"; public static final String FOS_CLASS_NAME = "Subject from fos classification"; + public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations"; + public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations"; + public static final String ID_PREFIX = "50|doi_________::"; + public static final String OC_TRUST = "0.91"; - + public final static String NULL = "NULL"; public static final String INSTITUTIONAL_REPO_TYPE = "pubsrepository::institutional"; @@ -95,8 +100,15 @@ public class PropagationConstant { public static DataInfo getDataInfo( String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema, String trust) { + return getDataInfo(inference_provenance, inference_class_id, inference_class_name, qualifierSchema, trust, true); + + } + + public static DataInfo getDataInfo( + String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema, + String trust, boolean inferred) { DataInfo di = new DataInfo(); - di.setInferred(true); + di.setInferred(inferred); di.setDeletedbyinference(false); di.setTrust(trust); di.setInferenceprovenance(inference_provenance); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java index 7c4f5b859d..10a00b3e4b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.bypassactionset.bipfinder; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bypassactionset.SparkUpdateBip; import eu.dnetlib.dhp.bypassactionset.model.BipDeserialize; import eu.dnetlib.dhp.bypassactionset.model.BipScore; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateBip.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/SparkUpdateBip.java similarity index 98% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateBip.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/SparkUpdateBip.java index 737aac0bcd..d1489bfdf0 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateBip.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/SparkUpdateBip.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.bypassactionset; +package eu.dnetlib.dhp.bypassactionset.bipfinder; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.bypassactionset.model.BipScore; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateFOS.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/SparkUpdateFOS.java similarity index 94% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateFOS.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/SparkUpdateFOS.java index 79d3d6d143..3c4bd7737e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/SparkUpdateFOS.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/SparkUpdateFOS.java @@ -1,11 +1,9 @@ -package eu.dnetlib.dhp.bypassactionset; +package eu.dnetlib.dhp.bypassactionset.fos; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.Measure; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import org.apache.commons.io.IOUtils; @@ -28,8 +26,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; public class SparkUpdateFOS implements Serializable { private static final Logger log = LoggerFactory.getLogger(SparkUpdateFOS.class); - private final static String NULL = "NULL"; - private final static String DNET_RESULT_SUBJECT = "dnet:result_subject"; + public static void main(String[] args) throws Exception { @@ -108,7 +105,7 @@ public class SparkUpdateFOS implements Serializable { return null; StructuredProperty sp = new StructuredProperty(); sp.setValue(sbj); - sp.setQualifier(getQualifier(FOS_CLASS_ID, FOS_CLASS_NAME, DNET_RESULT_SUBJECT)); + sp.setQualifier(getQualifier(FOS_CLASS_ID, FOS_CLASS_NAME, ModelConstants.DNET_SUBJECT_TYPOLOGIES)); sp.setDataInfo(getDataInfo(UPDATE_DATA_INFO_TYPE, UPDATE_SUBJECT_FOS_CLASS_ID, UPDATE_SUBJECT_FOS_CLASS_NAME, diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java new file mode 100644 index 0000000000..5d42cb4c5d --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java @@ -0,0 +1,92 @@ +package eu.dnetlib.dhp.bypassactionset.opencitations; + +import java.io.*; +import java.io.Serializable; +import java.util.Objects; +import java.util.zip.GZIPOutputStream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetOpenCitationsRefs implements Serializable { + private static final Logger log = LoggerFactory.getLogger(GetOpenCitationsRefs.class); + + public static void main(final String[] args) throws IOException, ParseException { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + GetOpenCitationsRefs.class + .getResourceAsStream( + "/eu/dnetlib/dhp/bypassactionset/opencitations/input_parameters.json")))); + + parser.parseArgument(args); + + final String[] inputFile = parser.get("inputFile").split(";"); + log.info("inputFile {}", inputFile.toString()); + + final String workingPath = parser.get("workingPath"); + log.info("workingPath {}", workingPath); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + + GetOpenCitationsRefs ocr = new GetOpenCitationsRefs(); + + for (String file : inputFile) { + ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem); + } + + } + + private void doExtract(String inputFile, String workingPath, FileSystem fileSystem) + throws IOException { + + final Path path = new Path(inputFile); + + FSDataInputStream oc_zip = fileSystem.open(path); + + int count = 1; + try (ZipInputStream zis = new ZipInputStream(oc_zip)) { + ZipEntry entry = null; + while ((entry = zis.getNextEntry()) != null) { + + if (!entry.isDirectory()) { + String fileName = entry.getName(); + fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count; + count++; + try ( + FSDataOutputStream out = fileSystem + .create(new Path(workingPath + "/COCI/" + fileName + ".gz")); + GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { + + IOUtils.copy(zis, gzipOs); + + } + } + + } + + } + + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java new file mode 100644 index 0000000000..a8ab6b74bc --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java @@ -0,0 +1,158 @@ +package eu.dnetlib.dhp.bypassactionset.opencitations; + + +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.io.Serializable; +import java.util.*; + +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.IOUtils; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +import eu.dnetlib.dhp.schema.common.ModelConstants; + +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; + + + +public class SparkUpdateOCRels implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(SparkUpdateOCRels.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(final String[] args) throws IOException, ParseException { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + SparkUpdateOCRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json")))); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}", inputPath.toString()); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); + + final boolean shouldDuplicateRels = + Optional.ofNullable(parser.get("shouldDuplicateRels")) + .map(Boolean::valueOf) + .orElse(Boolean.FALSE); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> + addOCRelations(spark, inputPath, outputPath, shouldDuplicateRels) + ); + + } + + private static void addOCRelations(SparkSession spark, String inputPath, String outputPath, + boolean shouldDuplicateRels) { + spark + .sqlContext() + .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) + .flatMap( + (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), + Encoders.bean(Relation.class)) + .filter((FilterFunction) value -> value != null) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(outputPath); + + } + + private static List createRelation(String value, boolean duplicate) { + String[] line = value.split(","); + if (!line[1].startsWith("10.")) { + return new ArrayList<>(); + } + List relationList = new ArrayList<>(); + + String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue(DOI, line[1])); + final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue(DOI, line[2])); + + relationList + .addAll( + getRelations( + citing, + cited)); + + if (duplicate && line[1].endsWith(REF_DOI)) { + citing = ID_PREFIX + IdentifierFactory + .md5(CleaningFunctions.normalizePidValue(DOI, line[1].substring(0, line[1].indexOf(REF_DOI)))); + relationList.addAll(getRelations(citing, cited)); + } + + return relationList; + } + + private static Collection getRelations(String citing, String cited) { + + return Arrays + .asList( + getRelation(citing, cited, ModelConstants.CITES), + getRelation(cited, citing, ModelConstants.IS_CITED_BY)); + } + + public static Relation getRelation( + String source, + String target, + String relclass) { + Relation r = new Relation(); + r.setCollectedfrom(getCollectedFrom()); + r.setSource(source); + r.setTarget(target); + r.setRelClass(relclass); + r.setRelType(ModelConstants.RESULT_RESULT); + r.setSubRelType(ModelConstants.CITATION); + r + .setDataInfo( + getDataInfo(UPDATE_DATA_INFO_TYPE, OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, OC_TRUST, false)); + return r; + } + + public static List getCollectedFrom() { + KeyValue kv = new KeyValue(); + kv.setKey(ModelConstants.OPENOCITATIONS_ID); + kv.setValue(ModelConstants.OPENOCITATIONS_NAME); + + return Arrays.asList(kv); + } + + + +} From c371b23077d35ff9db366f7ba7a482a042cabe7f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 10 Nov 2021 17:00:37 +0100 Subject: [PATCH 38/60] - --- .../eu/dnetlib/dhp/PropagationConstant.java | 17 +- .../eu/dnetlib/dhp/bypassactionset/Utils.java | 16 +- .../bipfinder/PrepareBipFinder.java | 113 +++--- .../bipfinder/SparkUpdateBip.java | 84 ++--- .../dhp/bypassactionset/fos/GetFOSData.java | 94 +++-- .../fos/PrepareFOSSparkJob.java | 122 +++---- .../bypassactionset/fos/SparkUpdateFOS.java | 163 +++++---- .../bypassactionset/model/FOSDataModel.java | 92 ++--- .../opencitations/GetOpenCitationsRefs.java | 95 ++--- .../opencitations/SparkUpdateOCRels.java | 200 +++++----- .../bip_prepare_parameters.json | 20 + .../bip_update_parameters.json | 32 ++ .../fos_update_parameters.json | 32 ++ .../dnetlib/dhp/bypassactionset/BipTest.java | 250 +++++++++++++ .../dnetlib/dhp/bypassactionset/FOSTest.java | 253 +++++++++++++ .../dhp/bypassactionset/GetFOSTest.java | 341 ------------------ .../dnetlib/dhp/bypassactionset/bip/bip.json | 86 +++++ .../dhp/bypassactionset/bip/preparedbip.json | 86 +++++ .../bypassactionset/bip/publicationmatch.json | 6 + .../bip/publicationnomatch.json | 6 + .../dhp/bypassactionset/fos/fos_prepared.json | 50 +++ .../bypassactionset/fos/publicationmatch.json | 6 + pom.xml | 2 +- 23 files changed, 1319 insertions(+), 847 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_prepare_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_update_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/fos_update_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/BipTest.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/FOSTest.java delete mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/bip.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationmatch.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/publicationmatch.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index c9d2495ff4..392a5ab44f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -30,11 +30,11 @@ public class PropagationConstant { public static final String UPDATE_DATA_INFO_TYPE = "update"; public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos"; - public static final String UPDATE_SUBJECT_FOS_CLASS_NAME = "Update of results with FOS subjects"; + public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE"; public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip"; - public static final String UPDATE_MEASURE_BIP_CLASS_NAME = "Update of results with BipFinder! measures"; + public static final String FOS_CLASS_ID = "FOS"; - public static final String FOS_CLASS_NAME = "Subject from fos classification"; + public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification"; public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations"; public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations"; @@ -98,15 +98,16 @@ public class PropagationConstant { } public static DataInfo getDataInfo( - String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema, - String trust) { - return getDataInfo(inference_provenance, inference_class_id, inference_class_name, qualifierSchema, trust, true); + String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema, + String trust) { + return getDataInfo( + inference_provenance, inference_class_id, inference_class_name, qualifierSchema, trust, true); } public static DataInfo getDataInfo( - String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema, - String trust, boolean inferred) { + String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema, + String trust, boolean inferred) { DataInfo di = new DataInfo(); di.setInferred(inferred); di.setDeletedbyinference(false); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java index 79ae871f3c..fa6cc9fab7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java @@ -1,15 +1,17 @@ + package eu.dnetlib.dhp.bypassactionset; +import org.jetbrains.annotations.NotNull; + import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; -import org.jetbrains.annotations.NotNull; public class Utils { - private static final String ID_PREFIX = "50|doi_________"; + private static final String ID_PREFIX = "50|doi_________"; - @NotNull - public static String getIdentifier(String d) { - return ID_PREFIX + - IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", d)); - } + @NotNull + public static String getIdentifier(String d) { + return ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", d)); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java index 10a00b3e4b..7f12847ad6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java @@ -1,10 +1,13 @@ + package eu.dnetlib.dhp.bypassactionset.bipfinder; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bypassactionset.model.BipDeserialize; -import eu.dnetlib.dhp.bypassactionset.model.BipScore; -import eu.dnetlib.dhp.schema.oaf.Result; +import static eu.dnetlib.dhp.bypassactionset.Utils.getIdentifier; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.Optional; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -15,74 +18,72 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.Serializable; -import java.util.Optional; -import java.util.stream.Collectors; +import com.fasterxml.jackson.databind.ObjectMapper; -import static eu.dnetlib.dhp.bypassactionset.Utils.getIdentifier; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bypassactionset.model.BipDeserialize; +import eu.dnetlib.dhp.bypassactionset.model.BipScore; +import eu.dnetlib.dhp.schema.oaf.Result; public class PrepareBipFinder implements Serializable { - private static final String DOI = "doi"; - private static final Logger log = LoggerFactory.getLogger(SparkUpdateBip.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + private static final Logger log = LoggerFactory.getLogger(SparkUpdateBip.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - String jsonConfiguration = IOUtils - .toString( - SparkUpdateBip.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json")); + public static void main(String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + String jsonConfiguration = IOUtils + .toString( + SparkUpdateBip.class + .getResourceAsStream( + "/eu/dnetlib/dhp/bypassactionset/bip_prepare_parameters.json")); - parser.parseArgument(args); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); + parser.parseArgument(args); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}: ", inputPath); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}: ", outputPath); + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}: ", inputPath); + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); - SparkConf conf = new SparkConf(); + SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { - prepareResults(spark, inputPath, outputPath); - }); - } + prepareResults(spark, inputPath, outputPath); + }); + } - private static void prepareResults(SparkSession spark, String inputPath, String outputPath) { + private static void prepareResults(SparkSession spark, String inputPath, String outputPath) { + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaRDD bipDeserializeJavaRDD = sc + .textFile(inputPath) + .map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class)); - JavaRDD bipDeserializeJavaRDD = sc - .textFile(inputPath) - .map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class)); - - spark - .createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> { - BipScore bs = new BipScore(); - bs.setId(getIdentifier(key)); - bs.setScoreList(entry.get(key)); - return bs; - }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(outputPath); - } + spark + .createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> { + BipScore bs = new BipScore(); + bs.setId(getIdentifier(key)); + bs.setScoreList(entry.get(key)); + return bs; + }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/SparkUpdateBip.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/SparkUpdateBip.java index d1489bfdf0..53913c81b4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/SparkUpdateBip.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/SparkUpdateBip.java @@ -1,45 +1,44 @@ package eu.dnetlib.dhp.bypassactionset.bipfinder; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bypassactionset.model.BipScore; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; - -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import scala.Tuple2; +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bypassactionset.model.BipScore; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import scala.Tuple2; /** * created the Atomic Action for each tipe of results */ public class SparkUpdateBip implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkUpdateBip.class); public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - SparkUpdateBip.class + SparkUpdateBip.class .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json")); + "/eu/dnetlib/dhp/bypassactionset/bip_update_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -71,10 +70,9 @@ public class SparkUpdateBip implements Serializable { runWithSparkSession( conf, isSparkSessionManaged, - spark -> - updateBipFinder(spark, inputPath, outputPath, bipScorePath, inputClazz) + spark -> updateBipFinder(spark, inputPath, outputPath, bipScorePath, inputClazz) - ); + ); } private static void updateBipFinder(SparkSession spark, String inputPath, String outputPath, @@ -83,18 +81,19 @@ public class SparkUpdateBip implements Serializable { Dataset results = readPath(spark, inputPath, inputClazz); Dataset bipScores = readPath(spark, bipScorePath, BipScore.class); - results.joinWith(bipScores, results.col("id").equalTo(bipScores.col("id")), "left") - .map((MapFunction, I>) value -> { - if (!Optional.ofNullable(value._2()).isPresent()){ - return value._1(); - } - value._1().setMeasures(getMeasure(value._2())); + results + .joinWith(bipScores, results.col("id").equalTo(bipScores.col("id")), "left") + .map((MapFunction, I>) value -> { + if (!Optional.ofNullable(value._2()).isPresent()) { return value._1(); - }, Encoders.bean(inputClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(outputPath + "/bip"); + } + value._1().setMeasures(getMeasure(value._2())); + return value._1(); + }, Encoders.bean(inputClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/bip"); } @@ -114,10 +113,13 @@ public class SparkUpdateBip implements Serializable { KeyValue kv = new KeyValue(); kv.setValue(unit.getValue()); kv.setKey(unit.getKey()); - kv.setDataInfo(getDataInfo(UPDATE_DATA_INFO_TYPE, - UPDATE_MEASURE_BIP_CLASS_ID, - UPDATE_MEASURE_BIP_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS, "")); + kv + .setDataInfo( + getDataInfo( + UPDATE_DATA_INFO_TYPE, + UPDATE_MEASURE_BIP_CLASS_ID, + UPDATE_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, "")); return kv; }) .collect(Collectors.toList())); @@ -126,10 +128,4 @@ public class SparkUpdateBip implements Serializable { .collect(Collectors.toList()); } - - - - - - } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/GetFOSData.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/GetFOSData.java index dfab8f4095..9eceb7c5f7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/GetFOSData.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/GetFOSData.java @@ -1,8 +1,10 @@ + package eu.dnetlib.dhp.bypassactionset.fos; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import java.io.*; +import java.util.Objects; +import java.util.Optional; -import eu.dnetlib.dhp.common.collection.GetCSV; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -10,70 +12,64 @@ import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.*; - -import java.util.Objects; -import java.util.Optional; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.GetCSV; public class GetFOSData implements Serializable { - private static final Logger log = LoggerFactory.getLogger(GetFOSData.class); + private static final Logger log = LoggerFactory.getLogger(GetFOSData.class); - public static final char DEFAULT_DELIMITER = '\t'; + public static final char DEFAULT_DELIMITER = '\t'; - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Objects - .requireNonNull( - GetFOSData.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json")))); + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + GetFOSData.class + .getResourceAsStream( + "/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json")))); - parser.parseArgument(args); + parser.parseArgument(args); - //the path where the original fos csv file is stored - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}", inputPath); + // the path where the original fos csv file is stored + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}", inputPath); - //the path where to put the file as json - final String outputFile = parser.get("outputFile"); - log.info("outputFile {}", outputFile); + // the path where to put the file as json + final String outputFile = parser.get("outputFile"); + log.info("outputFile {}", outputFile); - final String hdfsNameNode = parser.get("hdfsNameNode"); - log.info("hdfsNameNode {}", hdfsNameNode); + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); - final String classForName = parser.get("classForName"); - log.info("classForName {}", classForName); + final String classForName = parser.get("classForName"); + log.info("classForName {}", classForName); - final char delimiter = Optional - .ofNullable(parser.get("delimiter")) - .map(s -> s.charAt(0)) - .orElse(DEFAULT_DELIMITER); - log.info("delimiter {}", delimiter); + final char delimiter = Optional + .ofNullable(parser.get("delimiter")) + .map(s -> s.charAt(0)) + .orElse(DEFAULT_DELIMITER); + log.info("delimiter {}", delimiter); - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); - FileSystem fileSystem = FileSystem.get(conf); + FileSystem fileSystem = FileSystem.get(conf); - new GetFOSData().doRewrite(inputPath, outputFile, classForName, delimiter, fileSystem); + new GetFOSData().doRewrite(inputPath, outputFile, classForName, delimiter, fileSystem); - } + } - public void doRewrite(String inputPath, String outputFile, String classForName, char delimiter, FileSystem fs) - throws IOException, ClassNotFoundException{ - - //reads the csv and writes it as its json equivalent - try (InputStreamReader reader = new InputStreamReader(fs.open(new Path (inputPath)))) { - GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); - } - - - - } + public void doRewrite(String inputPath, String outputFile, String classForName, char delimiter, FileSystem fs) + throws IOException, ClassNotFoundException { + // reads the csv and writes it as its json equivalent + try (InputStreamReader reader = new InputStreamReader(fs.open(new Path(inputPath)))) { + GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); + } + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/PrepareFOSSparkJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/PrepareFOSSparkJob.java index eb10ebd13b..0f9f00bb5c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/PrepareFOSSparkJob.java @@ -1,9 +1,15 @@ + package eu.dnetlib.dhp.bypassactionset.fos; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.bypassactionset.Utils.getIdentifier; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; @@ -15,78 +21,66 @@ import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.bypassactionset.Utils.getIdentifier; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class PrepareFOSSparkJob implements Serializable { - private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class); + private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class); + public static void main(String[] args) throws Exception { - public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + PrepareFOSSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json")); - String jsonConfiguration = IOUtils - .toString( - PrepareFOSSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json")); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); - parser.parseArgument(args); + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - Boolean isSparkSessionManaged = isSparkSessionManaged(parser); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + String sourcePath = parser.get("sourcePath"); + log.info("sourcePath: {}", sourcePath); - String sourcePath = parser.get("sourcePath"); - log.info("sourcePath: {}", sourcePath); + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + removeOutputDir(spark, outputPath); + distributeFOSdois( + spark, + sourcePath, + outputPath); + }); + } - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - - - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - distributeFOSdois( - spark, - sourcePath, - - outputPath - ); - }); - } - - private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) { - Dataset fosDataset = readPath(spark, sourcePath, FOSDataModel.class); - - fosDataset.flatMap((FlatMapFunction) v -> { - List fosList = new ArrayList<>(); - final String level1 = v.getLevel1(); - final String level2 = v.getLevel2(); - final String level3 = v.getLevel3(); - Arrays.stream(v.getDoi().split("\u0002")).forEach(d -> - fosList.add(FOSDataModel.newInstance(getIdentifier(d), level1, level2, level3))); - return fosList.iterator(); - }, Encoders.bean(FOSDataModel.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(outputPath); - } - - + private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) { + Dataset fosDataset = readPath(spark, sourcePath, FOSDataModel.class); + fosDataset.flatMap((FlatMapFunction) v -> { + List fosList = new ArrayList<>(); + final String level1 = v.getLevel1(); + final String level2 = v.getLevel2(); + final String level3 = v.getLevel3(); + Arrays + .stream(v.getDoi().split("\u0002")) + .forEach(d -> fosList.add(FOSDataModel.newInstance(getIdentifier(d), level1, level2, level3))); + return fosList.iterator(); + }, Encoders.bean(FOSDataModel.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/SparkUpdateFOS.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/SparkUpdateFOS.java index 3c4bd7737e..4a7bd0f98c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/SparkUpdateFOS.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/SparkUpdateFOS.java @@ -1,11 +1,13 @@ + package eu.dnetlib.dhp.bypassactionset.fos; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; -import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -15,104 +17,105 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import scala.Tuple2; -import java.io.Serializable; -import java.util.*; -import java.util.stream.Collectors; - -import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - public class SparkUpdateFOS implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkUpdateFOS.class); + private static final Logger log = LoggerFactory.getLogger(SparkUpdateFOS.class); + public static void main(String[] args) throws Exception { - public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkUpdateFOS.class + .getResourceAsStream( + "/eu/dnetlib/dhp/bypassactionset/fos_update_parameters.json")); - String jsonConfiguration = IOUtils - .toString( - SparkUpdateFOS.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json")); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); - parser.parseArgument(args); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}: ", inputPath); - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}: ", inputPath); + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}: ", outputPath); + final String fosPath = parser.get("fosPath"); + log.info("fosPath: {}", fosPath); - final String fosPath = parser.get("fosPath"); - log.info("fosPath: {}", fosPath); + final String resultClassName = parser.get("resultTableName"); + log.info("resultTableName: {}", resultClassName); - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); + Class inputClazz = (Class) Class.forName(resultClassName); - Class inputClazz = (Class) Class.forName(resultClassName); + SparkConf conf = new SparkConf(); - SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> updateFos(spark, inputPath, outputPath, fosPath, inputClazz) - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> - updateFos(spark, inputPath, outputPath, fosPath, inputClazz) + ); + } - ); - } + private static void updateFos(SparkSession spark, String inputPath, String outputPath, + String fosPath, Class inputClazz) { - private static void updateFos(SparkSession spark, String inputPath, String outputPath, - String bipScorePath, Class inputClazz) { + Dataset results = readPath(spark, inputPath, inputClazz); + Dataset fosDataModelDataset = readPath(spark, fosPath, FOSDataModel.class); - Dataset results = readPath(spark, inputPath, inputClazz); - Dataset bipScores = readPath(spark, bipScorePath, FOSDataModel.class); + results + .joinWith(fosDataModelDataset, results.col("id").equalTo(fosDataModelDataset.col("doi")), "left") + .map((MapFunction, I>) value -> { + if (!Optional.ofNullable(value._2()).isPresent()) { + return value._1(); + } + value._1().getSubject().addAll(getSubjects(value._2())); + return value._1(); + }, Encoders.bean(inputClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); - results.joinWith(bipScores, results.col("id").equalTo(bipScores.col("id")), "left") - .map((MapFunction, I>) value -> { - if (!Optional.ofNullable(value._2()).isPresent()){ - return value._1(); - } - value._1().getSubject().addAll(getSubjects(value._2())); - return value._1(); - }, Encoders.bean(inputClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(outputPath); + } - } + private static List getSubjects(FOSDataModel fos) { + return Arrays + .asList(getSubject(fos.getLevel1()), getSubject(fos.getLevel2()), getSubject(fos.getLevel3())) + .stream() + .filter(Objects::nonNull) + .collect(Collectors.toList()); + } - private static List getSubjects(FOSDataModel fos) { - return Arrays.asList(getSubject(fos.getLevel1()), getSubject(fos.getLevel2()), getSubject(fos.getLevel3())) - .stream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); - } - - private static StructuredProperty getSubject(String sbj) { - if (sbj.equals(NULL)) - return null; - StructuredProperty sp = new StructuredProperty(); - sp.setValue(sbj); - sp.setQualifier(getQualifier(FOS_CLASS_ID, FOS_CLASS_NAME, ModelConstants.DNET_SUBJECT_TYPOLOGIES)); - sp.setDataInfo(getDataInfo(UPDATE_DATA_INFO_TYPE, - UPDATE_SUBJECT_FOS_CLASS_ID, - UPDATE_SUBJECT_FOS_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS, "")); - return sp; - - } + private static StructuredProperty getSubject(String sbj) { + if (sbj.equals(NULL)) + return null; + StructuredProperty sp = new StructuredProperty(); + sp.setValue(sbj); + sp.setQualifier(getQualifier(FOS_CLASS_ID, FOS_CLASS_NAME, ModelConstants.DNET_SUBJECT_TYPOLOGIES)); + sp + .setDataInfo( + getDataInfo( + UPDATE_DATA_INFO_TYPE, + UPDATE_SUBJECT_FOS_CLASS_ID, + UPDATE_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, "")); + return sp; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/FOSDataModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/FOSDataModel.java index 4f80d98dc3..564de01659 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/FOSDataModel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/FOSDataModel.java @@ -1,71 +1,71 @@ -package eu.dnetlib.dhp.bypassactionset.model; -import com.opencsv.bean.CsvBindByPosition; +package eu.dnetlib.dhp.bypassactionset.model; import java.io.Serializable; +import com.opencsv.bean.CsvBindByPosition; + public class FOSDataModel implements Serializable { - @CsvBindByPosition(position = 1) + @CsvBindByPosition(position = 1) // @CsvBindByName(column = "doi") - private String doi; + private String doi; - @CsvBindByPosition(position = 2) + @CsvBindByPosition(position = 2) // @CsvBindByName(column = "level1") - private String level1; + private String level1; - @CsvBindByPosition(position = 3) + @CsvBindByPosition(position = 3) // @CsvBindByName(column = "level2") - private String level2; + private String level2; - @CsvBindByPosition(position = 4) + @CsvBindByPosition(position = 4) // @CsvBindByName(column = "level3") - private String level3; + private String level3; + public FOSDataModel() { - public FOSDataModel() { + } - } + public FOSDataModel(String doi, String level1, String level2, String level3) { + this.doi = doi; + this.level1 = level1; + this.level2 = level2; + this.level3 = level3; + } - public FOSDataModel(String doi, String level1, String level2, String level3) { - this.doi = doi; - this.level1 = level1; - this.level2 = level2; - this.level3 = level3; - } + public static FOSDataModel newInstance(String d, String level1, String level2, String level3) { + return new FOSDataModel(d, level1, level2, level3); + } - public static FOSDataModel newInstance(String d, String level1, String level2, String level3) { - return new FOSDataModel(d, level1, level2, level3); - } + public String getDoi() { + return doi; + } - public String getDoi() { - return doi; - } + public void setDoi(String doi) { + this.doi = doi; + } - public void setDoi(String doi) { - this.doi = doi; - } + public String getLevel1() { + return level1; + } - public String getLevel1() { - return level1; - } + public void setLevel1(String level1) { + this.level1 = level1; + } - public void setLevel1(String level1) { - this.level1 = level1; - } + public String getLevel2() { + return level2; + } - public String getLevel2() { - return level2; - } + public void setLevel2(String level2) { + this.level2 = level2; + } - public void setLevel2(String level2) { - this.level2 = level2; - } + public String getLevel3() { + return level3; + } - public String getLevel3() { - return level3; - } - - public void setLevel3(String level3) { - this.level3 = level3; - } + public void setLevel3(String level3) { + this.level3 = level3; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java index 5d42cb4c5d..0882b26d02 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.bypassactionset.opencitations; import java.io.*; @@ -20,73 +21,73 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; public class GetOpenCitationsRefs implements Serializable { - private static final Logger log = LoggerFactory.getLogger(GetOpenCitationsRefs.class); + private static final Logger log = LoggerFactory.getLogger(GetOpenCitationsRefs.class); - public static void main(final String[] args) throws IOException, ParseException { + public static void main(final String[] args) throws IOException, ParseException { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Objects - .requireNonNull( - GetOpenCitationsRefs.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bypassactionset/opencitations/input_parameters.json")))); + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + GetOpenCitationsRefs.class + .getResourceAsStream( + "/eu/dnetlib/dhp/bypassactionset/opencitations/input_parameters.json")))); - parser.parseArgument(args); + parser.parseArgument(args); - final String[] inputFile = parser.get("inputFile").split(";"); - log.info("inputFile {}", inputFile.toString()); + final String[] inputFile = parser.get("inputFile").split(";"); + log.info("inputFile {}", inputFile.toString()); - final String workingPath = parser.get("workingPath"); - log.info("workingPath {}", workingPath); + final String workingPath = parser.get("workingPath"); + log.info("workingPath {}", workingPath); - final String hdfsNameNode = parser.get("hdfsNameNode"); - log.info("hdfsNameNode {}", hdfsNameNode); + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); - FileSystem fileSystem = FileSystem.get(conf); + FileSystem fileSystem = FileSystem.get(conf); - GetOpenCitationsRefs ocr = new GetOpenCitationsRefs(); + GetOpenCitationsRefs ocr = new GetOpenCitationsRefs(); - for (String file : inputFile) { - ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem); - } + for (String file : inputFile) { + ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem); + } - } + } - private void doExtract(String inputFile, String workingPath, FileSystem fileSystem) - throws IOException { + private void doExtract(String inputFile, String workingPath, FileSystem fileSystem) + throws IOException { - final Path path = new Path(inputFile); + final Path path = new Path(inputFile); - FSDataInputStream oc_zip = fileSystem.open(path); + FSDataInputStream oc_zip = fileSystem.open(path); - int count = 1; - try (ZipInputStream zis = new ZipInputStream(oc_zip)) { - ZipEntry entry = null; - while ((entry = zis.getNextEntry()) != null) { + int count = 1; + try (ZipInputStream zis = new ZipInputStream(oc_zip)) { + ZipEntry entry = null; + while ((entry = zis.getNextEntry()) != null) { - if (!entry.isDirectory()) { - String fileName = entry.getName(); - fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count; - count++; - try ( - FSDataOutputStream out = fileSystem - .create(new Path(workingPath + "/COCI/" + fileName + ".gz")); - GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { + if (!entry.isDirectory()) { + String fileName = entry.getName(); + fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count; + count++; + try ( + FSDataOutputStream out = fileSystem + .create(new Path(workingPath + "/COCI/" + fileName + ".gz")); + GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { - IOUtils.copy(zis, gzipOs); + IOUtils.copy(zis, gzipOs); - } - } + } + } - } + } - } + } - } + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java index a8ab6b74bc..64a54f143e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.bypassactionset.opencitations; +package eu.dnetlib.dhp.bypassactionset.opencitations; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; @@ -10,7 +10,6 @@ import java.util.*; import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; - import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FlatMapFunction; @@ -23,136 +22,129 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; - import eu.dnetlib.dhp.schema.common.ModelConstants; - import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; - - public class SparkUpdateOCRels implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkUpdateOCRels.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final Logger log = LoggerFactory.getLogger(SparkUpdateOCRels.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(final String[] args) throws IOException, ParseException { + public static void main(final String[] args) throws IOException, ParseException { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Objects - .requireNonNull( - SparkUpdateOCRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json")))); + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + SparkUpdateOCRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json")))); - parser.parseArgument(args); + parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}", inputPath.toString()); + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}", inputPath.toString()); - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}", outputPath); + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); - final boolean shouldDuplicateRels = - Optional.ofNullable(parser.get("shouldDuplicateRels")) - .map(Boolean::valueOf) - .orElse(Boolean.FALSE); + final boolean shouldDuplicateRels = Optional + .ofNullable(parser.get("shouldDuplicateRels")) + .map(Boolean::valueOf) + .orElse(Boolean.FALSE); - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> - addOCRelations(spark, inputPath, outputPath, shouldDuplicateRels) - ); + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> addOCRelations(spark, inputPath, outputPath, shouldDuplicateRels)); - } + } - private static void addOCRelations(SparkSession spark, String inputPath, String outputPath, - boolean shouldDuplicateRels) { - spark - .sqlContext() - .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) - .flatMap( - (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), - Encoders.bean(Relation.class)) - .filter((FilterFunction) value -> value != null) - .write() - .mode(SaveMode.Append) - .option("compression", "gzip") - .json(outputPath); + private static void addOCRelations(SparkSession spark, String inputPath, String outputPath, + boolean shouldDuplicateRels) { + spark + .sqlContext() + .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) + .flatMap( + (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), + Encoders.bean(Relation.class)) + .filter((FilterFunction) value -> value != null) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(outputPath); - } + } - private static List createRelation(String value, boolean duplicate) { - String[] line = value.split(","); - if (!line[1].startsWith("10.")) { - return new ArrayList<>(); - } - List relationList = new ArrayList<>(); + private static List createRelation(String value, boolean duplicate) { + String[] line = value.split(","); + if (!line[1].startsWith("10.")) { + return new ArrayList<>(); + } + List relationList = new ArrayList<>(); - String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue(DOI, line[1])); - final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue(DOI, line[2])); + String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue(DOI, line[1])); + final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue(DOI, line[2])); - relationList - .addAll( - getRelations( - citing, - cited)); + relationList + .addAll( + getRelations( + citing, + cited)); - if (duplicate && line[1].endsWith(REF_DOI)) { - citing = ID_PREFIX + IdentifierFactory - .md5(CleaningFunctions.normalizePidValue(DOI, line[1].substring(0, line[1].indexOf(REF_DOI)))); - relationList.addAll(getRelations(citing, cited)); - } + if (duplicate && line[1].endsWith(REF_DOI)) { + citing = ID_PREFIX + IdentifierFactory + .md5(CleaningFunctions.normalizePidValue(DOI, line[1].substring(0, line[1].indexOf(REF_DOI)))); + relationList.addAll(getRelations(citing, cited)); + } - return relationList; - } + return relationList; + } - private static Collection getRelations(String citing, String cited) { + private static Collection getRelations(String citing, String cited) { - return Arrays - .asList( - getRelation(citing, cited, ModelConstants.CITES), - getRelation(cited, citing, ModelConstants.IS_CITED_BY)); - } + return Arrays + .asList( + getRelation(citing, cited, ModelConstants.CITES), + getRelation(cited, citing, ModelConstants.IS_CITED_BY)); + } - public static Relation getRelation( - String source, - String target, - String relclass) { - Relation r = new Relation(); - r.setCollectedfrom(getCollectedFrom()); - r.setSource(source); - r.setTarget(target); - r.setRelClass(relclass); - r.setRelType(ModelConstants.RESULT_RESULT); - r.setSubRelType(ModelConstants.CITATION); - r - .setDataInfo( - getDataInfo(UPDATE_DATA_INFO_TYPE, OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, - ModelConstants.DNET_PROVENANCE_ACTIONS, OC_TRUST, false)); - return r; - } - - public static List getCollectedFrom() { - KeyValue kv = new KeyValue(); - kv.setKey(ModelConstants.OPENOCITATIONS_ID); - kv.setValue(ModelConstants.OPENOCITATIONS_NAME); - - return Arrays.asList(kv); - } + public static Relation getRelation( + String source, + String target, + String relclass) { + Relation r = new Relation(); + r.setCollectedfrom(getCollectedFrom()); + r.setSource(source); + r.setTarget(target); + r.setRelClass(relclass); + r.setRelType(ModelConstants.RESULT_RESULT); + r.setSubRelType(ModelConstants.CITATION); + r + .setDataInfo( + getDataInfo( + UPDATE_DATA_INFO_TYPE, OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, OC_TRUST, false)); + return r; + } + public static List getCollectedFrom() { + KeyValue kv = new KeyValue(); + kv.setKey(ModelConstants.OPENOCITATIONS_ID); + kv.setValue(ModelConstants.OPENOCITATIONS_NAME); + return Arrays.asList(kv); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_prepare_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_prepare_parameters.json new file mode 100644 index 0000000000..7663a454b9 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_prepare_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "ip", + "paramLongName": "inputPath", + "paramDescription": "the URL from where to get the programme file", + "paramRequired": true + }, + { + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "the path of the new ActionSet", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_update_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_update_parameters.json new file mode 100644 index 0000000000..31771a40ad --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_update_parameters.json @@ -0,0 +1,32 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "ip", + "paramLongName": "inputPath", + "paramDescription": "the URL from where to get the programme file", + "paramRequired": true + }, + { + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "the path of the new ActionSet", + "paramRequired": true + }, + { + "paramName": "rtn", + "paramLongName": "resultTableName", + "paramDescription": "the path of the new ActionSet", + "paramRequired": true + }, + { + "paramName": "bsp", + "paramLongName": "bipScorePath", + "paramDescription": "the path of the new ActionSet", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/fos_update_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/fos_update_parameters.json new file mode 100644 index 0000000000..6c78811e6a --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/fos_update_parameters.json @@ -0,0 +1,32 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "ip", + "paramLongName": "inputPath", + "paramDescription": "the URL from where to get the programme file", + "paramRequired": true + }, + { + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "the path of the new ActionSet", + "paramRequired": true + }, + { + "paramName": "rtn", + "paramLongName": "resultTableName", + "paramDescription": "the path of the new ActionSet", + "paramRequired": true + }, + { + "paramName": "fp", + "paramLongName": "fosPath", + "paramDescription": "the path of the new ActionSet", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/BipTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/BipTest.java new file mode 100644 index 0000000000..41595eacf4 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/BipTest.java @@ -0,0 +1,250 @@ + +package eu.dnetlib.dhp.bypassactionset; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.stream.Collectors; + +import eu.dnetlib.dhp.schema.oaf.Author; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.neethi.Assertion; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.bypassactionset.bipfinder.PrepareBipFinder; +import eu.dnetlib.dhp.bypassactionset.bipfinder.SparkUpdateBip; +import eu.dnetlib.dhp.bypassactionset.model.BipScore; +import eu.dnetlib.dhp.countrypropagation.CountryPropagationJobTest; +import eu.dnetlib.dhp.schema.oaf.Measure; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; + +public class BipTest { + + private static final Logger log = LoggerFactory.getLogger(FOSTest.class); + + private static Path workingDir; + private static SparkSession spark; + private static LocalFileSystem fs; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final String ID_PREFIX = "50|doi_________"; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(BipTest.class.getSimpleName()); + + fs = FileSystem.getLocal(new Configuration()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(FOSTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(CountryPropagationJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void prepareBipTest() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/bip/bip.json") + .getPath(); + + PrepareBipFinder + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--inputPath", sourcePath, + "--outputPath", workingDir.toString() + "/remapDoi" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/remapDoi") + .map(item -> OBJECT_MAPPER.readValue(item, BipScore.class)); + + Assertions.assertEquals(86, tmp.count()); +// tmp.foreach(v -> System.out.println(OBJECT_MAPPER.writeValueAsString(v))); + + String doi1 = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.0000/096020199389707")); + + Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count()); + Assertions.assertEquals(3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getScoreList().size()); + Assertions + .assertEquals( + "6.34596412687e-09", tmp + .filter(r -> r.getId().equals(doi1)) + .collect() + .get(0) + .getScoreList() + .stream() + .filter(sl -> sl.getId().equals("influence")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "0.641151896994", tmp + .filter(r -> r.getId().equals(doi1)) + .collect() + .get(0) + .getScoreList() + .stream() + .filter(sl -> sl.getId().equals("popularity_alt")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "2.33375102921e-09", tmp + .filter(r -> r.getId().equals(doi1)) + .collect() + .get(0) + .getScoreList() + .stream() + .filter(sl -> sl.getId().equals("popularity")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + } + + @Test + void updateResult() throws Exception { + final String bipScorePath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json") + .getPath(); + + final String inputPath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json") + .getPath(); + + SparkUpdateBip + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--bipScorePath", bipScorePath, + "--inputPath", inputPath, + "--outputPath", workingDir.toString() + "/publication", + "--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/publication/bip") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + + Assertions.assertEquals(6, tmp.count()); + Assertions.assertEquals(0, tmp.filter(r -> r.getMeasures() != null).count()); + tmp.foreach(r -> Assertions.assertEquals("publication", r.getResulttype().getClassid())); + + } + + @Test + void updateResultMatchCheckMeasures() throws Exception { + final String bipScorePath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json") + .getPath(); + + final String inputPath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/bip/publicationmatch.json") + .getPath(); + + SparkUpdateBip + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--bipScorePath", bipScorePath, + "--inputPath", inputPath, + "--outputPath", workingDir.toString() + "/publication", + "--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/publication/bip") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + + Assertions.assertEquals(6, tmp.count()); + Assertions.assertEquals(1, tmp.filter(r -> r.getMeasures() != null).count()); + Assertions + .assertEquals( + 1, tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")).count()); + Assertions + .assertEquals( + 1, + tmp + .filter( + r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f") + && r.getMeasures() != null) + .count()); + Assertions.assertEquals(3, tmp + .filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")) + .collect() + .get(0) + .getMeasures().size()); + + Assertions.assertEquals("5.91019644836e-09", + tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")) + .collect() + .get(0).getMeasures().stream().filter(m -> m.getId().equals("influence")).collect(Collectors.toList()).get(0).getUnit().get(0).getValue()); + Assertions.assertEquals("0.0", + tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")) + .collect() + .get(0).getMeasures().stream().filter(m -> m.getId().equals("popularity_alt")).collect(Collectors.toList()).get(0).getUnit().get(0).getValue()); + Assertions.assertEquals("9.88840807598e-09", + tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")) + .collect() + .get(0).getMeasures().stream().filter(m -> m.getId().equals("popularity")).collect(Collectors.toList()).get(0).getUnit().get(0).getValue()); + + tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + + } + + + +} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/FOSTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/FOSTest.java new file mode 100644 index 0000000000..db539703b7 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/FOSTest.java @@ -0,0 +1,253 @@ + +package eu.dnetlib.dhp.bypassactionset; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.bypassactionset.fos.SparkUpdateFOS; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.neethi.Assertion; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.bypassactionset.fos.GetFOSData; +import eu.dnetlib.dhp.bypassactionset.fos.PrepareFOSSparkJob; +import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.countrypropagation.CountryPropagationJobTest; +import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; + +public class FOSTest { + private static final Logger log = LoggerFactory.getLogger(FOSTest.class); + + private static Path workingDir; + private static SparkSession spark; + private static LocalFileSystem fs; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final String ID_PREFIX = "50|doi_________"; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(CountryPropagationJobTest.class.getSimpleName()); + + fs = FileSystem.getLocal(new Configuration()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(FOSTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(CountryPropagationJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void getFOSFileTest() throws CollectorException, IOException, ClassNotFoundException { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/fos/h2020_fos_sbs.csv") + .getPath(); + final String outputPath = workingDir.toString() + "/fos.json"; + + new GetFOSData() + .doRewrite(sourcePath, outputPath, "eu.dnetlib.dhp.bypassactionset.FOSDataModel", '\t', fs); + + BufferedReader in = new BufferedReader( + new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + FOSDataModel fos = new ObjectMapper().readValue(line, FOSDataModel.class); + + System.out.println(new ObjectMapper().writeValueAsString(fos)); + count += 1; + } + + assertEquals(38, count); + + } + + @Test + void distributeDoiTest() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos.json") + .getPath(); + + PrepareFOSSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + + "-outputPath", workingDir.toString() + "/distribute" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/distribute") + .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); + + String doi1 = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.3390/s18072310")); + + assertEquals(50, tmp.count()); + assertEquals(1, tmp.filter(row -> row.getDoi().equals(doi1)).count()); + assertEquals( + "engineering and technology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel1()); + assertEquals("nano-technology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel2()); + assertEquals( + "nanoscience & nanotechnology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel3()); + + String doi = ID_PREFIX + + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/1365-2656.12831")); + assertEquals(1, tmp.filter(row -> row.getDoi().equals(doi)).count()); + assertEquals("social sciences", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel1()); + assertEquals( + "psychology and cognitive sciences", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel2()); + assertEquals("NULL", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel3()); + + + } + + @Test + void updateResult() throws Exception{ + + final String fosPath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json") + .getPath(); + + final String inputPath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json") + .getPath(); + + SparkUpdateFOS + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--fosPath", fosPath, + "--inputPath", inputPath, + "--outputPath", workingDir.toString() + "/publication", + "--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/publication") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + + Assertions.assertEquals(6, tmp.count()); + + tmp.filter(r -> r.getSubject() != null).map(p -> p.getSubject()) + .foreach(s -> s.stream().forEach(sbj -> Assertions.assertFalse("FOS".equals(sbj.getQualifier().getClassid())))); + + + } + + @Test + void updateResultMatch() throws Exception{ + final String fosPath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json") + .getPath(); + + final String inputPath = getClass() + .getResource("/eu/dnetlib/dhp/bypassactionset/fos/publicationmatch.json") + .getPath(); + + SparkUpdateFOS + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--fosPath", fosPath, + "--inputPath", inputPath, + "--outputPath", workingDir.toString() + "/publication", + "--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/publication") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + + Assertions.assertEquals(6, tmp.count()); + + Assertions.assertEquals(3, tmp.filter(r -> r.getSubject() != null).map(p -> p.getSubject()).flatMap(v -> v.iterator()) + .filter(sbj -> sbj.getQualifier().getClassid().equals("FOS")).collect().size()); + + + List sbjs = tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")) + .map(p -> p.getSubject()).collect().get(0); + + Assertions.assertEquals(12, sbjs.size()); + + Stream fosSubjs = sbjs.stream().filter(sbj -> sbj.getQualifier().getClassid().equals("FOS")); + + Assertions.assertTrue(fosSubjs + .map(sbj -> sbj.getValue()).collect(Collectors.toList()).contains("engineering and technology")); + Assertions.assertTrue(fosSubjs + .map(sbj -> sbj.getValue()).collect(Collectors.toList()).contains("nano-technology")); + Assertions.assertTrue(fosSubjs + .map(sbj -> sbj.getValue()).collect(Collectors.toList()).contains("nanoscience & nanotechnology")); + + fosSubjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance()) ); + fosSubjs.forEach(sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid()) ); + fosSubjs.forEach(sbj -> Assertions.assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname() )); + fosSubjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust() )); + fosSubjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference() )); + fosSubjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible() )); + fosSubjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred() )); + + + + } + + + +} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java deleted file mode 100644 index 5202897489..0000000000 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/GetFOSTest.java +++ /dev/null @@ -1,341 +0,0 @@ -package eu.dnetlib.dhp.bypassactionset; - -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.bypassactionset.fos.PrepareFOSSparkJob; -import eu.dnetlib.dhp.bypassactionset.fos.GetFOSData; -import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; -import eu.dnetlib.dhp.common.collection.CollectorException; -import eu.dnetlib.dhp.countrypropagation.CountryPropagationJobTest; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; - -import java.nio.file.Path; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.file.Files; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class GetFOSTest { - private static final Logger log = LoggerFactory.getLogger(GetFOSTest.class); - - private static Path workingDir; - private static SparkSession spark; - private static LocalFileSystem fs; - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final String ID_PREFIX = "50|doi_________"; - - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(CountryPropagationJobTest.class.getSimpleName()); - - fs = FileSystem.getLocal(new Configuration()); - log.info("using work dir {}", workingDir); - - SparkConf conf = new SparkConf(); - conf.setAppName(GetFOSTest.class.getSimpleName()); - - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - - spark = SparkSession - .builder() - .appName(CountryPropagationJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } - - - @Test - void getFOSFileTest() throws CollectorException, IOException, ClassNotFoundException { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/fos/h2020_fos_sbs.csv") - .getPath(); - final String outputPath = workingDir.toString() + "/fos.json"; - - - - - new GetFOSData() - .doRewrite(sourcePath, outputPath, "eu.dnetlib.dhp.bypassactionset.FOSDataModel", '\t',fs ); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - FOSDataModel fos = new ObjectMapper().readValue(line, FOSDataModel.class); - - System.out.println(new ObjectMapper().writeValueAsString(fos)); - count += 1; - } - - assertEquals(38, count); - - - } - - - @Test - void distributeDoiTest() throws Exception{ - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos.json") - .getPath(); - - PrepareFOSSparkJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", sourcePath, - - "-outputPath", workingDir.toString() + "/distribute" - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/distribute") - .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); - - String doi1 = ID_PREFIX + - IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.3390/s18072310")); - - assertEquals(50, tmp.count()); - assertEquals(1, tmp.filter(row -> row.getDoi().equals(doi1)).count()); - assertEquals("engineering and technology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel1()); - assertEquals("nano-technology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel2()); - assertEquals("nanoscience & nanotechnology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel3()); - - String doi = ID_PREFIX + - IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/1365-2656.12831")); - assertEquals(1, tmp.filter(row -> row.getDoi().equals(doi)).count()); - assertEquals("social sciences", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel1()); - assertEquals("psychology and cognitive sciences", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel2()); - assertEquals("NULL", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel3()); - -// {"doi":"10.1111/1365-2656.12831\u000210.17863/cam.24369","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"} - - } - - /** - * @Test - * void testCountryPropagationSoftware() throws Exception { - * final String sourcePath = getClass() - * .getResource("/eu/dnetlib/dhp/countrypropagation/sample/software") - * .getPath(); - * final String preparedInfoPath = getClass() - * .getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo") - * .getPath(); - * SparkCountryPropagationJob - * .main( - * new String[] { - * "--isSparkSessionManaged", Boolean.FALSE.toString(), - * "--sourcePath", sourcePath, - * "-saveGraph", "true", - * "-resultTableName", Software.class.getCanonicalName(), - * "-outputPath", workingDir.toString() + "/software", - * "-preparedInfoPath", preparedInfoPath - * }); - * - * final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - * - * JavaRDD tmp = sc - * .textFile(workingDir.toString() + "/software") - * .map(item -> OBJECT_MAPPER.readValue(item, Software.class)); - * - * // tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s)); - * - * Assertions.assertEquals(10, tmp.count()); - * - * Dataset verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Software.class)); - * - * Assertions.assertEquals(6, verificationDs.filter("size(country) > 0").count()); - * Assertions.assertEquals(3, verificationDs.filter("size(country) = 1").count()); - * Assertions.assertEquals(3, verificationDs.filter("size(country) = 2").count()); - * Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count()); - * - * Dataset countryExploded = verificationDs - * .flatMap( - * (FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) - * .map((MapFunction) Qualifier::getClassid, Encoders.STRING()); - * - * Assertions.assertEquals(9, countryExploded.count()); - * - * Assertions.assertEquals(1, countryExploded.filter("value = 'FR'").count()); - * Assertions.assertEquals(1, countryExploded.filter("value = 'TR'").count()); - * Assertions.assertEquals(2, countryExploded.filter("value = 'IT'").count()); - * Assertions.assertEquals(1, countryExploded.filter("value = 'US'").count()); - * Assertions.assertEquals(1, countryExploded.filter("value = 'MX'").count()); - * Assertions.assertEquals(1, countryExploded.filter("value = 'CH'").count()); - * Assertions.assertEquals(2, countryExploded.filter("value = 'JP'").count()); - * - * Dataset> countryExplodedWithCountryclassid = verificationDs - * .flatMap((FlatMapFunction>) row -> { - * List> prova = new ArrayList<>(); - * List countryList = row.getCountry(); - * countryList - * .forEach( - * c -> prova - * .add( - * new Tuple2<>( - * row.getId(), c.getClassid()))); - * return prova.iterator(); - * }, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - * - * Assertions.assertEquals(9, countryExplodedWithCountryclassid.count()); - * - * countryExplodedWithCountryclassid.show(false); - * Assertions - * .assertEquals( - * 1, - * countryExplodedWithCountryclassid - * .filter( - * "_1 = '50|od______1582::6e7a9b21a2feef45673890432af34244' and _2 = 'FR' ") - * .count()); - * Assertions - * .assertEquals( - * 1, - * countryExplodedWithCountryclassid - * .filter( - * "_1 = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523' and _2 = 'TR' ") - * .count()); - * Assertions - * .assertEquals( - * 2, - * countryExplodedWithCountryclassid - * .filter( - * "_1 = '50|od______1106::2b7ca9726230be8e862be224fd463ac4' and (_2 = 'IT' or _2 = 'MX') ") - * .count()); - * Assertions - * .assertEquals( - * 2, - * countryExplodedWithCountryclassid - * .filter( - * "_1 = '50|od_______935::46a0ad9964171c3dd13373f5427b9a1c' and (_2 = 'IT' or _2 = 'US') ") - * .count()); - * Assertions - * .assertEquals( - * 1, - * countryExplodedWithCountryclassid - * .filter( - * "_1 = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and _2 = 'JP'") - * .count()); - * Assertions - * .assertEquals( - * 2, - * countryExplodedWithCountryclassid - * .filter( - * "_1 = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6' and (_2 = 'CH' or _2 = 'JP') ") - * .count()); - * - * Dataset> countryExplodedWithCountryclassname = verificationDs - * .flatMap( - * (FlatMapFunction>) row -> { - * List> prova = new ArrayList<>(); - * List countryList = row.getCountry(); - * countryList - * .forEach( - * c -> prova - * .add( - * new Tuple2<>( - * row.getId(), - * c.getClassname()))); - * return prova.iterator(); - * }, - * Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - * - * countryExplodedWithCountryclassname.show(false); - * Assertions - * .assertEquals( - * 1, - * countryExplodedWithCountryclassname - * .filter( - * "_1 = '50|od______1582::6e7a9b21a2feef45673890432af34244' and _2 = 'France' ") - * .count()); - * Assertions - * .assertEquals( - * 1, - * countryExplodedWithCountryclassname - * .filter( - * "_1 = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523' and _2 = 'Turkey' ") - * .count()); - * Assertions - * .assertEquals( - * 2, - * countryExplodedWithCountryclassname - * .filter( - * "_1 = '50|od______1106::2b7ca9726230be8e862be224fd463ac4' and (_2 = 'Italy' or _2 = 'Mexico') ") - * .count()); - * Assertions - * .assertEquals( - * 2, - * countryExplodedWithCountryclassname - * .filter( - * "_1 = '50|od_______935::46a0ad9964171c3dd13373f5427b9a1c' and (_2 = 'Italy' or _2 = 'United States') ") - * .count()); - * Assertions - * .assertEquals( - * 1, - * countryExplodedWithCountryclassname - * .filter( - * "_1 = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and _2 = 'Japan' ") - * .count()); - * Assertions - * .assertEquals( - * 2, - * countryExplodedWithCountryclassname - * .filter( - * "_1 = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6' and (_2 = 'Switzerland' or _2 = 'Japan') ") - * .count()); - * - * Dataset> countryExplodedWithCountryProvenance = verificationDs - * .flatMap( - * (FlatMapFunction>) row -> { - * List> prova = new ArrayList<>(); - * List countryList = row.getCountry(); - * countryList - * .forEach( - * c -> prova - * .add( - * new Tuple2<>( - * row.getId(), - * c - * .getDataInfo() - * .getInferenceprovenance()))); - * return prova.iterator(); - * }, - * Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - * - * Assertions - * .assertEquals( - * 7, countryExplodedWithCountryProvenance.filter("_2 = 'propagation'").count()); - * } - */ -} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/bip.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/bip.json new file mode 100644 index 0000000000..82233c0dfd --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/bip.json @@ -0,0 +1,86 @@ +{"10.0000/000000": [{"id": "influence", "unit": [{"value": "7.5597134689e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "4.903880192", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.17977512835e-08", "key": "score"}]}]} +{"10.0000/096020199389707": [{"id": "influence", "unit": [{"value": "6.34596412687e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.641151896994", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.33375102921e-09", "key": "score"}]}]} +{"10.00000/jpmc.2017.106": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.39172290649e-09", "key": "score"}]}]} +{"10.0000/9781845416881": [{"id": "influence", "unit": [{"value": "5.96492048955e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "1.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.12641925838e-08", "key": "score"}]}]} +{"10.0000/anziamj.v0i0.266": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.76260934675e-10", "key": "score"}]}]} +{"10.0000/anziamj.v48i0.79": [{"id": "influence", "unit": [{"value": "6.93311506443e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.002176782336", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.7668105708e-09", "key": "score"}]}]} +{"10.0000/anziamj.v50i0.1472": [{"id": "influence", "unit": [{"value": "6.26777280882e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.406656", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.39745193285e-09", "key": "score"}]}]} +{"10.0000/cja5553": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/czastest.16": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.01810569717e-09", "key": "score"}]}]} +{"10.0000/czastest.17": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.47956715615e-09", "key": "score"}]}]} +{"10.0000/czastest.18": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.47956715615e-09", "key": "score"}]}]} +{"10.0000/czastest.20": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.01810569717e-09", "key": "score"}]}]} +{"10.0000/czastest.21": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.47956715615e-09", "key": "score"}]}]} +{"10.0000/czastest.28": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "3.47956715615e-09", "key": "score"}]}]} +{"10.0000/czastest.60": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65008652949e-09", "key": "score"}]}]} +{"10.0000/czt.2019.1.2.15": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.28336930301e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v4i02.36": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.01810569717e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v4i02.37": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.01810569717e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v4i02.38": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.01810569717e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v5i01.32": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.01810569717e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v6i01.24": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.01810569717e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v6i01.27": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.01810569717e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v6i02.41": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.01810569717e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v6i02.44": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.01810569717e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v7i01.40": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.01810569717e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v7i01.42": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65008652949e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v7i01.47": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65008652949e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v7i01.51": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65008652949e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v7i01.52": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65008652949e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v7i02.86": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65008652949e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v7i02.88": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65008652949e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v7i02.91": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65008652949e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v8i01.129": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65008652949e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v8i01.180": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.39172290649e-09", "key": "score"}]}]} +{"10.0000/geoekonomi.v8i01.87": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "4.65008652949e-09", "key": "score"}]}]} +{"10.0000/hbv2004w010": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hbv2101w001": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.88840807598e-09", "key": "score"}]}]} +{"10.0000/hbv2101w002": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.88840807598e-09", "key": "score"}]}]} +{"10.0000/hbv2101w003": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.88840807598e-09", "key": "score"}]}]} +{"10.0000/hbv2101w004": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.88840807598e-09", "key": "score"}]}]} +{"10.0000/hbv2101w005": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.88840807598e-09", "key": "score"}]}]} +{"10.0000/hbv2101w006": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.88840807598e-09", "key": "score"}]}]} +{"10.0000/hbv2101w007": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.88840807598e-09", "key": "score"}]}]} +{"10.0000/hbv2102w001": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.88840807598e-09", "key": "score"}]}]} +{"10.0000/hbv2102w010": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "9.88840807598e-09", "key": "score"}]}]} +{"10.0000/hoplos.v1i1.13207": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v1i1.13208": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.39172290649e-09", "key": "score"}]}]} +{"10.0000/hoplos.v1i1.13209": [{"id": "influence", "unit": [{"value": "6.32078461509e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "1.6", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.3168486939e-09", "key": "score"}]}]} +{"10.0000/hoplos.v1i1.13210": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v1i1.13211": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.39172290649e-09", "key": "score"}]}]} +{"10.0000/hoplos.v1i1.13212": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.39172290649e-09", "key": "score"}]}]} +{"10.0000/hoplos.v1i2.13231": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i2.28782": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i2.28783": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i2.28784": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i2.28786": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i2.28787": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i2.28788": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i3.28234": [{"id": "influence", "unit": [{"value": "6.40470414877e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.6", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.89465099068e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i3.28236": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i3.28238": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i3.28239": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i3.28242": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v2i3.28243": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "6.26204125721e-09", "key": "score"}]}]} +{"10.0000/hoplos.v3i4.38186": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.28336930301e-09", "key": "score"}]}]} +{"10.0000/hoplos.v3i4.38187": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.28336930301e-09", "key": "score"}]}]} +{"10.0000/hoplos.v3i4.38190": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.28336930301e-09", "key": "score"}]}]} +{"10.0000/hoplos.v3i4.38207": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.28336930301e-09", "key": "score"}]}]} +{"10.0000/hoplos.v3i4.38209": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.28336930301e-09", "key": "score"}]}]} +{"10.0000/hoplos.v3i5.41163": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.28336930301e-09", "key": "score"}]}]} +{"10.0000/hoplos.v3i5.41166": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.28336930301e-09", "key": "score"}]}]} +{"10.0000/hoplos.v3i5.41167": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.28336930301e-09", "key": "score"}]}]} +{"10.0000/hoplos.v3i5.41168": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "7.28336930301e-09", "key": "score"}]}]} +{"10.0000/hoplos.v3i5.41229": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i6.36360": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i6.40796": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i6.41153": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i6.42511": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i6.42555": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i6.42752": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i6.42768": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i6.42795": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i7.41295": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i7.42830": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i7.42861": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} +{"10.0000/hoplos.v4i7.43096": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json new file mode 100644 index 0000000000..bf5817bd20 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json @@ -0,0 +1,86 @@ +{"id":"50|doi_________63848be3afd635374828253a6f974f11","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} +{"id":"50|doi_________5da9060b89165e3f61a0806dcf2c2696","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________9ea0266b8ddd471eddb20635b89273bb","scoreList":[{"id":"influence","unit":[{"key":"score","value":"7.5597134689e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"4.903880192"}]},{"id":"popularity","unit":[{"key":"score","value":"1.17977512835e-08"}]}]} +{"id":"50|doi_________ab797495de07d4f4a25f08b84fca6021","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.34596412687e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.641151896994"}]},{"id":"popularity","unit":[{"key":"score","value":"2.33375102921e-09"}]}]} +{"id":"50|doi_________df5ade937e177dcfb82bc5ec3139fefe","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]} +{"id":"50|doi_________357976ea7d4e744fe21966607334952c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]} +{"id":"50|doi_________872982548f741b89eb5d30f509316dde","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.32078461509e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"1.6"}]},{"id":"popularity","unit":[{"key":"score","value":"8.3168486939e-09"}]}]} +{"id":"50|doi_________19c6da2771befb83f9f2715fc84f9edf","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.96492048955e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"1.0"}]},{"id":"popularity","unit":[{"key":"score","value":"1.12641925838e-08"}]}]} +{"id":"50|doi_________46e18e9e477d6fc71e002eae47cfc390","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________5cc344a6da53a8f2bac11faf7607e0b0","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.76260934675e-10"}]}]} +{"id":"50|doi_________fdfadf5cefdb8b63f90d5a3475a95959","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]} +{"id":"50|doi_________51fc7a9c7b9f1cf6705c7afb1de58967","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.93311506443e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.002176782336"}]},{"id":"popularity","unit":[{"key":"score","value":"1.7668105708e-09"}]}]} +{"id":"50|doi_________bbf7a94696ab67e4c29429522c31c0ef","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]} +{"id":"50|doi_________232498b3aed64dd0f0db87b09b61d5cd","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.26777280882e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.406656"}]},{"id":"popularity","unit":[{"key":"score","value":"3.39745193285e-09"}]}]} +{"id":"50|doi_________03749bed57efa24ab607527cf1eb94c5","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________65074998446928c1c18e25313dcf974b","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________2b62feaed6ad14760096c2a59f82836e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________d2d133a6fdfd44f34f96e225b5573447","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} +{"id":"50|doi_________3dd3cccbfcad3f206d1239a580af011f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________84852a20e9bbf1daf89803b5fcfc9c34","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]} +{"id":"50|doi_________84c5824b3a10a28a8bc26fed6223a08a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________8658fda7574016daa0d71c88eb6bab5f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]} +{"id":"50|doi_________ab52c136b88151f0c28f6ca4a5ef2a71","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________e8a2912fe3d70b13124e436294411378","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________cf3470c86f338ccf232f2869d0fa6ea2","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} +{"id":"50|doi_________16437c0064576207ba1438bf07fb9e21","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________f0ae1dc0f1dbb7fceaccc02d6e667f24","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.40470414877e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.6"}]},{"id":"popularity","unit":[{"key":"score","value":"7.89465099068e-09"}]}]} +{"id":"50|doi_________93d03d99bbc0f542fa2679e74882a096","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]} +{"id":"50|doi_________fcfce4bc985f22baf2dc1dc733a862e7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________3b7fd390517f45b0d37e0ec0adbb98b1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]} +{"id":"50|doi_________31306ba727c26ba04c0d4bbd0899d433","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________3e6151aa77865a1fdae56392f68b81f1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} +{"id":"50|doi_________1cd6e2c4cf189819fa8a011629b5744c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} +{"id":"50|doi_________45c855d81df747ff3c2033f6c8cf7bca","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________24ac4c0c4b143e37c2551723e9a55f36","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} +{"id":"50|doi_________5f9e3f1a076c2f587165f8ccd68286e5","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________b637731c913efe3c1f283183ef4a59cc","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} +{"id":"50|doi_________490c0c23e4ed269f807e72e71cad09ae","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} +{"id":"50|doi_________15584e5a3a5bbdc487c85ab18d8a7c22","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} +{"id":"50|doi_________a7fd268447553e7e0fe06c19db28ea85","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} +{"id":"50|doi_________cf1b8db4480aa0e281a4cc122d3b2416","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} +{"id":"50|doi_________8880fd8fa9cafbde2431583dfbab1e1f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} +{"id":"50|doi_________22f8b4f61c49de05dcc0d94bf5e147d8","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} +{"id":"50|doi_________96e2b076fab4931ec2b1b9f43c9c31f9","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} +{"id":"50|doi_________1ac665f912877f93f9084dc38bc8174b","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} +{"id":"50|doi_________394fd70c30b21b1336b5a0405496fbe0","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} +{"id":"50|doi_________73caf44494091b089f8be65427fb4341","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} +{"id":"50|doi_________9b996963325dfae10dba5c3043938fc1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} +{"id":"50|doi_________1f03d5cab86f4963fbd3e0f1284cb9b8","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} +{"id":"50|doi_________87a59f3918f9ac2c394b2509173fb3e1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} +{"id":"50|doi_________9a8465f5343a1c845b6dd91496395c7e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} +{"id":"50|doi_________b03c0a657c669d24f45844307b9ad6bd","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} +{"id":"50|doi_________04aeabd709d8a486bde733ffc5ef53ae","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} +{"id":"50|doi_________6c21d2d2133477141ad74226f8fd75b0","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} +{"id":"50|doi_________b746ffb04cd9f815d5880c827c55040e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________0457b2e62a69f0e104db7cc575f2241a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} +{"id":"50|doi_________2fa691f5394020f7cb2df4f272a062e7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________aa2a77dc605567f5f2305b5f38a1a4b7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} +{"id":"50|doi_________cdb6f10588d85495208cbc6bf4a75000","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________6630766c2ad4783ebd01a9d7e9607eb1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} +{"id":"50|doi_________389315d5f74765688d34b8ee86c2514f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________beef7316dd6665798120698451446d74","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} +{"id":"50|doi_________525a46f117c946b8ae1caea542e24d0c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________fe3d2250ebaa65dae09aa372945c917f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} +{"id":"50|doi_________72d2ec1744cda7d49cf35707ff3e11c1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________d7e0c5fd1f00d8a90879f168d73b9e0b","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} +{"id":"50|doi_________6e0e19ee1f9c1dbadf202ccd9919e6a7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________6b306286f9b4d207f81bc9164713243c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} +{"id":"50|doi_________acf5c0c1a125cd78fd9193589edc9152","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________66f645f86cdd9592953a1106e0dbf191","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} +{"id":"50|doi_________77ff842449308dd907ec46ebf9ccd539","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________a3d8fc73d1ac1308c117a61b4eb14ab5","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________9ff57cb17abfcd338c2ca768c85e452a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]} +{"id":"50|doi_________f903c5c3305fc7b1598a0ed989bb4f83","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} +{"id":"50|doi_________2260d2cdf6fbcee10d61d5e0f1428ebd","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________549c5c8f66c36cab609b7221eae37040","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________388a95b886946d771a7f38e86d58f65e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________4141ef3f730fc811930a66c088486c34","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} +{"id":"50|doi_________e3b32800bba3187fbf7f470df08755ea","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} +{"id":"50|doi_________d8d02aa2c2347ecc628724265c5cae27","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} +{"id":"50|doi_________08f1db95b06e62e34948f2b47ac449af","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} +{"id":"50|doi_________cbbab3f7306b0573664b94615f5c04e2","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} +{"id":"50|doi_________b24ab3e127aa67e2a1017292988d571f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} +{"id":"50|doi_________eb9a4f92bd6934727e2722d22340884a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} +{"id":"50|doi_________727484ef97b31ecb0806cc959869aa97","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} +{"id":"50|doi_________224c865ef04d10c117913fa08eeb1be4","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationmatch.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationmatch.json new file mode 100644 index 0000000000..b458cbeb3e --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationmatch.json @@ -0,0 +1,6 @@ +{"author":[{"fullname":"Tanouayi, Gnon","name":"Gnon","pid":[],"rank":1,"surname":"Tanouayi"},{"fullname":"GNANDI, Kissao","name":"Kissao","pid":[],"rank":2,"surname":"Gnandi"},{"fullname":"Ouro-Sama, Kamilou","name":"Kamilou","pid":[],"rank":3,"surname":"Ouro-Sama"},{"fullname":"Ahoudi, Housséni","name":"Housséni","pid":[],"rank":4,"surname":"Ahoudi"},{"fullname":"Solitoke, Hodabalo Dhéoulaba","name":"Hodabalo Dhéoulaba","pid":[],"rank":5,"surname":"Solitoke"},{"fullname":"Badassan, Tchaa Esso-Essinam","name":"Tchaa Esso-Essinam","pid":[],"rank":6,"surname":"Badassan"},{"fullname":"Nyametso, A. Yawovi","name":"A. Yawovi","pid":[],"rank":7,"surname":"Nyametso"},{"fullname":"Agbéko, Aduayi-Akué Adoté","name":"Aduayi-Akué Adoté","pid":[],"rank":8,"surname":"Agbéko"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Importer of dst articles previously hosted by inist Eid system account"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"dateofcollection":"2021-10-05T19:43:46.821Z","dateoftransformation":"2021-10-05T20:15:55.705Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This study is a contribution to the development of adsorption techniques for the removal of fluoride in natural waters. The work is carried out on a laboratory scale using local geo-materials sorbents, on the one hand the residues from the treatment of natural phosphorite of Hahotoé-Kpogamé and on the other hand the attapulgite clay mineral from the costal basin of Togo. The work carried out concerns the adsorption of fluoride on those sorbents. The following parameters are batch tested on synthetic fluoride solutions: time, solution pH, geo-material dose and fluoride concentration. The fluoride is analyzed by absorption spectrometry. The adsorption yields on the phosphorite treatment residues for aqueous fluoride solutions at an initial concentration of 10 mg/L and an adsorbent concentration of 10 g/L are 49 % at pH 6.5 and 66 % at pH 4.0. In the same experimental conditions, the yields on clay minerals are 28.2 % and 36.3 %. These yields are logically improved by increasing the adsorbent dosage (from 2 to 30 g/L). Additional tests are carried out on natural water at an initial fluoride concentration of 3.76 mg/L."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Cette étude est une contribution au développement de techniques d’adsorption pour l’élimination du fluor dans les eaux naturelles. Les travaux ont été réalisés à l’échelle du laboratoire en utilisant comme sorbants des géo-matériaux locaux, d’une part les résidus du traitement des phosphates naturels de Hahotoé-Kpogamé et d’autre part l’argilite feuilletée du bassin sédimentaire côtier du Togo. Les travaux réalisés concernent l’adsorption du fluor sur les sorbants considérés. Les paramètres suivants ont été testés en batch sur des solutions synthétiques de fluor : le temps, le pH de la solution, la dose du géo-matériau et la concentration du fluor. Le fluor a été dosé par spectromètrie d’absorption. Les rendements d’adsorption sur les résidus de traitement des phosphates pour des solutions aqueuses de fluor à concentration initiale de 10 mg/L et une concentration en adsorbant de 10 g/L ont été de 49 % à pH 6,5 et 66 % à pH de 4,0. Dans les mêmes conditions expérimentales, les rendements sur les argilites ont été de 28,2 % et 36,3 %. Ces rendements ont logiquement été améliorés en augmentant le dosage en adsorbant (de 2 à 30 g/L). Des essais complémentaires ont été réalisés sur une eau naturelle à une concentration initiale en fluor de 3,76 mg/L."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|doi_________b24ab3e127aa67e2a1017292988d571f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.4267/dechets-sciences-techniques.3534"}],"collectedfrom":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://eid.episciences.org/7781"]}],"language":{"classid":"fra/fre","classname":"French","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146800948,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Foai.episciences.org%2F","datestamp":"2017-05-01","harvestDate":"2021-10-05T19:43:46.821Z","identifier":"oai:episciences.org:eid:7781","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:episciences.org:eid:7781","50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"ISSN: 2778-844X"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Environnement, Ingénierie & Développement"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Episciences.org"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"eid:7781 - Environnement, Ingénierie & Développement, 2017-05-01, N°73 - mai 2017"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"clay"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"geo-materials"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphorite of Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"removal of fluoride"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"argilite"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"élimination du fluor"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"géo-matériaux"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphate de Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"[SDE.IE]Environmental Sciences/Environmental Engineering"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Défluoruration des eaux à l’aide des résidus du traitement des phosphates naturels et des argilites feuilletées"}]} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"dateofcollection":"2020-06-01T07:11:47.22Z","dateoftransformation":"2020-07-25T07:25:11.051Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The results of treatment of 21 patients with multiple injuries, including 12 (57.2%) patients with lesions of limb bones, 9 (42.8%) patients with injuries of the pelvis treated at the Department of Traumatology number 2, 5 for the period of 2013 to 2014 were analyzed by the authors. Developed gentle immobilization of the lower limbs in patients with multiple injuries provides adequate fixation and extension of the lower limb in the intensive care period and during emergency external fixation by the device of external fixation device shin bone or hip.The introduction of surgical treatment of patients in the acute period of polytrauma, using minimally invasive fracture fixation technology allowed providing the early stabilization of the victim’s condition, to avoid diagnostic errors and obtain positive results of treatment in 98% of cases."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"distributionlocation":"","hostedby":{"key":"10|07b5c0ccd4fe::fae55652d82f1c4f6a6e3d2637b9ebe1","value":"World Science"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://rsglobal.pl/index.php/ws/article/view/895"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2413-1032","issnPrinted":"2414-6404","name":"World Science","sp":"","vol":""},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635147062560,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frsglobal.pl%2Findex.php%2Findex%2Foai","datestamp":"2020-05-23T18:09:46Z","harvestDate":"2020-06-01T07:11:47.22Z","identifier":"oai:ojs2.rsglobal.pl:article/895","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:ojs2.rsglobal.pl:article/895","50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"RS Global Sp. z O.O."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Vol 3 No 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Том 3 № 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2414-6404"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2413-1032"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"}]} +{"author":[{"fullname":"Kemppainen, Mika","name":"Mika","pid":[],"rank":1,"surname":"Kemppainen"},{"fullname":"Virkkunen, Iikka","name":"Iikka","pid":[],"rank":2,"surname":"Virkkunen"},{"fullname":"Pitkänen, Jorma","name":"Jorma","pid":[],"rank":3,"surname":"Pitkänen"},{"fullname":"Paussu, Raimo","name":"Raimo","pid":[],"rank":4,"surname":"Paussu"},{"fullname":"Hänninen, Hannu","name":"Hannu","pid":[],"rank":5,"surname":"Hänninen"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2021-10-04T12:38:49.007Z","dateoftransformation":"2021-10-04T16:48:12.265Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::210c52944502777ba567442480e6a76e","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146785228,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-09-23T08:45:14Z","harvestDate":"2021-10-04T12:38:49.007Z","identifier":"oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::210c52944502777ba567442480e6a76e","oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kemppainen , M , Virkkunen , I , Pitkänen , J , Paussu , R & Hänninen , H 2003 , ' Comparison of realistic artificial cracks and in-service cracks ' , The e-Journal of Nondestructive Testing & Ultrasonics , vol. 8 , no. 3 , 6 . < http://www.ndt.net/article/ecndt02/401/401.htm >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Comparison of realistic artificial cracks and in-service cracks"}]} +{"author":[{"fullname":"Kelhä, Väinö","name":"Väinö","pid":[],"rank":1,"surname":"Kelhä"},{"fullname":"Manninen, M.","name":"M.","pid":[],"rank":2,"surname":"Manninen"},{"fullname":"Oittinen, P.","name":"P.","pid":[],"rank":3,"surname":"Oittinen"},{"fullname":"Tiesmäki, Jarkko","name":"Jarkko","pid":[],"rank":4,"surname":"Tiesmäki"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"dateofcollection":"2021-10-04T12:35:21.081Z","dateoftransformation":"2021-10-04T18:07:22.62Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3754cff043a1700077031ea29f8cc240","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146808523,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-08-31T11:55:56Z","harvestDate":"2021-10-04T12:35:21.081Z","identifier":"oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::3754cff043a1700077031ea29f8cc240","oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kelhä , V , Manninen , M , Oittinen , P & Tiesmäki , J 1974 , ' A parallel plate tackmeter for measuring the splitting resistance of printing inks ' , Surface Coatings International: JOCCA , vol. 57 , pp. 184-188 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A parallel plate tackmeter for measuring the splitting resistance of printing inks"}]} +{"author":[{"fullname":"Mononen, Petri","name":"Petri","pid":[],"rank":1,"surname":"Mononen"},{"fullname":"Innamaa, Satu","name":"Satu","pid":[],"rank":2,"surname":"Innamaa"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"2021-10-04T12:44:18.145Z","dateoftransformation":"2021-10-04T18:11:36.436Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3875365f5052758953b072682e62bc80","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8602dae4-00e8-4f45-828b-65a367eb4730"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146809712,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-09-17T12:01:51Z","harvestDate":"2021-10-04T12:44:18.145Z","identifier":"oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","50|355e65625b88::3875365f5052758953b072682e62bc80"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mononen , P & Innamaa , S 2013 , ' Enhancing journey quality : Field Operational Test of Aftermarket and Nomadic Devices in Vehicles ' , Baltic Transport Journal , pp. 46-47 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Enhancing journey quality:Field Operational Test of Aftermarket and Nomadic Devices in Vehicles"}]} +{"author":[{"fullname":"Tsupari, Eemeli","name":"Eemeli","pid":[],"rank":1,"surname":"Tsupari"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"dateofcollection":"2021-10-04T12:44:48.87Z","dateoftransformation":"2021-10-04T19:47:43.327Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Despite international agreements, global greenhouse gas (GHG) emissions have not decreased according to the targets. Consequently, our generation is creating an enormous problem for future generations. As climate change is a global problem, GHG emissions must decrease globally. Consequently, international policies are needed, actions should be effective and the impacts should be assessed with broad boundaries. In Europe, the cornerstone of climate policy is the EU Emissions Trading Scheme (EU ETS) but the rebound impacts within the EU ETS are often excluded in the assessments. This dissertation examines the impacts of major CO2 emission reduction solutions with different system boundaries, highlighting the importance of boundary selection on the results. In addition, the economic feasibilities of the selected solutions are evaluated.The case examples represent the most important sectors in terms of global CO2 emissions, such as electricity and heat production, the steel industry and transport. The studied technologies include efficient Waste-to-Energy (WtE) concepts with high power-to-heat ratio, utilisation of CO2 Capture and Storage (CCS) in different applications, replacing steel mill blast furnaces with Oxygen Blast Furnaces (OBF), Combined Heat and Power (CHP) and Carbon Capture and Utilisation (CCU) for storable fuels, which can be used for example in transportation. The results highlight the importance of the consequences in the electricity production system as well as the rebound impacts in the EU ETS. For example, the studied concepts to decrease direct GHG emissions of steel mills lead to increased power purchase from markets and consequently increase in emissions of the power system. The impacts of CCU concepts based on electrolysis increase the emissions in electricity production but enable a decrease in the usage of fossil fuels in transportation. In addition, converting electricity to storable fuels enable higher shares of variable solar and wind energy in the power systems. The consequences in the power systems are complex, including for example the impacts on electricity imports and exports, future investments and the EU ETS. Even if these impacts can be recognised by qualitative means, unambiguous quantitative consequences cannot be given. Understanding the decisive impacts of the framework and boundaries is crucial to interpreting different assessments and making effective actions and policy decisions. Solutions which decrease emissions within a narrow system boundary can actually increase the emissions of the broader system."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::54a1c76f520bb2c8da27d12e42891088","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0006","classname":"Doctoral thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d62ac5ef-7347-400f-95b2-59d970ceb505"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146840046,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-05-18T10:33:00Z","harvestDate":"2021-10-04T12:44:48.87Z","identifier":"oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","50|355e65625b88::54a1c76f520bb2c8da27d12e42891088"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aalto University"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tsupari , E 2018 , ' Impact of system boundaries on the effectiveness of climate change mitigation actions : Dissertation ' , Doctor Degree , Aalto University . < http://urn.fi/URN:ISBN:978-952-60-8358-2 >"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"energy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"environmental science"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"climate change mitigation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"greenhouse gases"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"carbon dioxide"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"emissions trading"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"economic feasibility"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/dk/atira/pure/sustainabledevelopmentgoals/climate_action"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"SDG 13 - Climate Action"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Impact of system boundaries on the effectiveness of climate change mitigation actions:Dissertation"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json new file mode 100644 index 0000000000..e9d045acab --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json @@ -0,0 +1,6 @@ +{"author":[{"fullname":"Tanouayi, Gnon","name":"Gnon","pid":[],"rank":1,"surname":"Tanouayi"},{"fullname":"GNANDI, Kissao","name":"Kissao","pid":[],"rank":2,"surname":"Gnandi"},{"fullname":"Ouro-Sama, Kamilou","name":"Kamilou","pid":[],"rank":3,"surname":"Ouro-Sama"},{"fullname":"Ahoudi, Housséni","name":"Housséni","pid":[],"rank":4,"surname":"Ahoudi"},{"fullname":"Solitoke, Hodabalo Dhéoulaba","name":"Hodabalo Dhéoulaba","pid":[],"rank":5,"surname":"Solitoke"},{"fullname":"Badassan, Tchaa Esso-Essinam","name":"Tchaa Esso-Essinam","pid":[],"rank":6,"surname":"Badassan"},{"fullname":"Nyametso, A. Yawovi","name":"A. Yawovi","pid":[],"rank":7,"surname":"Nyametso"},{"fullname":"Agbéko, Aduayi-Akué Adoté","name":"Aduayi-Akué Adoté","pid":[],"rank":8,"surname":"Agbéko"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Importer of dst articles previously hosted by inist Eid system account"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"dateofcollection":"2021-10-05T19:43:46.821Z","dateoftransformation":"2021-10-05T20:15:55.705Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This study is a contribution to the development of adsorption techniques for the removal of fluoride in natural waters. The work is carried out on a laboratory scale using local geo-materials sorbents, on the one hand the residues from the treatment of natural phosphorite of Hahotoé-Kpogamé and on the other hand the attapulgite clay mineral from the costal basin of Togo. The work carried out concerns the adsorption of fluoride on those sorbents. The following parameters are batch tested on synthetic fluoride solutions: time, solution pH, geo-material dose and fluoride concentration. The fluoride is analyzed by absorption spectrometry. The adsorption yields on the phosphorite treatment residues for aqueous fluoride solutions at an initial concentration of 10 mg/L and an adsorbent concentration of 10 g/L are 49 % at pH 6.5 and 66 % at pH 4.0. In the same experimental conditions, the yields on clay minerals are 28.2 % and 36.3 %. These yields are logically improved by increasing the adsorbent dosage (from 2 to 30 g/L). Additional tests are carried out on natural water at an initial fluoride concentration of 3.76 mg/L."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Cette étude est une contribution au développement de techniques d’adsorption pour l’élimination du fluor dans les eaux naturelles. Les travaux ont été réalisés à l’échelle du laboratoire en utilisant comme sorbants des géo-matériaux locaux, d’une part les résidus du traitement des phosphates naturels de Hahotoé-Kpogamé et d’autre part l’argilite feuilletée du bassin sédimentaire côtier du Togo. Les travaux réalisés concernent l’adsorption du fluor sur les sorbants considérés. Les paramètres suivants ont été testés en batch sur des solutions synthétiques de fluor : le temps, le pH de la solution, la dose du géo-matériau et la concentration du fluor. Le fluor a été dosé par spectromètrie d’absorption. Les rendements d’adsorption sur les résidus de traitement des phosphates pour des solutions aqueuses de fluor à concentration initiale de 10 mg/L et une concentration en adsorbant de 10 g/L ont été de 49 % à pH 6,5 et 66 % à pH de 4,0. Dans les mêmes conditions expérimentales, les rendements sur les argilites ont été de 28,2 % et 36,3 %. Ces rendements ont logiquement été améliorés en augmentant le dosage en adsorbant (de 2 à 30 g/L). Des essais complémentaires ont été réalisés sur une eau naturelle à une concentration initiale en fluor de 3,76 mg/L."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.4267/dechets-sciences-techniques.3534"}],"collectedfrom":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://eid.episciences.org/7781"]}],"language":{"classid":"fra/fre","classname":"French","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146800948,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Foai.episciences.org%2F","datestamp":"2017-05-01","harvestDate":"2021-10-05T19:43:46.821Z","identifier":"oai:episciences.org:eid:7781","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:episciences.org:eid:7781","50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"ISSN: 2778-844X"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Environnement, Ingénierie & Développement"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Episciences.org"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"eid:7781 - Environnement, Ingénierie & Développement, 2017-05-01, N°73 - mai 2017"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"clay"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"geo-materials"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphorite of Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"removal of fluoride"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"argilite"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"élimination du fluor"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"géo-matériaux"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphate de Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"[SDE.IE]Environmental Sciences/Environmental Engineering"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Défluoruration des eaux à l’aide des résidus du traitement des phosphates naturels et des argilites feuilletées"}]} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"dateofcollection":"2020-06-01T07:11:47.22Z","dateoftransformation":"2020-07-25T07:25:11.051Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The results of treatment of 21 patients with multiple injuries, including 12 (57.2%) patients with lesions of limb bones, 9 (42.8%) patients with injuries of the pelvis treated at the Department of Traumatology number 2, 5 for the period of 2013 to 2014 were analyzed by the authors. Developed gentle immobilization of the lower limbs in patients with multiple injuries provides adequate fixation and extension of the lower limb in the intensive care period and during emergency external fixation by the device of external fixation device shin bone or hip.The introduction of surgical treatment of patients in the acute period of polytrauma, using minimally invasive fracture fixation technology allowed providing the early stabilization of the victim’s condition, to avoid diagnostic errors and obtain positive results of treatment in 98% of cases."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"distributionlocation":"","hostedby":{"key":"10|07b5c0ccd4fe::fae55652d82f1c4f6a6e3d2637b9ebe1","value":"World Science"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://rsglobal.pl/index.php/ws/article/view/895"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2413-1032","issnPrinted":"2414-6404","name":"World Science","sp":"","vol":""},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635147062560,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frsglobal.pl%2Findex.php%2Findex%2Foai","datestamp":"2020-05-23T18:09:46Z","harvestDate":"2020-06-01T07:11:47.22Z","identifier":"oai:ojs2.rsglobal.pl:article/895","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:ojs2.rsglobal.pl:article/895","50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"RS Global Sp. z O.O."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Vol 3 No 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Том 3 № 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2414-6404"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2413-1032"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"}]} +{"author":[{"fullname":"Kemppainen, Mika","name":"Mika","pid":[],"rank":1,"surname":"Kemppainen"},{"fullname":"Virkkunen, Iikka","name":"Iikka","pid":[],"rank":2,"surname":"Virkkunen"},{"fullname":"Pitkänen, Jorma","name":"Jorma","pid":[],"rank":3,"surname":"Pitkänen"},{"fullname":"Paussu, Raimo","name":"Raimo","pid":[],"rank":4,"surname":"Paussu"},{"fullname":"Hänninen, Hannu","name":"Hannu","pid":[],"rank":5,"surname":"Hänninen"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2021-10-04T12:38:49.007Z","dateoftransformation":"2021-10-04T16:48:12.265Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::210c52944502777ba567442480e6a76e","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146785228,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-09-23T08:45:14Z","harvestDate":"2021-10-04T12:38:49.007Z","identifier":"oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::210c52944502777ba567442480e6a76e","oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kemppainen , M , Virkkunen , I , Pitkänen , J , Paussu , R & Hänninen , H 2003 , ' Comparison of realistic artificial cracks and in-service cracks ' , The e-Journal of Nondestructive Testing & Ultrasonics , vol. 8 , no. 3 , 6 . < http://www.ndt.net/article/ecndt02/401/401.htm >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Comparison of realistic artificial cracks and in-service cracks"}]} +{"author":[{"fullname":"Kelhä, Väinö","name":"Väinö","pid":[],"rank":1,"surname":"Kelhä"},{"fullname":"Manninen, M.","name":"M.","pid":[],"rank":2,"surname":"Manninen"},{"fullname":"Oittinen, P.","name":"P.","pid":[],"rank":3,"surname":"Oittinen"},{"fullname":"Tiesmäki, Jarkko","name":"Jarkko","pid":[],"rank":4,"surname":"Tiesmäki"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"dateofcollection":"2021-10-04T12:35:21.081Z","dateoftransformation":"2021-10-04T18:07:22.62Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3754cff043a1700077031ea29f8cc240","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146808523,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-08-31T11:55:56Z","harvestDate":"2021-10-04T12:35:21.081Z","identifier":"oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::3754cff043a1700077031ea29f8cc240","oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kelhä , V , Manninen , M , Oittinen , P & Tiesmäki , J 1974 , ' A parallel plate tackmeter for measuring the splitting resistance of printing inks ' , Surface Coatings International: JOCCA , vol. 57 , pp. 184-188 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A parallel plate tackmeter for measuring the splitting resistance of printing inks"}]} +{"author":[{"fullname":"Mononen, Petri","name":"Petri","pid":[],"rank":1,"surname":"Mononen"},{"fullname":"Innamaa, Satu","name":"Satu","pid":[],"rank":2,"surname":"Innamaa"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"2021-10-04T12:44:18.145Z","dateoftransformation":"2021-10-04T18:11:36.436Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3875365f5052758953b072682e62bc80","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8602dae4-00e8-4f45-828b-65a367eb4730"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146809712,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-09-17T12:01:51Z","harvestDate":"2021-10-04T12:44:18.145Z","identifier":"oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","50|355e65625b88::3875365f5052758953b072682e62bc80"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mononen , P & Innamaa , S 2013 , ' Enhancing journey quality : Field Operational Test of Aftermarket and Nomadic Devices in Vehicles ' , Baltic Transport Journal , pp. 46-47 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Enhancing journey quality:Field Operational Test of Aftermarket and Nomadic Devices in Vehicles"}]} +{"author":[{"fullname":"Tsupari, Eemeli","name":"Eemeli","pid":[],"rank":1,"surname":"Tsupari"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"dateofcollection":"2021-10-04T12:44:48.87Z","dateoftransformation":"2021-10-04T19:47:43.327Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Despite international agreements, global greenhouse gas (GHG) emissions have not decreased according to the targets. Consequently, our generation is creating an enormous problem for future generations. As climate change is a global problem, GHG emissions must decrease globally. Consequently, international policies are needed, actions should be effective and the impacts should be assessed with broad boundaries. In Europe, the cornerstone of climate policy is the EU Emissions Trading Scheme (EU ETS) but the rebound impacts within the EU ETS are often excluded in the assessments. This dissertation examines the impacts of major CO2 emission reduction solutions with different system boundaries, highlighting the importance of boundary selection on the results. In addition, the economic feasibilities of the selected solutions are evaluated.The case examples represent the most important sectors in terms of global CO2 emissions, such as electricity and heat production, the steel industry and transport. The studied technologies include efficient Waste-to-Energy (WtE) concepts with high power-to-heat ratio, utilisation of CO2 Capture and Storage (CCS) in different applications, replacing steel mill blast furnaces with Oxygen Blast Furnaces (OBF), Combined Heat and Power (CHP) and Carbon Capture and Utilisation (CCU) for storable fuels, which can be used for example in transportation. The results highlight the importance of the consequences in the electricity production system as well as the rebound impacts in the EU ETS. For example, the studied concepts to decrease direct GHG emissions of steel mills lead to increased power purchase from markets and consequently increase in emissions of the power system. The impacts of CCU concepts based on electrolysis increase the emissions in electricity production but enable a decrease in the usage of fossil fuels in transportation. In addition, converting electricity to storable fuels enable higher shares of variable solar and wind energy in the power systems. The consequences in the power systems are complex, including for example the impacts on electricity imports and exports, future investments and the EU ETS. Even if these impacts can be recognised by qualitative means, unambiguous quantitative consequences cannot be given. Understanding the decisive impacts of the framework and boundaries is crucial to interpreting different assessments and making effective actions and policy decisions. Solutions which decrease emissions within a narrow system boundary can actually increase the emissions of the broader system."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::54a1c76f520bb2c8da27d12e42891088","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0006","classname":"Doctoral thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d62ac5ef-7347-400f-95b2-59d970ceb505"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146840046,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-05-18T10:33:00Z","harvestDate":"2021-10-04T12:44:48.87Z","identifier":"oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","50|355e65625b88::54a1c76f520bb2c8da27d12e42891088"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aalto University"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tsupari , E 2018 , ' Impact of system boundaries on the effectiveness of climate change mitigation actions : Dissertation ' , Doctor Degree , Aalto University . < http://urn.fi/URN:ISBN:978-952-60-8358-2 >"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"energy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"environmental science"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"climate change mitigation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"greenhouse gases"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"carbon dioxide"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"emissions trading"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"economic feasibility"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/dk/atira/pure/sustainabledevelopmentgoals/climate_action"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"SDG 13 - Climate Action"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Impact of system boundaries on the effectiveness of climate change mitigation actions:Dissertation"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json new file mode 100644 index 0000000000..40ad881e38 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json @@ -0,0 +1,50 @@ +{"doi":"50|doi_________b24ab3e127aa67e2a1017292988d571f","level1":"engineering and technology","level2":"nano-technology","level3":"nanoscience & nanotechnology"} +{"doi":"50|doi_________f648499be6ba8e83226834167f22a7cb","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"} +{"doi":"50|doi_________439b0885db30c66ecf62a2a6a4451116","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"} +{"doi":"50|doi_________c22f065c9ab34fee87a3952fb79f5ee6","level1":"natural sciences","level2":"NULL","level3":"NULL"} +{"doi":"50|doi_________9ea3d8140aaa8add5e929be92f0dab13","level1":"natural sciences","level2":"NULL","level3":"NULL"} +{"doi":"50|doi_________12f446645a131ab55fc5dabf6692da31","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} +{"doi":"50|doi_________550a40c9b57fcc974c127ad69dd60df2","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"environmental sciences"} +{"doi":"50|doi_________6b95eee8d0eff26b7cca8e960968ad62","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} +{"doi":"50|doi_________5ff24d570b3984ddcf04f58b771ad635","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} +{"doi":"50|doi_________b909cd0953065bc611bac1d89e96bf6c","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} +{"doi":"50|doi_________d56d9dc21f317b3e009d5b6c8ea87212","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} +{"doi":"50|doi_________3ea4d5309ce7934ed281342dd1f70867","level1":"natural sciences","level2":"NULL","level3":"NULL"} +{"doi":"50|doi_________f23029f6f070b4b2e1c852ce7f9f5f32","level1":"medical and health sciences","level2":"other medical science","level3":"health policy & services"} +{"doi":"50|doi_________2ea2433fb314647e72cab828dd529f00","level1":"natural sciences","level2":"biological sciences","level3":"plant biology & botany"} +{"doi":"50|doi_________ffe10c217b3a96f7584cf09fdad579e1","level1":"engineering and technology","level2":"NULL","level3":"NULL"} +{"doi":"50|doi_________45e682be5a57e2fabea2c02ba0752f1a","level1":"natural sciences","level2":"NULL","level3":"NULL"} +{"doi":"50|doi_________312f5db945545c66e6f18c62faf6290c","level1":"medical and health sciences","level2":"health sciences","level3":"NULL"} +{"doi":"50|doi_________d53bd3a4e48921415e554a4edc91d6db","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} +{"doi":"50|doi_________2819592482582ff33363069d116abd64","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} +{"doi":"50|doi_________e41a47550fed93904e443123b752bc2b","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} +{"doi":"50|doi_________43e8bf6e95cd3043f510771cf0b0c984","level1":"engineering and technology","level2":"mechanical engineering","level3":"mechanical engineering & transports"} +{"doi":"50|doi_________000c1dc14e99b89fc52976533338fe4c","level1":"engineering and technology","level2":"mechanical engineering","level3":"mechanical engineering & transports"} +{"doi":"50|doi_________5cf55e49aebd633f1326d28451d1f6e5","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"computer hardware & architecture"} +{"doi":"50|doi_________5b79bd7bd9f87361b4a4abc3cbb2df75","level1":"natural sciences","level2":"mathematics","level3":"numerical & computational mathematics"} +{"doi":"50|doi_________51a7b4738d332570beb98be13e95d369","level1":"natural sciences","level2":"chemical sciences","level3":"NULL"} +{"doi":"50|doi_________6a9271220296585204ff81d651215d63","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} +{"doi":"50|doi_________a2b8554df106bb29a0035e2ea56046a2","level1":"medical and health sciences","level2":"health sciences","level3":"biochemistry & molecular biology"} +{"doi":"50|doi_________fa2b92d5140f67a6c69c970a9e8d2cf0","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"} +{"doi":"50|doi_________b5f6c78a31abbb806e1e375a73231dd1","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"} +{"doi":"50|doi_________358777f48b491554cdee63c4dcabe6aa","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"} +{"doi":"50|doi_________809f282159a9f46a41f92b6fc8d1095f","level1":"natural sciences","level2":"biological sciences","level3":"marine biology & hydrobiology"} +{"doi":"50|doi_________94f1b0b1509700d4370cbe7571000bfc","level1":"engineering and technology","level2":"industrial biotechnology","level3":"industrial engineering & automation"} +{"doi":"50|doi_________33390d9d5163da63f73acd173ef704e0","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} +{"doi":"50|doi_________3149f0a909641720480f62faf1f886b9","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} +{"doi":"50|doi_________1e29deb6473bdb2c84e5966fc4c557bb","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} +{"doi":"50|doi_________effab60be43d653f2cccf1cf6de461df","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} +{"doi":"50|doi_________14a6ce8c28da6e82412720f1330e826d","level1":"natural sciences","level2":"biological sciences","level3":"NULL"} +{"doi":"50|doi_________f59abf7f96244398a465b6e9a7375311","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"NULL"} +{"doi":"50|doi_________b1b7b5c7251fa1901583ec4840e2d3ec","level1":"medical and health sciences","level2":"basic medicine","level3":"biochemistry & molecular biology"} +{"doi":"50|doi_________e024d1b738df3b24bc58fa0228542571","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} +{"doi":"50|doi_________0e03d3592dca1baedfc74f1fbe0b9f22","level1":"natural sciences","level2":"NULL","level3":"NULL"} +{"doi":"50|doi_________0e1777e31f32984e5a261205be28da69","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"} +{"doi":"50|doi_________7b464a5eb02a959aeedc7b051fe0f89f","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"} +{"doi":"50|doi_________32fed3ec7c9ac157b73e050ff1c158d3","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"} +{"doi":"50|doi_________8302f548fe724bc58e53e2bd329cb3c3","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} +{"doi":"50|doi_________44fbef7e57e4123ef44f10bb073f9ef8","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} +{"doi":"50|doi_________c8e7d24b1649024e85bcf9a4c520a65b","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} +{"doi":"50|doi_________877a3c4a72d2a6b4aa60a2da016237df","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} +{"doi":"50|doi_________7af7cda00a082fa8624493d74357d3f7","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} +{"doi":"50|doi_________20201cc71fd003dae0d58016dd4ef4af","level1":"agricultural and veterinary sciences","level2":"agriculture, forestry, and fisheries","level3":"agronomy & agriculture"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/publicationmatch.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/publicationmatch.json new file mode 100644 index 0000000000..a01ce3f687 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/publicationmatch.json @@ -0,0 +1,6 @@ +{"collectedfrom":[{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":true,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635146800948,"id":"50|doi_________b24ab3e127aa67e2a1017292988d571f","originalId":["oai:episciences.org:eid:7781","50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072"],"pid":[],"dateofcollection":"2021-10-05T19:43:46.821Z","dateoftransformation":"2021-10-05T20:15:55.705Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-10-05T19:43:46.821Z","altered":true,"baseURL":"https%3A%2F%2Foai.episciences.org%2F","identifier":"oai:episciences.org:eid:7781","datestamp":"2017-05-01","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]}],"author":[{"fullname":"Tanouayi, Gnon","name":"Gnon","surname":"Tanouayi","rank":1,"pid":[],"affiliation":null},{"fullname":"GNANDI, Kissao","name":"Kissao","surname":"Gnandi","rank":2,"pid":[],"affiliation":null},{"fullname":"Ouro-Sama, Kamilou","name":"Kamilou","surname":"Ouro-Sama","rank":3,"pid":[],"affiliation":null},{"fullname":"Ahoudi, Housséni","name":"Housséni","surname":"Ahoudi","rank":4,"pid":[],"affiliation":null},{"fullname":"Solitoke, Hodabalo Dhéoulaba","name":"Hodabalo Dhéoulaba","surname":"Solitoke","rank":5,"pid":[],"affiliation":null},{"fullname":"Badassan, Tchaa Esso-Essinam","name":"Tchaa Esso-Essinam","surname":"Badassan","rank":6,"pid":[],"affiliation":null},{"fullname":"Nyametso, A. Yawovi","name":"A. Yawovi","surname":"Nyametso","rank":7,"pid":[],"affiliation":null},{"fullname":"Agbéko, Aduayi-Akué Adoté","name":"Aduayi-Akué Adoté","surname":"Agbéko","rank":8,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"fra/fre","classname":"French","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"clay","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"geo-materials","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"phosphorite of Hahotoé-Kpogamé","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"removal of fluoride","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"argilite","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"élimination du fluor","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"géo-matériaux","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"phosphate de Hahotoé-Kpogamé","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"[SDE.IE]Environmental Sciences/Environmental Engineering","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Défluoruration des eaux à l’aide des résidus du traitement des phosphates naturels et des argilites feuilletées","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"This study is a contribution to the development of adsorption techniques for the removal of fluoride in natural waters. The work is carried out on a laboratory scale using local geo-materials sorbents, on the one hand the residues from the treatment of natural phosphorite of Hahotoé-Kpogamé and on the other hand the attapulgite clay mineral from the costal basin of Togo. The work carried out concerns the adsorption of fluoride on those sorbents. The following parameters are batch tested on synthetic fluoride solutions: time, solution pH, geo-material dose and fluoride concentration. The fluoride is analyzed by absorption spectrometry. The adsorption yields on the phosphorite treatment residues for aqueous fluoride solutions at an initial concentration of 10 mg/L and an adsorbent concentration of 10 g/L are 49 % at pH 6.5 and 66 % at pH 4.0. In the same experimental conditions, the yields on clay minerals are 28.2 % and 36.3 %. These yields are logically improved by increasing the adsorbent dosage (from 2 to 30 g/L). Additional tests are carried out on natural water at an initial fluoride concentration of 3.76 mg/L.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Cette étude est une contribution au développement de techniques d’adsorption pour l’élimination du fluor dans les eaux naturelles. Les travaux ont été réalisés à l’échelle du laboratoire en utilisant comme sorbants des géo-matériaux locaux, d’une part les résidus du traitement des phosphates naturels de Hahotoé-Kpogamé et d’autre part l’argilite feuilletée du bassin sédimentaire côtier du Togo. Les travaux réalisés concernent l’adsorption du fluor sur les sorbants considérés. Les paramètres suivants ont été testés en batch sur des solutions synthétiques de fluor : le temps, le pH de la solution, la dose du géo-matériau et la concentration du fluor. Le fluor a été dosé par spectromètrie d’absorption. Les rendements d’adsorption sur les résidus de traitement des phosphates pour des solutions aqueuses de fluor à concentration initiale de 10 mg/L et une concentration en adsorbant de 10 g/L ont été de 49 % à pH 6,5 et 66 % à pH de 4,0. Dans les mêmes conditions expérimentales, les rendements sur les argilites ont été de 28,2 % et 36,3 %. Ces rendements ont logiquement été améliorés en augmentant le dosage en adsorbant (de 2 à 30 g/L). Des essais complémentaires ont été réalisés sur une eau naturelle à une concentration initiale en fluor de 3,76 mg/L.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-05-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[{"value":"ISSN: 2778-844X","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Environnement, Ingénierie & Développement","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Episciences.org","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"eid:7781 - Environnement, Ingénierie & Développement, 2017-05-01, N°73 - mai 2017","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[],"contributor":[{"value":"Importer of dst articles previously hosted by inist Eid system account","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences","dataInfo":null},"url":["https://eid.episciences.org/7781"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.4267/dechets-sciences-techniques.3534","qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-05-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635147062560,"id":"50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b","originalId":["oai:ojs2.rsglobal.pl:article/895","50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b"],"pid":[],"dateofcollection":"2020-06-01T07:11:47.22Z","dateoftransformation":"2020-07-25T07:25:11.051Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-06-01T07:11:47.22Z","altered":true,"baseURL":"https%3A%2F%2Frsglobal.pl%2Findex.php%2Findex%2Foai","identifier":"oai:ojs2.rsglobal.pl:article/895","datestamp":"2020-05-23T18:09:46Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":null,"author":[],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Polytrauma","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"multiple trauma","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"combined injury injury","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"the severity of the damage","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"the device of external fixator","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"submersible osteosynthesis","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"«damage control»","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Polytrauma","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"multiple trauma","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"combined injury injury","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"the severity of the damage","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"the device of external fixator","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"submersible osteosynthesis","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"«damage control»","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The results of treatment of 21 patients with multiple injuries, including 12 (57.2%) patients with lesions of limb bones, 9 (42.8%) patients with injuries of the pelvis treated at the Department of Traumatology number 2, 5 for the period of 2013 to 2014 were analyzed by the authors. Developed gentle immobilization of the lower limbs in patients with multiple injuries provides adequate fixation and extension of the lower limb in the intensive care period and during emergency external fixation by the device of external fixation device shin bone or hip.The introduction of surgical treatment of patients in the acute period of polytrauma, using minimally invasive fracture fixation technology allowed providing the early stabilization of the victim’s condition, to avoid diagnostic errors and obtain positive results of treatment in 98% of cases.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2016-02-28","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"RS Global Sp. z O.O.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"World Science; Vol 3 No 2(6) (2016): World Science; 43-50","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"World Science; Том 3 № 2(6) (2016): World Science; 43-50","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2414-6404","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2413-1032","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"application/pdf","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by/4.0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|07b5c0ccd4fe::fae55652d82f1c4f6a6e3d2637b9ebe1","value":"World Science","dataInfo":null},"url":["https://rsglobal.pl/index.php/ws/article/view/895"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2016-02-28","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"World Science","issnPrinted":"2414-6404","issnOnline":"2413-1032","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}} +{"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":true,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635146785228,"id":"50|355e65625b88::210c52944502777ba567442480e6a76e","originalId":["50|355e65625b88::210c52944502777ba567442480e6a76e","oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"],"pid":[],"dateofcollection":"2021-10-04T12:38:49.007Z","dateoftransformation":"2021-10-04T16:48:12.265Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-10-04T12:38:49.007Z","altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","identifier":"oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405","datestamp":"2019-09-23T08:45:14Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":null,"author":[{"fullname":"Kemppainen, Mika","name":"Mika","surname":"Kemppainen","rank":1,"pid":[],"affiliation":null},{"fullname":"Virkkunen, Iikka","name":"Iikka","surname":"Virkkunen","rank":2,"pid":[],"affiliation":null},{"fullname":"Pitkänen, Jorma","name":"Jorma","surname":"Pitkänen","rank":3,"pid":[],"affiliation":null},{"fullname":"Paussu, Raimo","name":"Raimo","surname":"Paussu","rank":4,"pid":[],"affiliation":null},{"fullname":"Hänninen, Hannu","name":"Hannu","surname":"Hänninen","rank":5,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"title":[{"value":"Comparison of realistic artificial cracks and in-service cracks","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"2003-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[{"value":"Kemppainen , M , Virkkunen , I , Pitkänen , J , Paussu , R & Hänninen , H 2003 , ' Comparison of realistic artificial cracks and in-service cracks ' , The e-Journal of Nondestructive Testing & Ultrasonics , vol. 8 , no. 3 , 6 . < http://www.ndt.net/article/ecndt02/401/401.htm >","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"url":["https://cris.vtt.fi/en/publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2003-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":true,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635146808523,"id":"50|355e65625b88::3754cff043a1700077031ea29f8cc240","originalId":["50|355e65625b88::3754cff043a1700077031ea29f8cc240","oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"],"pid":[],"dateofcollection":"2021-10-04T12:35:21.081Z","dateoftransformation":"2021-10-04T18:07:22.62Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-10-04T12:35:21.081Z","altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","identifier":"oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679","datestamp":"2021-08-31T11:55:56Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":null,"author":[{"fullname":"Kelhä, Väinö","name":"Väinö","surname":"Kelhä","rank":1,"pid":[],"affiliation":null},{"fullname":"Manninen, M.","name":"M.","surname":"Manninen","rank":2,"pid":[],"affiliation":null},{"fullname":"Oittinen, P.","name":"P.","surname":"Oittinen","rank":3,"pid":[],"affiliation":null},{"fullname":"Tiesmäki, Jarkko","name":"Jarkko","surname":"Tiesmäki","rank":4,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"title":[{"value":"A parallel plate tackmeter for measuring the splitting resistance of printing inks","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"1974-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[{"value":"Kelhä , V , Manninen , M , Oittinen , P & Tiesmäki , J 1974 , ' A parallel plate tackmeter for measuring the splitting resistance of printing inks ' , Surface Coatings International: JOCCA , vol. 57 , pp. 184-188 .","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"url":["https://cris.vtt.fi/en/publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"1974-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635146809712,"id":"50|355e65625b88::3875365f5052758953b072682e62bc80","originalId":["oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","50|355e65625b88::3875365f5052758953b072682e62bc80"],"pid":[],"dateofcollection":"2021-10-04T12:44:18.145Z","dateoftransformation":"2021-10-04T18:11:36.436Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-10-04T12:44:18.145Z","altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","identifier":"oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","datestamp":"2021-09-17T12:01:51Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":null,"author":[{"fullname":"Mononen, Petri","name":"Petri","surname":"Mononen","rank":1,"pid":[],"affiliation":null},{"fullname":"Innamaa, Satu","name":"Satu","surname":"Innamaa","rank":2,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"title":[{"value":"Enhancing journey quality:Field Operational Test of Aftermarket and Nomadic Devices in Vehicles","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"2013-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[{"value":"Mononen , P & Innamaa , S 2013 , ' Enhancing journey quality : Field Operational Test of Aftermarket and Nomadic Devices in Vehicles ' , Baltic Transport Journal , pp. 46-47 .","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"url":["https://cris.vtt.fi/en/publications/8602dae4-00e8-4f45-828b-65a367eb4730"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2013-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635146840046,"id":"50|355e65625b88::54a1c76f520bb2c8da27d12e42891088","originalId":["oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","50|355e65625b88::54a1c76f520bb2c8da27d12e42891088"],"pid":[],"dateofcollection":"2021-10-04T12:44:48.87Z","dateoftransformation":"2021-10-04T19:47:43.327Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-10-04T12:44:48.87Z","altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","identifier":"oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","datestamp":"2021-05-18T10:33:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":null,"author":[{"fullname":"Tsupari, Eemeli","name":"Eemeli","surname":"Tsupari","rank":1,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"energy","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"environmental science","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"climate change mitigation","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"greenhouse gases","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"carbon dioxide","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"emissions trading","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"economic feasibility","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"/dk/atira/pure/sustainabledevelopmentgoals/climate_action","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"SDG 13 - Climate Action","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Impact of system boundaries on the effectiveness of climate change mitigation actions:Dissertation","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Despite international agreements, global greenhouse gas (GHG) emissions have not decreased according to the targets. Consequently, our generation is creating an enormous problem for future generations. As climate change is a global problem, GHG emissions must decrease globally. Consequently, international policies are needed, actions should be effective and the impacts should be assessed with broad boundaries. In Europe, the cornerstone of climate policy is the EU Emissions Trading Scheme (EU ETS) but the rebound impacts within the EU ETS are often excluded in the assessments. This dissertation examines the impacts of major CO2 emission reduction solutions with different system boundaries, highlighting the importance of boundary selection on the results. In addition, the economic feasibilities of the selected solutions are evaluated.The case examples represent the most important sectors in terms of global CO2 emissions, such as electricity and heat production, the steel industry and transport. The studied technologies include efficient Waste-to-Energy (WtE) concepts with high power-to-heat ratio, utilisation of CO2 Capture and Storage (CCS) in different applications, replacing steel mill blast furnaces with Oxygen Blast Furnaces (OBF), Combined Heat and Power (CHP) and Carbon Capture and Utilisation (CCU) for storable fuels, which can be used for example in transportation. The results highlight the importance of the consequences in the electricity production system as well as the rebound impacts in the EU ETS. For example, the studied concepts to decrease direct GHG emissions of steel mills lead to increased power purchase from markets and consequently increase in emissions of the power system. The impacts of CCU concepts based on electrolysis increase the emissions in electricity production but enable a decrease in the usage of fossil fuels in transportation. In addition, converting electricity to storable fuels enable higher shares of variable solar and wind energy in the power systems. The consequences in the power systems are complex, including for example the impacts on electricity imports and exports, future investments and the EU ETS. Even if these impacts can be recognised by qualitative means, unambiguous quantitative consequences cannot be given. Understanding the decisive impacts of the framework and boundaries is crucial to interpreting different assessments and making effective actions and policy decisions. Solutions which decrease emissions within a narrow system boundary can actually increase the emissions of the broader system.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Aalto University","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"Tsupari , E 2018 , ' Impact of system boundaries on the effectiveness of climate change mitigation actions : Dissertation ' , Doctor Degree , Aalto University . < http://urn.fi/URN:ISBN:978-952-60-8358-2 >","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0006","classname":"Doctoral thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"url":["https://cris.vtt.fi/en/publications/d62ac5ef-7347-400f-95b2-59d970ceb505"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2018-10-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":null} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 02bc5d8d4e..53b5938e09 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.8.21] + [2.8.22-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From 9cb195314fa15ff689cd12d66c3b299f006e7694 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 11 Nov 2021 10:17:40 +0100 Subject: [PATCH 39/60] implemented and tested resolution of entities --- .../resolution/SparkResolveEntities.scala | 96 +++++++++ .../resolution/SparkResolveRelation.scala | 2 +- .../resolution/resolve_entities_params.json | 6 + .../resolution/ResolveEntitiesTest.scala | 190 ++++++++++++++++++ .../dnetlib/dhp/oa/graph/resolution/dataset | 3 + .../oa/graph/resolution/otherresearchproduct | 2 + .../dhp/oa/graph/resolution/publication | 3 + .../dnetlib/dhp/oa/graph/resolution/software | 1 + .../dnetlib/dhp/oa/graph/resolution/updates | 4 + 9 files changed, 306 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/otherresearchproduct create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/publication create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/software create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/updates diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala new file mode 100644 index 0000000000..60df58e45f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -0,0 +1,96 @@ +package eu.dnetlib.dhp.oa.graph.resolution + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.common.EntityType +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql._ +import org.slf4j.{Logger, LoggerFactory} + +object SparkResolveEntities { + + val mapper = new ObjectMapper() + val entities = List(EntityType.dataset,EntityType.publication, EntityType.software, EntityType.otherresearchproduct) + + def main(args: Array[String]): Unit = { + val log: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/resolution/resolve_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphBasePath = parser.get("graphBasePath") + log.info(s"graphBasePath -> $graphBasePath") + val workingPath = parser.get("workingPath") + log.info(s"workingPath -> $workingPath") + val unresolvedPath = parser.get("unresolvedPath") + log.info(s"unresolvedPath -> $unresolvedPath") + + + resolveEntities(spark, workingPath, unresolvedPath) + + + generateResolvedEntities(spark, workingPath, graphBasePath) + + } + + +def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + import spark.implicits._ + + val rPid: Dataset[(String, String)] = spark.read.load(s"$workingPath/relationResolvedPid").as[(String, String)] + val up: Dataset[(String, Result)] = spark.read.text(unresolvedPath).as[String].map(s => mapper.readValue(s, classOf[Result])).map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, resEncoder)) + + rPid.joinWith(up, rPid("_2").equalTo(up("_1")), "inner").map { + r => + val result = r._2._2 + val dnetId = r._1._1 + result.setId(dnetId) + result + }.write.mode(SaveMode.Overwrite).save(s"$workingPath/resolvedEntities") + } + + + def deserializeObject(input:String, entity:EntityType ) :Result = { + + entity match { + case EntityType.publication => mapper.readValue(input, classOf[Publication]) + case EntityType.dataset => mapper.readValue(input, classOf[OafDataset]) + case EntityType.software=> mapper.readValue(input, classOf[Software]) + case EntityType.otherresearchproduct=> mapper.readValue(input, classOf[OtherResearchProduct]) + } + } + + def generateResolvedEntities(spark:SparkSession, workingPath: String, graphBasePath:String) = { + + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + import spark.implicits._ + + val re:Dataset[Result] = spark.read.load(s"$workingPath/resolvedEntities").as[Result] + entities.foreach { + e => + + spark.read.text(s"$graphBasePath/$e").as[String] + .map(s => deserializeObject(s, e)) + .union(re) + .groupByKey(_.getId) + .reduceGroups { + (x, y) => + x.mergeFrom(y) + x + }.map(_._2) + .filter(r => r.getClass.getSimpleName.toLowerCase != "result") + .map(r => mapper.writeValueAsString(r))(Encoders.STRING) + .write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$workingPath/resolvedGraph/$e") + } + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index db93bc43fa..9e44e017ce 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -128,7 +128,7 @@ object SparkResolveRelation { source != null } - private def extractPidResolvedTableFromJsonRDD(spark: SparkSession, graphPath: String, workingPath: String) = { + def extractPidResolvedTableFromJsonRDD(spark: SparkSession, graphPath: String, workingPath: String) = { import spark.implicits._ val d: RDD[(String, String)] = spark.sparkContext.textFile(s"$graphPath/*") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json new file mode 100644 index 0000000000..f38cc1291f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json @@ -0,0 +1,6 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"u", "paramLongName":"unresolvedPath", "paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala new file mode 100644 index 0000000000..9a142d3c0e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -0,0 +1,190 @@ +package eu.dnetlib.dhp.oa.graph.resolution + + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.schema.common.EntityType +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.dhp.schema.oaf.{Result, StructuredProperty} +import org.apache.commons.io.FileUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql._ +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.TestInstance.Lifecycle +import org.junit.jupiter.api.{AfterAll, BeforeAll, Test, TestInstance} + +import java.nio.file.{Files, Path} +import scala.collection.JavaConverters._ +import scala.io.Source + +@TestInstance(Lifecycle.PER_CLASS) +class ResolveEntitiesTest extends Serializable { + + var workingDir:Path = null + + val FAKE_TITLE = "FAKETITLE" + val FAKE_SUBJECT = "FAKESUBJECT" + + var sparkSession:Option[SparkSession] = None + + + @BeforeAll + def setUp() :Unit = { + workingDir = Files.createTempDirectory(getClass.getSimpleName) + + val conf = new SparkConf() + sparkSession = Some(SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master("local[*]").getOrCreate()) + populateDatasets(sparkSession.get) + generateUpdates(sparkSession.get) + + } + + + @AfterAll + def tearDown():Unit = { + FileUtils.deleteDirectory(workingDir.toFile) + sparkSession.get.stop() + + + } + + + + + def generateUpdates(spark:SparkSession):Unit = { + val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString + + + val pids:List[String] = template.lines.map{id => + val r = new Result + r.setId(id.toLowerCase.trim) + r.setSubject(List(OafMapperUtils.structuredProperty(FAKE_SUBJECT, OafMapperUtils.qualifier("fos","fosCS", "fossSchema", "fossiFIgo"), null)).asJava) + r.setTitle(List(OafMapperUtils.structuredProperty(FAKE_TITLE, OafMapperUtils.qualifier("fos","fosCS", "fossSchema", "fossiFIgo"), null)).asJava) + r + }.map{r => + val mapper = new ObjectMapper() + + mapper.writeValueAsString(r)}.toList + + + val sc =spark.sparkContext + + println(sc.parallelize(pids).count()) + + spark.createDataset(sc.parallelize(pids))(Encoders.STRING).write.mode(SaveMode.Overwrite).option("compression", "gzip").text(s"$workingDir/updates") + + + + + + import spark.implicits._ + implicit val resEncoder: Encoder[Result] = Encoders.bean(classOf[Result]) + val ds = spark.read.text(s"$workingDir/updates").as[String].map{s => val mapper = new ObjectMapper() + mapper.readValue(s, classOf[Result])}.collect() + + + + + assertEquals(4, ds.length) + ds.foreach{r => assertNotNull(r.getSubject)} + ds.foreach{r => assertEquals(1,r.getSubject.size())} + ds.foreach{r => assertNotNull(r.getTitle)} + ds.foreach{r => assertEquals(1,r.getTitle.size())} + + + + ds.flatMap(r => r.getTitle.asScala.map(t => t.getValue)).foreach(t => assertEquals(FAKE_TITLE,t)) + ds.flatMap(r => r.getSubject.asScala.map(t => t.getValue)).foreach(t => assertEquals(FAKE_SUBJECT,t)) + + println("generated Updates") + } + + + def populateDatasets(spark:SparkSession):Unit = { + import spark.implicits._ + val entities =SparkResolveEntities.entities + + entities.foreach{ + e => + val template = Source.fromInputStream(this.getClass.getResourceAsStream(s"$e")).mkString + spark.createDataset(spark.sparkContext.parallelize(template.lines.toList)).as[String].write.option("compression", "gzip").text(s"$workingDir/graph/$e") + println(s"Created Dataset $e") + } + SparkResolveRelation.extractPidResolvedTableFromJsonRDD(spark, s"$workingDir/graph", s"$workingDir/work") + + } + + + @Test + def testResolution():Unit = { + val spark:SparkSession = sparkSession.get + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + SparkResolveEntities.resolveEntities(spark,s"$workingDir/work", s"$workingDir/updates" ) + + val ds = spark.read.load(s"$workingDir/work/resolvedEntities").as[Result] + + assertEquals(3, ds.count()) + + ds.collect().foreach{ + r => + assertTrue(r.getId.startsWith("50")) + } + } + + + + + private def structuredPContainsValue(l:java.util.List[StructuredProperty], exptectedValue:String):Boolean = { + l.asScala.exists(p =>p.getValue!= null && p.getValue.equalsIgnoreCase(exptectedValue)) + } + + @Test + def testUpdate():Unit = { + val spark:SparkSession = sparkSession.get + import spark.implicits._ + implicit val resEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + val m = new ObjectMapper() + SparkResolveEntities.resolveEntities(spark,s"$workingDir/work", s"$workingDir/updates" ) + SparkResolveEntities.generateResolvedEntities(spark,s"$workingDir/work",s"$workingDir/graph" ) + + + + + val pubDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/publication").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.publication)) + val t = pubDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + + + + val datDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/dataset").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.dataset)) + val td = datDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + + + val softDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/software").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.software)) + val ts = softDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + + + val orpDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/otherresearchproduct").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.otherresearchproduct)) + val to = orpDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() + + + assertEquals(0, t) + assertEquals(2, td) + assertEquals(1, ts) + assertEquals(0, to) + + } + + + + + + + + + + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset new file mode 100644 index 0000000000..05c875148c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/otherresearchproduct b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/otherresearchproduct new file mode 100644 index 0000000000..f3693848de --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/otherresearchproduct @@ -0,0 +1,2 @@ +{"author":[{"affiliation":[],"fullname":"Martinec, Nikola","name":"Nikola","pid":[],"rank":1,"surname":"Martinec"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"contactgroup":[],"contactperson":[],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"2021-10-25T05:50:25+0000","dateoftransformation":"2021-10-26T05:23:26.742Z","description":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Simulacije mekih tijela neizostavan su dio realističnog prikaza stvarnog svijeta u različitim virtualnim okruženjima. U ovom radu obrađene su dvije metode simulacije mekih tijela – metoda s oprugama i metoda usklađivanja oblika. Napravljene su programske implementacije obiju metoda te je prikazana usporedba u obliku brzine izvođenja i vizualne kvalitete simulacija. Metoda uspoređivanja oblika napravljena je po uzoru na rad Meshless Deformations Based on Shape Matching [2] dok su kod metode s oprugama, ponajviše za algoritme raspoređivanja opruga, korištena autorova rješenja. U radu su opisani karakteristični dijelovi potrebni za implementaciju kao i problemi koji su se javljali tijekom izrade ovoga rada."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::14cc6e339ce130a10dbc5d9269dee6a3","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"master thesis","classname":"master thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/636614"]}],"language":{"classid":"hrv","classname":"Croatian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433838126,"originalId":["636614","50|57a035e5b1ae::14cc6e339ce130a10dbc5d9269dee6a3"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2013-01-01"}],"resourcetype":{"classid":"master thesis","classname":"master thesis","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"meka tijela; simulacija; usklađivanje oblika; opruge i mase"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Simulacija elastičnih objekata"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Soft bodies simulation"}],"tool":[]} +{"author":[{"affiliation":[],"fullname":"Sobočanec, Sandra","name":"Sandra","pid":[],"rank":1,"surname":"Sobočanec"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"contactgroup":[],"contactperson":[],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"dateofcollection":"2021-10-25T05:50:25+0000","dateoftransformation":"2021-10-26T05:22:35.748Z","description":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Propolis je tijekom stoljeća, a u posljednje vrijeme i u suvremenoj medicinskoj praksi ispitivan i korišten kao protuvirusno, protubakterijsko i protugljivično sredstvo. Biološki učinci propolisa vezani su uz antibiotska, protuupalna i antioksidacijska svojstva. Jedan od sastojaka propolisa su flavonoidi, čija je karakteristika visoki antioksidacijski potencijal zbog kojeg im se pripisuje sposobnost prevencije različitih bolesti kao što su različite vrste tumora, osteoporoza, kardiovaskularne i neurodegenerativne bolesti. U našim ispitivanjima koristili smo nativni propolis u dvije doze (100 mg kg-1, 300 mg kg-1), kojem smo istražili sastav, udio i vrstu flavonoida odgovornih za njegov oksidacijski/antioksidacijski učinak. Krizin je bio prisutan u najvećoj koncentraciji, dok je izoramnetin bio prisutan u najmanjoj. Antioksidacijski potencijal nativnog propolisa u in vitro uvjetima je bio relativno nizak. Međutim, propolis značajno povisuje antioksidacijski učinak in vivo uvjetima u jetri, plućima i mozgu miševa. Učinak nativnog propolisa pokazao ovisio je o dozi i to tako da je u manjoj dozi djelovao antioksidacijski (zaštitno), a u većoj dozi prooksidacijski (štetno). Oksidacijski status nakon obradaa nativnim propolisom nije pokazao tkivnu, ali je pokazao doznu ovisnost. Antioksidacijski status pokazao je tkivnu i doznu ovisnost ; u mozgu nije pokazao promjenu, dok je najveća promjena zabilježena u plućima i u manjoj mjeri u jetri. Promjena aktivnosti antioksidacijskih enzima u jetri nije korelirala s količinom njihove mRNA, dok je u plućima promjena u aktivnosti istih uglavnom bila proporcionalna promjeni u mRNA. Nije zapažena značajna promjena u ekpresiji proteina za sve ispitivane antioksidacijske enzime, niti obzirom na dozu, niti obzirom na ispitivano tkivo. Analizom DNA čipova utvrđeno je da propolis u dozi od 100 mg kg-1 u jetri ima zaštitni učinak, dok je u dozi od 300 mg kg-1 taj učinak prooksidacijski. U plućima je propolis u obje doze imao zaštitni učinak, dok u mozgu nije imao učinka na ispitivane gene. U hiperbaričkim uvjetima propolis je smanjio oksidacijski stres u sva tri ispitivana tkiva u dozi od 100 mg kg-1. U plućima je taj učinak propolis ostvario povećanjem aktivnosti antioksidacijskih enzima (MnSOD, KAT). Antioksidacijski učinak propolisa porijeklom iz Hrvatske do sada nije istraživan u in vivo sistemima. Isto tako po prvi puta se ispituje nativni propolis (tehnologijska inovacija) za razliku od do sada ispitivanih vodenih ili alkoholnih ekstrakata. Rezultati ove studije također pokazuju da sposobnost zaštitnog odgovora u plućima na oksidacijski stres koji postoji u novorođenih, ali se gubi u odraslih jedinki, može biti obnovljena uporabom nativnog propolisa tijekom hiperoksije. Ta spoznaja može pomoći u zaštiti od hiperoksije u populaciji odraslih tijekom terapije kisikom."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::15786c1f0f2930ea867a234fbcb8ef08","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"doctoral thesis","classname":"doctoral thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/286858"]}],"language":{"classid":"hrv","classname":"Croatian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433372341,"originalId":["286858","50|57a035e5b1ae::15786c1f0f2930ea867a234fbcb8ef08"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2006-01-01"}],"resourcetype":{"classid":"doctoral thesis","classname":"doctoral thesis","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"propolis; flavonoidi; miševi; oksidacijski/antioksidacijski status; hiperoksija; normobarički uvjeti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Utjecaj propolisa na oksidacisjki/antioksidacijski status u CBA miša"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Propolis effect on oxidative/antioxidative status in CBA mice"}],"tool":[]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/publication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/publication new file mode 100644 index 0000000000..562a54760e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/publication @@ -0,0 +1,3 @@ +{"author":[{"fullname":"Makkonen, Lasse","name":"Lasse","pid":[],"rank":1,"surname":"Makkonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Watson, Rick"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-01-01"},"dateofcollection":"2021-10-04T12:42:57.502Z","dateoftransformation":"2021-10-04T15:32:51.877Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5a6fbe30-2096-4106-96f4-ed36620d3f73"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433424020,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-10-26T12:07:44Z","harvestDate":"2021-10-04T12:42:57.502Z","identifier":"oai:cris.vtt.fi:publications/5a6fbe30-2096-4106-96f4-ed36620d3f73","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::0b579a3501cf87921448e0a1c7fc8353","oai:cris.vtt.fi:publications/5a6fbe30-2096-4106-96f4-ed36620d3f73"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Irish Wind Energy Association"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Makkonen , L 1997 , Calculation of ice accretion on wind turbine blades . in R Watson (ed.) , EWEC '97: European Wind Energy Conference : Proceedings of the international conference . Irish Wind Energy Association , Slane , European Wind Energy Conference EWEC '97 , Dublin , Ireland , 6/10/97 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Calculation of ice accretion on wind turbine blades"}]} +{"author":[{"fullname":"Peltola, Juho","name":"Juho","pid":[],"rank":1,"surname":"Peltola"},{"fullname":"Karvonen, Lassi","name":"Lassi","pid":[],"rank":2,"surname":"Karvonen"},{"fullname":"Elfvengren, J","name":"J.","pid":[],"rank":3,"surname":"Elfvengren"},{"fullname":"Kolehmainen, J","name":"J.","pid":[],"rank":4,"surname":"Kolehmainen"},{"fullname":"Kallio, Sirpa","name":"Sirpa","pid":[],"rank":5,"surname":"Kallio"},{"fullname":"Pallares, D","name":"D.","pid":[],"rank":6,"surname":"Pallares"},{"fullname":"Johnsson, F","name":"F.","pid":[],"rank":7,"surname":"Johnsson"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"dateofcollection":"2021-10-04T12:31:44.326Z","dateoftransformation":"2021-10-04T17:18:21.611Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This work applies optical probe and particle image velocimetry (PIV) measurements for determining the local characteristics of the solids flow in a CFB riser and discusses benefits and limitations of the two methods. Measurements were carried out in an 8.5 m tall CFB riser with a cross-section of 0.7 m x 0.12 m. The optical probe was used to measure both solids volume fraction and velocity whereas the PIV was limited to solids velocity measurements. In addition, the vertical pressure profile and solids circulation rate were measured by means of pressure transducers. The work demonstrates the challenge of applying detailed solids flow measurements in CFB reactors. Both methods applied are associated with limitations and inaccuracies and it is concluded that a combination of both local measurement methods and the global pressure measurements will improve the interpretation of the solids flow field in CFB risers

"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::2a13bce11d3eb09889706369b114a476","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/c7012d64-5947-4f40-b62f-6c3f232af0b0"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433455504,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-26T12:04:31Z","harvestDate":"2021-10-04T12:31:44.326Z","identifier":"oai:cris.vtt.fi:publications/c7012d64-5947-4f40-b62f-6c3f232af0b0","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/c7012d64-5947-4f40-b62f-6c3f232af0b0","50|355e65625b88::2a13bce11d3eb09889706369b114a476"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institute of Process Engineering"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Peltola , J , Karvonen , L , Elfvengren , J , Kolehmainen , J , Kallio , S , Pallares , D & Johnsson , F 2014 , Measurement of solids velocity and concentration in a large cold CFB model . in CFB-11: Proceedings of the 11th International Conference on Fluidized Bed Technology . Institute of Process Engineering , Beijing, China , pp. 589-594 , 11th International Conference on Fluidized Bed Technology, CFB 2014 , Beijing , China , 14/05/14 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Measurement of solids velocity and concentration in a large cold CFB model"}]} +{"author":[{"fullname":"Marszalec, Janusz","name":"Janusz","pid":[],"rank":1,"surname":"Marszalec"},{"fullname":"Järviluoma, Markku","name":"Markku","pid":[],"rank":2,"surname":"Järviluoma"},{"fullname":"Heikkilä, Tapio","name":"Tapio","pid":[],"rank":3,"surname":"Heikkilä"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2021-10-04T12:42:38.338Z","dateoftransformation":"2021-10-05T01:02:19.804Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::b58939ea13408430070558e913a80bdf","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1117/12.135094"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a5d16720-e901-4605-8c46-2d8d920af7e2"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635433591966,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-01T04:23:59Z","harvestDate":"2021-10-04T12:42:38.338Z","identifier":"oai:cris.vtt.fi:publications/a5d16720-e901-4605-8c46-2d8d920af7e2","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::b58939ea13408430070558e913a80bdf","oai:cris.vtt.fi:publications/a5d16720-e901-4605-8c46-2d8d920af7e2"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"International Society for Optics and Photonics SPIE"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Marszalec , J , Järviluoma , M & Heikkilä , T 1991 , Planning of an active range sensor structure for pose estimation of 3-D regular objects . in Intelligent Robots and Computer Vision X: Neural, Biological, and 3-D Methods . International Society for Optics and Photonics SPIE , Proceedings of SPIE , vol. 1608 , Intelligent Robots and Computer Vision X , Boston , Massachusetts , United States , 10/11/91 . https://doi.org/10.1117/12.135094"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Planning of an active range sensor structure for pose estimation of 3-D regular objects"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/software b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/software new file mode 100644 index 0000000000..da4dd7e9fb --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/software @@ -0,0 +1 @@ +{"author":[{"affiliation":[],"fullname":"Franz, Max","name":"Max","pid":[],"rank":1,"surname":"Franz"},{"affiliation":[],"fullname":"Cheung, Manfred","name":"Manfred","pid":[],"rank":2,"surname":"Cheung"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"@mskcc"}],"fullname":"Sumer, Onur","name":"Onur","pid":[],"rank":3,"surname":"Sumer"},{"affiliation":[],"fullname":"Huck, Gerardo","name":"Gerardo","pid":[],"rank":4,"surname":"Huck"},{"affiliation":[],"fullname":"Fong, Dylan","name":"Dylan","pid":[],"rank":5,"surname":"Fong"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Northeastern University"}],"fullname":"Mullen, Tony","name":"Tony","pid":[],"rank":6,"surname":"Mullen"},{"affiliation":[],"fullname":"Ayhun","name":"","pid":[],"rank":7,"surname":""},{"affiliation":[],"fullname":"Chadkin, Bogdan","name":"Bogdan","pid":[],"rank":8,"surname":"Chadkin"},{"affiliation":[],"fullname":"Metincansiper","name":"","pid":[],"rank":9,"surname":""},{"affiliation":[],"fullname":"Stahl, Joseph","name":"Joseph","pid":[],"rank":10,"surname":"Stahl"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Trisotech"}],"fullname":"Gauthier, Mélanie","name":"Mélanie","pid":[],"rank":11,"surname":"Gauthier"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"@payoneer"}],"fullname":"Sherer, Eli","name":"Eli","pid":[],"rank":12,"surname":"Sherer"},{"affiliation":[],"fullname":"Bumbu","name":"","pid":[],"rank":13,"surname":""},{"affiliation":[],"fullname":"Sidlovsky, Yaroslav","name":"Yaroslav","pid":[],"rank":14,"surname":"Sidlovsky"},{"affiliation":[],"fullname":"Trott, Rich","name":"Rich","pid":[],"rank":15,"surname":"Trott"},{"affiliation":[],"fullname":"Beynon, Mike","name":"Mike","pid":[],"rank":16,"surname":"Beynon"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"University of Toronto"}],"fullname":"Lopes, Christian","name":"Christian","pid":[],"rank":17,"surname":"Lopes"},{"affiliation":[],"fullname":"Li, Alexander","name":"Alexander","pid":[],"rank":18,"surname":"Li"},{"affiliation":[],"fullname":"Cheng, Rui","name":"Rui","pid":[],"rank":19,"surname":"Cheng"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Bio + CS ⇝ ❤"}],"fullname":"Wilzbach, Sebastian","name":"Sebastian","pid":[],"rank":20,"surname":"Wilzbach"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"@atlassian"}],"fullname":"Dias, Mike","name":"Mike","pid":[],"rank":21,"surname":"Dias"},{"affiliation":[],"fullname":"Janit Mehta","name":"","pid":[],"rank":22,"surname":""},{"affiliation":[],"fullname":"Meira, Gui","name":"Gui","pid":[],"rank":23,"surname":"Meira"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"InfoTrack"}],"fullname":"Ryding, Daniel","name":"Daniel","pid":[],"rank":24,"surname":"Ryding"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Clarivate Analytics"}],"fullname":"Mosca, Roberto","name":"Roberto","pid":[],"rank":25,"surname":"Mosca"},{"affiliation":[],"fullname":"Jalil, F.","name":"F.","pid":[],"rank":26,"surname":"Jalil"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"@testdouble"}],"fullname":"Greenwood, Josh","name":"Josh","pid":[],"rank":27,"surname":"Greenwood"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Adpearance"}],"fullname":"Buchmann, Eric","name":"Eric","pid":[],"rank":28,"surname":"Buchmann"},{"affiliation":[],"fullname":", Dominic","name":"","pid":[],"rank":29,"surname":""},{"affiliation":[],"fullname":"Osborn, Connor","name":"Connor","pid":[],"rank":30,"surname":"Osborn"}],"bestaccessright":{"classid":"OPEN SOURCE","classname":"Open Source","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-29"},"dateofcollection":"2018-10-28T00:39:04.337Z","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Graph theory (network) library for visualisation and analysis"}],"documentationUrl":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|doi_________::0682c24009ab5c1624d1c032783feff1","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-29"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.5281/zenodo.1473694","https://zenodo.org/record/1473694"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.5281/zenodo.1473694"]},{"accessright":{"classid":"OPEN SOURCE","classname":"Open Source","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.5281/zenodo.1473694"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"value":"2018-10-29"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://dx.doi.org/10.5281/zenodo.1473694"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-29"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0000","classname":"Unknown","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.5281/zenodo.1473694"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635435250840,"license":[],"measures":[],"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-10T11:21:15.546Z","identifier":"10.5281/zenodo.1473694","metadataNamespace":""}},"originalId":["50|datacite____::0682c24009ab5c1624d1c032783feff1","10.5281/zenodo.1473694","oai:zenodo.org:1473694","50|od______2659::098cabdf6ac7d52b3d969c89a1ae95ba","50|r37b0ad08687::098cabdf6ac7d52b3d969c89a1ae95ba"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1473694"}],"programmingLanguage":{"classid":"","classname":"","schemeid":"","schemename":""},"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Zenodo"},"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2018-10-29"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cytoscape/Cytoscape.Js V3.2.19"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/updates b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/updates new file mode 100644 index 0000000000..04ad7a79bd --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/updates @@ -0,0 +1,4 @@ +unresolved::10.17026/dans-x3z-fsq5::doi +unresolved::10.17026/dans-xsw-qtnx::doi +unresolved::10.5281/zenodo.1473694::doi +unresolved::10.17632/fake::doi From 2ca0a436ad60f8110c09379b7c365cdfcfcf0480 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 11 Nov 2021 10:25:42 +0100 Subject: [PATCH 40/60] added SparkResolveEntities node to the oozie wf --- .../resolution/SparkResolveEntities.scala | 17 ++++++++-- .../graph/resolution/oozie_app/workflow.xml | 32 +++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index 60df58e45f..afd195ed04 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -2,9 +2,11 @@ package eu.dnetlib.dhp.oa.graph.resolution import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.common.HdfsSupport import eu.dnetlib.dhp.schema.common.EntityType import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.SparkConf import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} @@ -34,13 +36,22 @@ object SparkResolveEntities { val unresolvedPath = parser.get("unresolvedPath") log.info(s"unresolvedPath -> $unresolvedPath") + val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) + fs.mkdirs(new Path(workingPath)) resolveEntities(spark, workingPath, unresolvedPath) - - generateResolvedEntities(spark, workingPath, graphBasePath) - } + // TO BE conservative we keep the original entities in the working dir + // and save the resolved entities on the graphBasePath + //In future these lines of code should be removed + entities.foreach { + e => + fs.rename(new Path(s"$graphBasePath/$e"), new Path(s"$workingPath/${e}_old")) + fs.rename(new Path(s"$workingPath/resolvedGraph/$e"), new Path(s"$graphBasePath/$e")) + } + +} def resolveEntities(spark: SparkSession, workingPath: String, unresolvedPath: String) = { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 52f0e6e9d0..4773fc87c6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -4,6 +4,10 @@ graphBasePath the path of the graph + + unresolvedPath + the path of the unresolved Entities + @@ -36,5 +40,33 @@ + + + + yarn + cluster + Resolve Relations in raw graph + eu.dnetlib.dhp.oa.graph.resolution.SparkResolveEntities + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=10000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn + --graphBasePath${graphBasePath} + --unresolvedPath${unresolvedPath} + --workingPath${workingDir} + + + + + + \ No newline at end of file From 935062edecc4fe1f87fdc0fd5ca7f07f2a5e8e56 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 11 Nov 2021 16:11:25 +0100 Subject: [PATCH 41/60] [Bypass Action Set] creation of unresolved entities --- .../createunresolvedentities/Constants.java | 55 ++++ .../createunresolvedentities}/GetFOSData.java | 18 +- .../PrepareBipFinder.java | 144 ++++++++++ .../PrepareFOSSparkJob.java | 74 ++++- .../SparkSaveUnresolved.java | 79 ++++++ .../model/BipDeserialize.java | 2 +- .../model/BipScore.java | 2 +- .../model/FOSDataModel.java | 2 +- .../model/KeyValue.java | 2 +- .../model/Score.java | 2 +- .../bip_prepare_parameters.json | 4 +- .../distribute_fos_parameters.json | 0 .../get_fos_parameters.json | 6 + .../oozie_app/config-default.xml | 30 +++ .../oozie_app/workflow.xml | 155 +++++++++++ .../produce_unresolved_parameters.json} | 16 +- .../createunresolvedentities/PrepareTest.java | 250 +++++++++++++++++ .../createunresolvedentities/ProduceTest.java | 234 ++++++++++++++++ .../createunresolvedentities}/bip/bip.json | 2 +- .../createunresolvedentities}/fos/fos.json | 0 .../fos/h2020_fos_sbs.csv | 0 .../eu/dnetlib/dhp/bypassactionset/Utils.java | 17 -- .../bipfinder/PrepareBipFinder.java | 89 ------ .../bipfinder/SparkUpdateBip.java | 131 --------- .../bypassactionset/fos/SparkUpdateFOS.java | 121 --------- .../opencitations/GetOpenCitationsRefs.java | 93 ------- .../opencitations/SparkUpdateOCRels.java | 150 ----------- .../bip_update_parameters.json | 32 --- .../dnetlib/dhp/bypassactionset/BipTest.java | 250 ----------------- .../dnetlib/dhp/bypassactionset/FOSTest.java | 253 ------------------ .../dhp/bypassactionset/bip/preparedbip.json | 86 ------ .../bypassactionset/bip/publicationmatch.json | 6 - .../bip/publicationnomatch.json | 6 - .../dhp/bypassactionset/fos/fos_prepared.json | 50 ---- .../bypassactionset/fos/publicationmatch.json | 6 - .../dhp/oa/graph/dump/DumpJobTest.java | 1 - 36 files changed, 1033 insertions(+), 1335 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java rename dhp-workflows/{dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/GetFOSData.java (77%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java rename dhp-workflows/{dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/PrepareFOSSparkJob.java (51%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java rename dhp-workflows/{dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/model/BipDeserialize.java (87%) rename dhp-workflows/{dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/model/BipScore.java (88%) rename dhp-workflows/{dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/model/FOSDataModel.java (95%) rename dhp-workflows/{dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/model/KeyValue.java (83%) rename dhp-workflows/{dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/model/Score.java (85%) rename dhp-workflows/{dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/bip_prepare_parameters.json (88%) rename dhp-workflows/{dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/distribute_fos_parameters.json (100%) rename dhp-workflows/{dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/get_fos_parameters.json (79%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml rename dhp-workflows/{dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/fos_update_parameters.json => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/produce_unresolved_parameters.json} (55%) create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java rename dhp-workflows/{dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset => dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/bip/bip.json (98%) rename dhp-workflows/{dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset => dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/fos/fos.json (100%) rename dhp-workflows/{dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset => dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities}/fos/h2020_fos_sbs.csv (100%) delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/SparkUpdateBip.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/SparkUpdateFOS.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_update_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/BipTest.java delete mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/FOSTest.java delete mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json delete mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationmatch.json delete mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json delete mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json delete mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/publicationmatch.json diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java new file mode 100644 index 0000000000..cae4d6d438 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java @@ -0,0 +1,55 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; + +import java.util.Optional; + +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Qualifier; + +public class Constants { + + public static final String UNREOSLVED_PREFIX = "unresolved:"; + public static final String UNREOSLVED_POSTFIX_DOI = ":doi"; + public static final String DOI = "doi"; + + public static final String UPDATE_DATA_INFO_TYPE = "update"; + public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos"; + public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE"; + public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip"; + + public static final String FOS_CLASS_ID = "FOS"; + public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification"; + + public final static String NULL = "NULL"; + + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static Boolean isSparkSessionManaged(ArgumentApplicationParser parser) { + return Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + } + + public static Dataset readPath( + SparkSession spark, String inputPath, Class clazz) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + } + + public static String getUnresolvedDoiIndentifier(String doi) { + StringBuilder sb = new StringBuilder(); + sb.append(UNREOSLVED_PREFIX).append(doi).append(UNREOSLVED_POSTFIX_DOI); + return sb.toString(); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/GetFOSData.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java similarity index 77% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/GetFOSData.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java index 9eceb7c5f7..9dec3e8626 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/GetFOSData.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/GetFOSData.java @@ -1,7 +1,9 @@ -package eu.dnetlib.dhp.bypassactionset.fos; +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import java.io.*; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Serializable; import java.util.Objects; import java.util.Optional; @@ -29,17 +31,17 @@ public class GetFOSData implements Serializable { .requireNonNull( GetFOSData.class .getResourceAsStream( - "/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json")))); + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json")))); parser.parseArgument(args); // the path where the original fos csv file is stored - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}", inputPath); + final String sourcePath = parser.get("sourcePath"); + log.info("sourcePath {}", sourcePath); // the path where to put the file as json - final String outputFile = parser.get("outputFile"); - log.info("outputFile {}", outputFile); + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); final String hdfsNameNode = parser.get("hdfsNameNode"); log.info("hdfsNameNode {}", hdfsNameNode); @@ -58,7 +60,7 @@ public class GetFOSData implements Serializable { FileSystem fileSystem = FileSystem.get(conf); - new GetFOSData().doRewrite(inputPath, outputFile, classForName, delimiter, fileSystem); + new GetFOSData().doRewrite(sourcePath, outputPath, classForName, delimiter, fileSystem); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java new file mode 100644 index 0000000000..af87094ffb --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -0,0 +1,144 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; + +import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; +import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.UPDATE_CLASS_NAME; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.hdfs.client.HdfsUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.BipDeserialize; +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.BipScore; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Measure; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; + +public class PrepareBipFinder implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + PrepareBipFinder.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip_prepare_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String sourcePath = parser.get("sourcePath"); + log.info("sourcePath {}: ", sourcePath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration()); + prepareResults(spark, sourcePath, outputPath); + }); + } + + private static void prepareResults(SparkSession spark, String inputPath, String outputPath) { + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD bipDeserializeJavaRDD = sc + .textFile(inputPath) + .map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class)); + + spark + .createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> { + BipScore bs = new BipScore(); + bs.setId(key); + bs.setScoreList(entry.get(key)); + return bs; + }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)) + .map((MapFunction) v -> { + Result r = new Result(); + + r.setId(getUnresolvedDoiIndentifier(v.getId())); + r.setMeasures(getMeasure(v)); + return r; + }, Encoders.bean(Result.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/bip"); + } + + private static List getMeasure(BipScore value) { + return value + .getScoreList() + .stream() + .map(score -> { + Measure m = new Measure(); + m.setId(score.getId()); + m + .setUnit( + score + .getUnit() + .stream() + .map(unit -> { + KeyValue kv = new KeyValue(); + kv.setValue(unit.getValue()); + kv.setKey(unit.getKey()); + kv + .setDataInfo( + OafMapperUtils + .dataInfo( + false, + UPDATE_DATA_INFO_TYPE, + true, + false, + OafMapperUtils + .qualifier( + UPDATE_MEASURE_BIP_CLASS_ID, + UPDATE_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "")); + return kv; + }) + .collect(Collectors.toList())); + return m; + }) + .collect(Collectors.toList()); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java similarity index 51% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/PrepareFOSSparkJob.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 0f9f00bb5c..a84990fd34 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -1,30 +1,30 @@ -package eu.dnetlib.dhp.bypassactionset.fos; +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.bypassactionset.Utils.getIdentifier; +import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; +import java.util.*; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class PrepareFOSSparkJob implements Serializable { private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class); @@ -35,7 +35,7 @@ public class PrepareFOSSparkJob implements Serializable { .toString( PrepareFOSSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json")); + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/distribute_fos_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -55,7 +55,6 @@ public class PrepareFOSSparkJob implements Serializable { conf, isSparkSessionManaged, spark -> { - removeOutputDir(spark, outputPath); distributeFOSdois( spark, sourcePath, @@ -74,13 +73,60 @@ public class PrepareFOSSparkJob implements Serializable { final String level3 = v.getLevel3(); Arrays .stream(v.getDoi().split("\u0002")) - .forEach(d -> fosList.add(FOSDataModel.newInstance(getIdentifier(d), level1, level2, level3))); + .forEach(d -> fosList.add(FOSDataModel.newInstance(d, level1, level2, level3))); return fosList.iterator(); }, Encoders.bean(FOSDataModel.class)) + .map((MapFunction) value -> { + Result r = new Result(); + r.setId(getUnresolvedDoiIndentifier(value.getDoi())); + r.setSubject(getSubjects(value)); + return r; + }, Encoders.bean(Result.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath); + .json(outputPath + "/fos"); + } + + private static List getSubjects(FOSDataModel fos) { + return Arrays + .asList(getSubject(fos.getLevel1()), getSubject(fos.getLevel2()), getSubject(fos.getLevel3())) + .stream() + .filter(Objects::nonNull) + .collect(Collectors.toList()); + } + + private static StructuredProperty getSubject(String sbj) { + if (sbj.equals(NULL)) + return null; + StructuredProperty sp = new StructuredProperty(); + sp.setValue(sbj); + sp + .setQualifier( + OafMapperUtils + .qualifier( + FOS_CLASS_ID, + FOS_CLASS_NAME, + ModelConstants.DNET_SUBJECT_TYPOLOGIES, + ModelConstants.DNET_SUBJECT_TYPOLOGIES)); + sp + .setDataInfo( + OafMapperUtils + .dataInfo( + false, + UPDATE_DATA_INFO_TYPE, + true, + false, + OafMapperUtils + .qualifier( + UPDATE_SUBJECT_FOS_CLASS_ID, + UPDATE_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "")); + + return sp; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java new file mode 100644 index 0000000000..62b813602b --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java @@ -0,0 +1,79 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; + +import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class SparkSaveUnresolved implements Serializable { + private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + PrepareFOSSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/produce_unresolved_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String sourcePath = parser.get("sourcePath"); + log.info("sourcePath: {}", sourcePath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + saveUnresolved( + spark, + sourcePath, + + outputPath); + }); + } + + private static void saveUnresolved(SparkSession spark, String sourcePath, String outputPath) { + + spark + .read() + .textFile(sourcePath + "/*") + .map( + (MapFunction) l -> OBJECT_MAPPER.readValue(l, Result.class), + Encoders.bean(Result.class)) + .groupByKey((MapFunction) r -> r.getId(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> { + Result ret = it.next(); + it.forEachRemaining(r -> ret.mergeFrom(r)); + return ret; + }, Encoders.bean(Result.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipDeserialize.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipDeserialize.java similarity index 87% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipDeserialize.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipDeserialize.java index a2c7601a88..f950d92604 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipDeserialize.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipDeserialize.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.bypassactionset.model; +package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; import java.io.Serializable; import java.util.ArrayList; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipScore.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipScore.java similarity index 88% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipScore.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipScore.java index fb2687704a..c36856a5b0 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/BipScore.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/BipScore.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.bypassactionset.model; +package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; import java.io.Serializable; import java.util.List; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/FOSDataModel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java similarity index 95% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/FOSDataModel.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java index 564de01659..befb230cb4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/FOSDataModel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/FOSDataModel.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.bypassactionset.model; +package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; import java.io.Serializable; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/KeyValue.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/KeyValue.java similarity index 83% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/KeyValue.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/KeyValue.java index 2f3379308f..4384e4ba12 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/KeyValue.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/KeyValue.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.bypassactionset.model; +package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; import java.io.Serializable; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/Score.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/Score.java similarity index 85% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/Score.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/Score.java index 76580fb239..3d1cca9a0e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/model/Score.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/model/Score.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.bypassactionset.model; +package eu.dnetlib.dhp.actionmanager.createunresolvedentities.model; import java.io.Serializable; import java.util.List; diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_prepare_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip_prepare_parameters.json similarity index 88% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_prepare_parameters.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip_prepare_parameters.json index 7663a454b9..b7bad73e6d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_prepare_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip_prepare_parameters.json @@ -6,8 +6,8 @@ "paramRequired": false }, { - "paramName": "ip", - "paramLongName": "inputPath", + "paramName": "sp", + "paramLongName": "sourcePath", "paramDescription": "the URL from where to get the programme file", "paramRequired": true }, diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/distribute_fos_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/distribute_fos_parameters.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/distribute_fos_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json similarity index 79% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json index 8b663e080c..050a25677c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/get_fos_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json @@ -23,5 +23,11 @@ "paramLongName": "hdfsNameNode", "paramDescription": "the path used to store the HostedByMap", "paramRequired": true + }, + { + "paramName": "cfn", + "paramLongName": "classForName", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/config-default.xml new file mode 100644 index 0000000000..d262cb6e05 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml new file mode 100644 index 0000000000..31c0186186 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml @@ -0,0 +1,155 @@ + + + + fosPath + the input path of the resources to be extended + + + + bipScorePath + the path where to find the bipFinder scores + + + outputPath + the path where to store the actionset + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + yarn + cluster + Produces the unresolved from bip finder! + eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareBipFinder + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${bipScorePath} + --outputPath${workingDir}/prepared/bip + + + + + + + + eu.dnetlib.dhp.actionmanager.createunresolvedentities.GetFOSData + --hdfsNameNode${nameNode} + --sourcePath${fosPath} + --outputPath${workingDir}/input/fos + --classForNameeu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel + + + + + + + + yarn + cluster + Produces the unresolved from FOS! + eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/input/fos + + --outputPath${workingDir}/prepared/fos + + + + + + + + + + + + + yarn + cluster + Saves the result produced for bip and fos by grouping results with the same id + eu.dnetlib.dhp.actionmanager.bipfinder.CollectAndSave + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --inputPath${workingDir}/prepared + --outputPath${outputPath} + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/fos_update_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/produce_unresolved_parameters.json similarity index 55% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/fos_update_parameters.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/produce_unresolved_parameters.json index 6c78811e6a..b7bad73e6d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/fos_update_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/produce_unresolved_parameters.json @@ -6,8 +6,8 @@ "paramRequired": false }, { - "paramName": "ip", - "paramLongName": "inputPath", + "paramName": "sp", + "paramLongName": "sourcePath", "paramDescription": "the URL from where to get the programme file", "paramRequired": true }, @@ -16,17 +16,5 @@ "paramLongName": "outputPath", "paramDescription": "the path of the new ActionSet", "paramRequired": true - }, - { - "paramName": "rtn", - "paramLongName": "resultTableName", - "paramDescription": "the path of the new ActionSet", - "paramRequired": true - }, - { - "paramName": "fp", - "paramLongName": "fosPath", - "paramDescription": "the path of the new ActionSet", - "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java new file mode 100644 index 0000000000..2f7a171b3c --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -0,0 +1,250 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; + +import static org.junit.jupiter.api.Assertions.*; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.stream.Collectors; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class PrepareTest { + + private static final Logger log = LoggerFactory.getLogger(ProduceTest.class); + + private static Path workingDir; + private static SparkSession spark; + private static LocalFileSystem fs; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(PrepareTest.class.getSimpleName()); + + fs = FileSystem.getLocal(new Configuration()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(ProduceTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void bipPrepareTest() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") + .getPath(); + + PrepareBipFinder + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + "--outputPath", workingDir.toString() + "/work" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/work/bip") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(86, tmp.count()); + + String doi1 = "unresolved:10.0000/096020199389707:doi"; + + Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count()); + Assertions.assertEquals(3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getMeasures().size()); + Assertions + .assertEquals( + "6.34596412687e-09", tmp + .filter(r -> r.getId().equals(doi1)) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(sl -> sl.getId().equals("influence")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "0.641151896994", tmp + .filter(r -> r.getId().equals(doi1)) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(sl -> sl.getId().equals("popularity_alt")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + Assertions + .assertEquals( + "2.33375102921e-09", tmp + .filter(r -> r.getId().equals(doi1)) + .collect() + .get(0) + .getMeasures() + .stream() + .filter(sl -> sl.getId().equals("popularity")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + } + + @Test + void getFOSFileTest() throws CollectorException, IOException, ClassNotFoundException { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv") + .getPath(); + final String outputPath = workingDir.toString() + "/fos.json"; + + new GetFOSData() + .doRewrite( + sourcePath, outputPath, "eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel", + '\t', fs); + + BufferedReader in = new BufferedReader( + new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + FOSDataModel fos = new ObjectMapper().readValue(line, FOSDataModel.class); + + System.out.println(new ObjectMapper().writeValueAsString(fos)); + count += 1; + } + + assertEquals(38, count); + + } + + @Test + void fosPrepareTest() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json") + .getPath(); + + PrepareFOSSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", sourcePath, + + "-outputPath", workingDir.toString() + "/work" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/work/fos") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + String doi1 = "unresolved:10.3390/s18072310:doi"; + + assertEquals(50, tmp.count()); + assertEquals(1, tmp.filter(row -> row.getId().equals(doi1)).count()); + assertTrue( + tmp + .filter(r -> r.getId().equals(doi1)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("engineering and technology")); + + assertTrue( + tmp + .filter(r -> r.getId().equals(doi1)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("nano-technology")); + assertTrue( + tmp + .filter(r -> r.getId().equals(doi1)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("nanoscience & nanotechnology")); + + String doi = "unresolved:10.1111/1365-2656.12831:doi"; + assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); + assertTrue( + tmp + .filter(r -> r.getId().equals(doi)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("psychology and cognitive sciences")); + + assertTrue( + tmp + .filter(r -> r.getId().equals(doi)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("social sciences")); + assertFalse( + tmp + .filter(r -> r.getId().equals(doi)) + .flatMap(r -> r.getSubject().iterator()) + .map(sbj -> sbj.getValue()) + .collect() + .contains("NULL")); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java new file mode 100644 index 0000000000..b02761750b --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -0,0 +1,234 @@ + +package eu.dnetlib.dhp.actionmanager.createunresolvedentities; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; + +public class ProduceTest { + private static final Logger log = LoggerFactory.getLogger(ProduceTest.class); + + private static Path workingDir; + private static SparkSession spark; + private static LocalFileSystem fs; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final String ID_PREFIX = "50|doi_________"; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(ProduceTest.class.getSimpleName()); + + fs = FileSystem.getLocal(new Configuration()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(ProduceTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(ProduceTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void produceTest() throws Exception { + + final String bipPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json") + .getPath(); + + PrepareBipFinder + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", bipPath, + "--outputPath", workingDir.toString() + "/work" + + }); + final String fosPath = getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json") + .getPath(); + + PrepareFOSSparkJob + .main( + new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", fosPath, + "-outputPath", workingDir.toString() + "/work" + }); + + SparkSaveUnresolved.main(new String[] { + "--isSparkSessionManaged", Boolean.FALSE.toString(), + "--sourcePath", workingDir.toString() + "/work", + + "-outputPath", workingDir.toString() + "/unresolved" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/unresolved") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(135, tmp.count()); + + Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi")).count()); + + Assertions + .assertEquals( + 3, tmp + .filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi")) + .collect() + .get(0) + .getSubject() + .size()); + + Assertions + .assertEquals( + 3, tmp + .filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi")) + .collect() + .get(0) + .getMeasures() + .size()); + + List sbjs = tmp + .filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi")) + .flatMap(row -> row.getSubject().iterator()) + .collect(); + + sbjs.forEach(sbj -> Assertions.assertEquals("FOS", sbj.getQualifier().getClassid())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals( + "Fields of Science and Technology classification", sbj.getQualifier().getClassname())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename())); + + sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference())); + sbjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); + sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible())); + sbjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); + sbjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); + sbjs + .forEach( + sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); + sbjs + .forEach( + sbj -> Assertions + .assertEquals( + ModelConstants.DNET_PROVENANCE_ACTIONS, + sbj.getDataInfo().getProvenanceaction().getSchemename())); + + sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("engineering and technology")); + sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nano-technology")); + sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nanoscience & nanotechnology")); + + List measures = tmp + .filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi")) + .flatMap(row -> row.getMeasures().iterator()) + .collect(); + Assertions + .assertEquals( + "7.5597134689e-09", measures + .stream() + .filter(mes -> mes.getId().equals("influence")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + Assertions + .assertEquals( + "4.903880192", measures + .stream() + .filter(mes -> mes.getId().equals("popularity_alt")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + Assertions + .assertEquals( + "1.17977512835e-08", measures + .stream() + .filter(mes -> mes.getId().equals("popularity")) + .collect(Collectors.toList()) + .get(0) + .getUnit() + .get(0) + .getValue()); + + Assertions + .assertEquals( + 49, tmp + .filter(row -> !row.getId().equals("unresolved:10.3390/s18072310:doi")) + .filter(row -> row.getSubject() != null) + .count()); + + Assertions + .assertEquals( + 85, + tmp + .filter(row -> !row.getId().equals("unresolved:10.3390/s18072310:doi")) + .filter(r -> r.getMeasures() != null) + .count()); + + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/bip.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json similarity index 98% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/bip.json rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json index 82233c0dfd..03cef4be11 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/bip.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip/bip.json @@ -1,4 +1,4 @@ -{"10.0000/000000": [{"id": "influence", "unit": [{"value": "7.5597134689e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "4.903880192", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.17977512835e-08", "key": "score"}]}]} +{"10.3390/s18072310": [{"id": "influence", "unit": [{"value": "7.5597134689e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "4.903880192", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.17977512835e-08", "key": "score"}]}]} {"10.0000/096020199389707": [{"id": "influence", "unit": [{"value": "6.34596412687e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.641151896994", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "2.33375102921e-09", "key": "score"}]}]} {"10.00000/jpmc.2017.106": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "5.39172290649e-09", "key": "score"}]}]} {"10.0000/9781845416881": [{"id": "influence", "unit": [{"value": "5.96492048955e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "1.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "1.12641925838e-08", "key": "score"}]}]} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos.json rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/fos.json diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/h2020_fos_sbs.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv similarity index 100% rename from dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/h2020_fos_sbs.csv rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java deleted file mode 100644 index fa6cc9fab7..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/Utils.java +++ /dev/null @@ -1,17 +0,0 @@ - -package eu.dnetlib.dhp.bypassactionset; - -import org.jetbrains.annotations.NotNull; - -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; - -public class Utils { - private static final String ID_PREFIX = "50|doi_________"; - - @NotNull - public static String getIdentifier(String d) { - return ID_PREFIX + - IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", d)); - } -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java deleted file mode 100644 index 7f12847ad6..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/PrepareBipFinder.java +++ /dev/null @@ -1,89 +0,0 @@ - -package eu.dnetlib.dhp.bypassactionset.bipfinder; - -import static eu.dnetlib.dhp.bypassactionset.Utils.getIdentifier; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Optional; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bypassactionset.model.BipDeserialize; -import eu.dnetlib.dhp.bypassactionset.model.BipScore; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class PrepareBipFinder implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(SparkUpdateBip.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - public static void main(String[] args) throws Exception { - - String jsonConfiguration = IOUtils - .toString( - SparkUpdateBip.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bypassactionset/bip_prepare_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}: ", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}: ", outputPath); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - - prepareResults(spark, inputPath, outputPath); - }); - } - - private static void prepareResults(SparkSession spark, String inputPath, String outputPath) { - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD bipDeserializeJavaRDD = sc - .textFile(inputPath) - .map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class)); - - spark - .createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> { - BipScore bs = new BipScore(); - bs.setId(getIdentifier(key)); - bs.setScoreList(entry.get(key)); - return bs; - }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); - } -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/SparkUpdateBip.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/SparkUpdateBip.java deleted file mode 100644 index 53913c81b4..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/bipfinder/SparkUpdateBip.java +++ /dev/null @@ -1,131 +0,0 @@ - -package eu.dnetlib.dhp.bypassactionset.bipfinder; - -import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bypassactionset.model.BipScore; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import scala.Tuple2; - -/** - * created the Atomic Action for each tipe of results - */ -public class SparkUpdateBip implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(SparkUpdateBip.class); - - public static void main(String[] args) throws Exception { - - String jsonConfiguration = IOUtils - .toString( - SparkUpdateBip.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bypassactionset/bip_update_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}: ", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}: ", outputPath); - - final String bipScorePath = parser.get("bipScorePath"); - log.info("bipScorePath: {}", bipScorePath); - - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - Class inputClazz = (Class) Class.forName(resultClassName); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> updateBipFinder(spark, inputPath, outputPath, bipScorePath, inputClazz) - - ); - } - - private static void updateBipFinder(SparkSession spark, String inputPath, String outputPath, - String bipScorePath, Class inputClazz) { - - Dataset results = readPath(spark, inputPath, inputClazz); - Dataset bipScores = readPath(spark, bipScorePath, BipScore.class); - - results - .joinWith(bipScores, results.col("id").equalTo(bipScores.col("id")), "left") - .map((MapFunction, I>) value -> { - if (!Optional.ofNullable(value._2()).isPresent()) { - return value._1(); - } - value._1().setMeasures(getMeasure(value._2())); - return value._1(); - }, Encoders.bean(inputClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "/bip"); - - } - - private static List getMeasure(BipScore value) { - return value - .getScoreList() - .stream() - .map(score -> { - Measure m = new Measure(); - m.setId(score.getId()); - m - .setUnit( - score - .getUnit() - .stream() - .map(unit -> { - KeyValue kv = new KeyValue(); - kv.setValue(unit.getValue()); - kv.setKey(unit.getKey()); - kv - .setDataInfo( - getDataInfo( - UPDATE_DATA_INFO_TYPE, - UPDATE_MEASURE_BIP_CLASS_ID, - UPDATE_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS, "")); - return kv; - }) - .collect(Collectors.toList())); - return m; - }) - .collect(Collectors.toList()); - } - -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/SparkUpdateFOS.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/SparkUpdateFOS.java deleted file mode 100644 index 4a7bd0f98c..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/fos/SparkUpdateFOS.java +++ /dev/null @@ -1,121 +0,0 @@ - -package eu.dnetlib.dhp.bypassactionset.fos; - -import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.*; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import scala.Tuple2; - -public class SparkUpdateFOS implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkUpdateFOS.class); - - public static void main(String[] args) throws Exception { - - String jsonConfiguration = IOUtils - .toString( - SparkUpdateFOS.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bypassactionset/fos_update_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}: ", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}: ", outputPath); - - final String fosPath = parser.get("fosPath"); - log.info("fosPath: {}", fosPath); - - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - Class inputClazz = (Class) Class.forName(resultClassName); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> updateFos(spark, inputPath, outputPath, fosPath, inputClazz) - - ); - } - - private static void updateFos(SparkSession spark, String inputPath, String outputPath, - String fosPath, Class inputClazz) { - - Dataset results = readPath(spark, inputPath, inputClazz); - Dataset fosDataModelDataset = readPath(spark, fosPath, FOSDataModel.class); - - results - .joinWith(fosDataModelDataset, results.col("id").equalTo(fosDataModelDataset.col("doi")), "left") - .map((MapFunction, I>) value -> { - if (!Optional.ofNullable(value._2()).isPresent()) { - return value._1(); - } - value._1().getSubject().addAll(getSubjects(value._2())); - return value._1(); - }, Encoders.bean(inputClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); - - } - - private static List getSubjects(FOSDataModel fos) { - return Arrays - .asList(getSubject(fos.getLevel1()), getSubject(fos.getLevel2()), getSubject(fos.getLevel3())) - .stream() - .filter(Objects::nonNull) - .collect(Collectors.toList()); - } - - private static StructuredProperty getSubject(String sbj) { - if (sbj.equals(NULL)) - return null; - StructuredProperty sp = new StructuredProperty(); - sp.setValue(sbj); - sp.setQualifier(getQualifier(FOS_CLASS_ID, FOS_CLASS_NAME, ModelConstants.DNET_SUBJECT_TYPOLOGIES)); - sp - .setDataInfo( - getDataInfo( - UPDATE_DATA_INFO_TYPE, - UPDATE_SUBJECT_FOS_CLASS_ID, - UPDATE_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS, "")); - return sp; - - } - -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java deleted file mode 100644 index 0882b26d02..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/GetOpenCitationsRefs.java +++ /dev/null @@ -1,93 +0,0 @@ - -package eu.dnetlib.dhp.bypassactionset.opencitations; - -import java.io.*; -import java.io.Serializable; -import java.util.Objects; -import java.util.zip.GZIPOutputStream; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; - -import org.apache.commons.cli.ParseException; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -public class GetOpenCitationsRefs implements Serializable { - private static final Logger log = LoggerFactory.getLogger(GetOpenCitationsRefs.class); - - public static void main(final String[] args) throws IOException, ParseException { - - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Objects - .requireNonNull( - GetOpenCitationsRefs.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bypassactionset/opencitations/input_parameters.json")))); - - parser.parseArgument(args); - - final String[] inputFile = parser.get("inputFile").split(";"); - log.info("inputFile {}", inputFile.toString()); - - final String workingPath = parser.get("workingPath"); - log.info("workingPath {}", workingPath); - - final String hdfsNameNode = parser.get("hdfsNameNode"); - log.info("hdfsNameNode {}", hdfsNameNode); - - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - - GetOpenCitationsRefs ocr = new GetOpenCitationsRefs(); - - for (String file : inputFile) { - ocr.doExtract(workingPath + "/Original/" + file, workingPath, fileSystem); - } - - } - - private void doExtract(String inputFile, String workingPath, FileSystem fileSystem) - throws IOException { - - final Path path = new Path(inputFile); - - FSDataInputStream oc_zip = fileSystem.open(path); - - int count = 1; - try (ZipInputStream zis = new ZipInputStream(oc_zip)) { - ZipEntry entry = null; - while ((entry = zis.getNextEntry()) != null) { - - if (!entry.isDirectory()) { - String fileName = entry.getName(); - fileName = fileName.substring(0, fileName.indexOf("T")) + "_" + count; - count++; - try ( - FSDataOutputStream out = fileSystem - .create(new Path(workingPath + "/COCI/" + fileName + ".gz")); - GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) { - - IOUtils.copy(zis, gzipOs); - - } - } - - } - - } - - } - -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java deleted file mode 100644 index 64a54f143e..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bypassactionset/opencitations/SparkUpdateOCRels.java +++ /dev/null @@ -1,150 +0,0 @@ - -package eu.dnetlib.dhp.bypassactionset.opencitations; - -import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.IOException; -import java.io.Serializable; -import java.util.*; - -import org.apache.commons.cli.ParseException; -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; - -public class SparkUpdateOCRels implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(SparkUpdateOCRels.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - public static void main(final String[] args) throws IOException, ParseException { - - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Objects - .requireNonNull( - SparkUpdateOCRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/opencitations/as_parameters.json")))); - - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("inputPath"); - log.info("inputPath {}", inputPath.toString()); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath {}", outputPath); - - final boolean shouldDuplicateRels = Optional - .ofNullable(parser.get("shouldDuplicateRels")) - .map(Boolean::valueOf) - .orElse(Boolean.FALSE); - - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> addOCRelations(spark, inputPath, outputPath, shouldDuplicateRels)); - - } - - private static void addOCRelations(SparkSession spark, String inputPath, String outputPath, - boolean shouldDuplicateRels) { - spark - .sqlContext() - .createDataset(spark.sparkContext().textFile(inputPath + "/*", 6000), Encoders.STRING()) - .flatMap( - (FlatMapFunction) value -> createRelation(value, shouldDuplicateRels).iterator(), - Encoders.bean(Relation.class)) - .filter((FilterFunction) value -> value != null) - .write() - .mode(SaveMode.Append) - .option("compression", "gzip") - .json(outputPath); - - } - - private static List createRelation(String value, boolean duplicate) { - String[] line = value.split(","); - if (!line[1].startsWith("10.")) { - return new ArrayList<>(); - } - List relationList = new ArrayList<>(); - - String citing = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue(DOI, line[1])); - final String cited = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue(DOI, line[2])); - - relationList - .addAll( - getRelations( - citing, - cited)); - - if (duplicate && line[1].endsWith(REF_DOI)) { - citing = ID_PREFIX + IdentifierFactory - .md5(CleaningFunctions.normalizePidValue(DOI, line[1].substring(0, line[1].indexOf(REF_DOI)))); - relationList.addAll(getRelations(citing, cited)); - } - - return relationList; - } - - private static Collection getRelations(String citing, String cited) { - - return Arrays - .asList( - getRelation(citing, cited, ModelConstants.CITES), - getRelation(cited, citing, ModelConstants.IS_CITED_BY)); - } - - public static Relation getRelation( - String source, - String target, - String relclass) { - Relation r = new Relation(); - r.setCollectedfrom(getCollectedFrom()); - r.setSource(source); - r.setTarget(target); - r.setRelClass(relclass); - r.setRelType(ModelConstants.RESULT_RESULT); - r.setSubRelType(ModelConstants.CITATION); - r - .setDataInfo( - getDataInfo( - UPDATE_DATA_INFO_TYPE, OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, - ModelConstants.DNET_PROVENANCE_ACTIONS, OC_TRUST, false)); - return r; - } - - public static List getCollectedFrom() { - KeyValue kv = new KeyValue(); - kv.setKey(ModelConstants.OPENOCITATIONS_ID); - kv.setValue(ModelConstants.OPENOCITATIONS_NAME); - - return Arrays.asList(kv); - } - -} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_update_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_update_parameters.json deleted file mode 100644 index 31771a40ad..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bypassactionset/bip_update_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "paramName": "issm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "when true will stop SparkSession after job execution", - "paramRequired": false - }, - { - "paramName": "ip", - "paramLongName": "inputPath", - "paramDescription": "the URL from where to get the programme file", - "paramRequired": true - }, - { - "paramName": "o", - "paramLongName": "outputPath", - "paramDescription": "the path of the new ActionSet", - "paramRequired": true - }, - { - "paramName": "rtn", - "paramLongName": "resultTableName", - "paramDescription": "the path of the new ActionSet", - "paramRequired": true - }, - { - "paramName": "bsp", - "paramLongName": "bipScorePath", - "paramDescription": "the path of the new ActionSet", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/BipTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/BipTest.java deleted file mode 100644 index 41595eacf4..0000000000 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/BipTest.java +++ /dev/null @@ -1,250 +0,0 @@ - -package eu.dnetlib.dhp.bypassactionset; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; -import java.util.stream.Collectors; - -import eu.dnetlib.dhp.schema.oaf.Author; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.neethi.Assertion; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.bypassactionset.bipfinder.PrepareBipFinder; -import eu.dnetlib.dhp.bypassactionset.bipfinder.SparkUpdateBip; -import eu.dnetlib.dhp.bypassactionset.model.BipScore; -import eu.dnetlib.dhp.countrypropagation.CountryPropagationJobTest; -import eu.dnetlib.dhp.schema.oaf.Measure; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; - -public class BipTest { - - private static final Logger log = LoggerFactory.getLogger(FOSTest.class); - - private static Path workingDir; - private static SparkSession spark; - private static LocalFileSystem fs; - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final String ID_PREFIX = "50|doi_________"; - - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(BipTest.class.getSimpleName()); - - fs = FileSystem.getLocal(new Configuration()); - log.info("using work dir {}", workingDir); - - SparkConf conf = new SparkConf(); - conf.setAppName(FOSTest.class.getSimpleName()); - - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - - spark = SparkSession - .builder() - .appName(CountryPropagationJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } - - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } - - @Test - void prepareBipTest() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/bip/bip.json") - .getPath(); - - PrepareBipFinder - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--inputPath", sourcePath, - "--outputPath", workingDir.toString() + "/remapDoi" - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/remapDoi") - .map(item -> OBJECT_MAPPER.readValue(item, BipScore.class)); - - Assertions.assertEquals(86, tmp.count()); -// tmp.foreach(v -> System.out.println(OBJECT_MAPPER.writeValueAsString(v))); - - String doi1 = ID_PREFIX + - IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.0000/096020199389707")); - - Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count()); - Assertions.assertEquals(3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getScoreList().size()); - Assertions - .assertEquals( - "6.34596412687e-09", tmp - .filter(r -> r.getId().equals(doi1)) - .collect() - .get(0) - .getScoreList() - .stream() - .filter(sl -> sl.getId().equals("influence")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); - Assertions - .assertEquals( - "0.641151896994", tmp - .filter(r -> r.getId().equals(doi1)) - .collect() - .get(0) - .getScoreList() - .stream() - .filter(sl -> sl.getId().equals("popularity_alt")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); - Assertions - .assertEquals( - "2.33375102921e-09", tmp - .filter(r -> r.getId().equals(doi1)) - .collect() - .get(0) - .getScoreList() - .stream() - .filter(sl -> sl.getId().equals("popularity")) - .collect(Collectors.toList()) - .get(0) - .getUnit() - .get(0) - .getValue()); - - } - - @Test - void updateResult() throws Exception { - final String bipScorePath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json") - .getPath(); - - final String inputPath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json") - .getPath(); - - SparkUpdateBip - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--bipScorePath", bipScorePath, - "--inputPath", inputPath, - "--outputPath", workingDir.toString() + "/publication", - "--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication" - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/publication/bip") - .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); - - Assertions.assertEquals(6, tmp.count()); - Assertions.assertEquals(0, tmp.filter(r -> r.getMeasures() != null).count()); - tmp.foreach(r -> Assertions.assertEquals("publication", r.getResulttype().getClassid())); - - } - - @Test - void updateResultMatchCheckMeasures() throws Exception { - final String bipScorePath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json") - .getPath(); - - final String inputPath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/bip/publicationmatch.json") - .getPath(); - - SparkUpdateBip - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--bipScorePath", bipScorePath, - "--inputPath", inputPath, - "--outputPath", workingDir.toString() + "/publication", - "--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication" - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/publication/bip") - .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); - - Assertions.assertEquals(6, tmp.count()); - Assertions.assertEquals(1, tmp.filter(r -> r.getMeasures() != null).count()); - Assertions - .assertEquals( - 1, tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")).count()); - Assertions - .assertEquals( - 1, - tmp - .filter( - r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f") - && r.getMeasures() != null) - .count()); - Assertions.assertEquals(3, tmp - .filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")) - .collect() - .get(0) - .getMeasures().size()); - - Assertions.assertEquals("5.91019644836e-09", - tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")) - .collect() - .get(0).getMeasures().stream().filter(m -> m.getId().equals("influence")).collect(Collectors.toList()).get(0).getUnit().get(0).getValue()); - Assertions.assertEquals("0.0", - tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")) - .collect() - .get(0).getMeasures().stream().filter(m -> m.getId().equals("popularity_alt")).collect(Collectors.toList()).get(0).getUnit().get(0).getValue()); - Assertions.assertEquals("9.88840807598e-09", - tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")) - .collect() - .get(0).getMeasures().stream().filter(m -> m.getId().equals("popularity")).collect(Collectors.toList()).get(0).getUnit().get(0).getValue()); - - tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); - - } - - - -} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/FOSTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/FOSTest.java deleted file mode 100644 index db539703b7..0000000000 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bypassactionset/FOSTest.java +++ /dev/null @@ -1,253 +0,0 @@ - -package eu.dnetlib.dhp.bypassactionset; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import eu.dnetlib.dhp.PropagationConstant; -import eu.dnetlib.dhp.bypassactionset.fos.SparkUpdateFOS; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.neethi.Assertion; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.bypassactionset.fos.GetFOSData; -import eu.dnetlib.dhp.bypassactionset.fos.PrepareFOSSparkJob; -import eu.dnetlib.dhp.bypassactionset.model.FOSDataModel; -import eu.dnetlib.dhp.common.collection.CollectorException; -import eu.dnetlib.dhp.countrypropagation.CountryPropagationJobTest; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; - -public class FOSTest { - private static final Logger log = LoggerFactory.getLogger(FOSTest.class); - - private static Path workingDir; - private static SparkSession spark; - private static LocalFileSystem fs; - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final String ID_PREFIX = "50|doi_________"; - - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(CountryPropagationJobTest.class.getSimpleName()); - - fs = FileSystem.getLocal(new Configuration()); - log.info("using work dir {}", workingDir); - - SparkConf conf = new SparkConf(); - conf.setAppName(FOSTest.class.getSimpleName()); - - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - - spark = SparkSession - .builder() - .appName(CountryPropagationJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } - - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } - - @Test - void getFOSFileTest() throws CollectorException, IOException, ClassNotFoundException { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/fos/h2020_fos_sbs.csv") - .getPath(); - final String outputPath = workingDir.toString() + "/fos.json"; - - new GetFOSData() - .doRewrite(sourcePath, outputPath, "eu.dnetlib.dhp.bypassactionset.FOSDataModel", '\t', fs); - - BufferedReader in = new BufferedReader( - new InputStreamReader(fs.open(new org.apache.hadoop.fs.Path(outputPath)))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - FOSDataModel fos = new ObjectMapper().readValue(line, FOSDataModel.class); - - System.out.println(new ObjectMapper().writeValueAsString(fos)); - count += 1; - } - - assertEquals(38, count); - - } - - @Test - void distributeDoiTest() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos.json") - .getPath(); - - PrepareFOSSparkJob - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--sourcePath", sourcePath, - - "-outputPath", workingDir.toString() + "/distribute" - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/distribute") - .map(item -> OBJECT_MAPPER.readValue(item, FOSDataModel.class)); - - String doi1 = ID_PREFIX + - IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.3390/s18072310")); - - assertEquals(50, tmp.count()); - assertEquals(1, tmp.filter(row -> row.getDoi().equals(doi1)).count()); - assertEquals( - "engineering and technology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel1()); - assertEquals("nano-technology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel2()); - assertEquals( - "nanoscience & nanotechnology", tmp.filter(r -> r.getDoi().equals(doi1)).collect().get(0).getLevel3()); - - String doi = ID_PREFIX + - IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/1365-2656.12831")); - assertEquals(1, tmp.filter(row -> row.getDoi().equals(doi)).count()); - assertEquals("social sciences", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel1()); - assertEquals( - "psychology and cognitive sciences", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel2()); - assertEquals("NULL", tmp.filter(r -> r.getDoi().equals(doi)).collect().get(0).getLevel3()); - - - } - - @Test - void updateResult() throws Exception{ - - final String fosPath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json") - .getPath(); - - final String inputPath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json") - .getPath(); - - SparkUpdateFOS - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--fosPath", fosPath, - "--inputPath", inputPath, - "--outputPath", workingDir.toString() + "/publication", - "--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication" - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/publication") - .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); - - Assertions.assertEquals(6, tmp.count()); - - tmp.filter(r -> r.getSubject() != null).map(p -> p.getSubject()) - .foreach(s -> s.stream().forEach(sbj -> Assertions.assertFalse("FOS".equals(sbj.getQualifier().getClassid())))); - - - } - - @Test - void updateResultMatch() throws Exception{ - final String fosPath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json") - .getPath(); - - final String inputPath = getClass() - .getResource("/eu/dnetlib/dhp/bypassactionset/fos/publicationmatch.json") - .getPath(); - - SparkUpdateFOS - .main( - new String[] { - "--isSparkSessionManaged", Boolean.FALSE.toString(), - "--fosPath", fosPath, - "--inputPath", inputPath, - "--outputPath", workingDir.toString() + "/publication", - "--resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication" - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/publication") - .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); - - Assertions.assertEquals(6, tmp.count()); - - Assertions.assertEquals(3, tmp.filter(r -> r.getSubject() != null).map(p -> p.getSubject()).flatMap(v -> v.iterator()) - .filter(sbj -> sbj.getQualifier().getClassid().equals("FOS")).collect().size()); - - - List sbjs = tmp.filter(r -> r.getId().equals("50|doi_________b24ab3e127aa67e2a1017292988d571f")) - .map(p -> p.getSubject()).collect().get(0); - - Assertions.assertEquals(12, sbjs.size()); - - Stream fosSubjs = sbjs.stream().filter(sbj -> sbj.getQualifier().getClassid().equals("FOS")); - - Assertions.assertTrue(fosSubjs - .map(sbj -> sbj.getValue()).collect(Collectors.toList()).contains("engineering and technology")); - Assertions.assertTrue(fosSubjs - .map(sbj -> sbj.getValue()).collect(Collectors.toList()).contains("nano-technology")); - Assertions.assertTrue(fosSubjs - .map(sbj -> sbj.getValue()).collect(Collectors.toList()).contains("nanoscience & nanotechnology")); - - fosSubjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance()) ); - fosSubjs.forEach(sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid()) ); - fosSubjs.forEach(sbj -> Assertions.assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname() )); - fosSubjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust() )); - fosSubjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference() )); - fosSubjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible() )); - fosSubjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred() )); - - - - } - - - -} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json deleted file mode 100644 index bf5817bd20..0000000000 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/preparedbip.json +++ /dev/null @@ -1,86 +0,0 @@ -{"id":"50|doi_________63848be3afd635374828253a6f974f11","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} -{"id":"50|doi_________5da9060b89165e3f61a0806dcf2c2696","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________9ea0266b8ddd471eddb20635b89273bb","scoreList":[{"id":"influence","unit":[{"key":"score","value":"7.5597134689e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"4.903880192"}]},{"id":"popularity","unit":[{"key":"score","value":"1.17977512835e-08"}]}]} -{"id":"50|doi_________ab797495de07d4f4a25f08b84fca6021","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.34596412687e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.641151896994"}]},{"id":"popularity","unit":[{"key":"score","value":"2.33375102921e-09"}]}]} -{"id":"50|doi_________df5ade937e177dcfb82bc5ec3139fefe","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]} -{"id":"50|doi_________357976ea7d4e744fe21966607334952c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]} -{"id":"50|doi_________872982548f741b89eb5d30f509316dde","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.32078461509e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"1.6"}]},{"id":"popularity","unit":[{"key":"score","value":"8.3168486939e-09"}]}]} -{"id":"50|doi_________19c6da2771befb83f9f2715fc84f9edf","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.96492048955e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"1.0"}]},{"id":"popularity","unit":[{"key":"score","value":"1.12641925838e-08"}]}]} -{"id":"50|doi_________46e18e9e477d6fc71e002eae47cfc390","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________5cc344a6da53a8f2bac11faf7607e0b0","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.76260934675e-10"}]}]} -{"id":"50|doi_________fdfadf5cefdb8b63f90d5a3475a95959","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]} -{"id":"50|doi_________51fc7a9c7b9f1cf6705c7afb1de58967","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.93311506443e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.002176782336"}]},{"id":"popularity","unit":[{"key":"score","value":"1.7668105708e-09"}]}]} -{"id":"50|doi_________bbf7a94696ab67e4c29429522c31c0ef","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]} -{"id":"50|doi_________232498b3aed64dd0f0db87b09b61d5cd","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.26777280882e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.406656"}]},{"id":"popularity","unit":[{"key":"score","value":"3.39745193285e-09"}]}]} -{"id":"50|doi_________03749bed57efa24ab607527cf1eb94c5","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________65074998446928c1c18e25313dcf974b","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________2b62feaed6ad14760096c2a59f82836e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________d2d133a6fdfd44f34f96e225b5573447","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} -{"id":"50|doi_________3dd3cccbfcad3f206d1239a580af011f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________84852a20e9bbf1daf89803b5fcfc9c34","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]} -{"id":"50|doi_________84c5824b3a10a28a8bc26fed6223a08a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________8658fda7574016daa0d71c88eb6bab5f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]} -{"id":"50|doi_________ab52c136b88151f0c28f6ca4a5ef2a71","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________e8a2912fe3d70b13124e436294411378","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________cf3470c86f338ccf232f2869d0fa6ea2","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} -{"id":"50|doi_________16437c0064576207ba1438bf07fb9e21","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________f0ae1dc0f1dbb7fceaccc02d6e667f24","scoreList":[{"id":"influence","unit":[{"key":"score","value":"6.40470414877e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.6"}]},{"id":"popularity","unit":[{"key":"score","value":"7.89465099068e-09"}]}]} -{"id":"50|doi_________93d03d99bbc0f542fa2679e74882a096","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]} -{"id":"50|doi_________fcfce4bc985f22baf2dc1dc733a862e7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________3b7fd390517f45b0d37e0ec0adbb98b1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"3.47956715615e-09"}]}]} -{"id":"50|doi_________31306ba727c26ba04c0d4bbd0899d433","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________3e6151aa77865a1fdae56392f68b81f1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} -{"id":"50|doi_________1cd6e2c4cf189819fa8a011629b5744c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} -{"id":"50|doi_________45c855d81df747ff3c2033f6c8cf7bca","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________24ac4c0c4b143e37c2551723e9a55f36","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} -{"id":"50|doi_________5f9e3f1a076c2f587165f8ccd68286e5","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________b637731c913efe3c1f283183ef4a59cc","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"6.26204125721e-09"}]}]} -{"id":"50|doi_________490c0c23e4ed269f807e72e71cad09ae","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} -{"id":"50|doi_________15584e5a3a5bbdc487c85ab18d8a7c22","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} -{"id":"50|doi_________a7fd268447553e7e0fe06c19db28ea85","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} -{"id":"50|doi_________cf1b8db4480aa0e281a4cc122d3b2416","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} -{"id":"50|doi_________8880fd8fa9cafbde2431583dfbab1e1f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} -{"id":"50|doi_________22f8b4f61c49de05dcc0d94bf5e147d8","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} -{"id":"50|doi_________96e2b076fab4931ec2b1b9f43c9c31f9","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} -{"id":"50|doi_________1ac665f912877f93f9084dc38bc8174b","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} -{"id":"50|doi_________394fd70c30b21b1336b5a0405496fbe0","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} -{"id":"50|doi_________73caf44494091b089f8be65427fb4341","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} -{"id":"50|doi_________9b996963325dfae10dba5c3043938fc1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} -{"id":"50|doi_________1f03d5cab86f4963fbd3e0f1284cb9b8","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} -{"id":"50|doi_________87a59f3918f9ac2c394b2509173fb3e1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} -{"id":"50|doi_________9a8465f5343a1c845b6dd91496395c7e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} -{"id":"50|doi_________b03c0a657c669d24f45844307b9ad6bd","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} -{"id":"50|doi_________04aeabd709d8a486bde733ffc5ef53ae","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.01810569717e-09"}]}]} -{"id":"50|doi_________6c21d2d2133477141ad74226f8fd75b0","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"7.28336930301e-09"}]}]} -{"id":"50|doi_________b746ffb04cd9f815d5880c827c55040e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________0457b2e62a69f0e104db7cc575f2241a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} -{"id":"50|doi_________2fa691f5394020f7cb2df4f272a062e7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________aa2a77dc605567f5f2305b5f38a1a4b7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} -{"id":"50|doi_________cdb6f10588d85495208cbc6bf4a75000","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________6630766c2ad4783ebd01a9d7e9607eb1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} -{"id":"50|doi_________389315d5f74765688d34b8ee86c2514f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________beef7316dd6665798120698451446d74","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} -{"id":"50|doi_________525a46f117c946b8ae1caea542e24d0c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________fe3d2250ebaa65dae09aa372945c917f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} -{"id":"50|doi_________72d2ec1744cda7d49cf35707ff3e11c1","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________d7e0c5fd1f00d8a90879f168d73b9e0b","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} -{"id":"50|doi_________6e0e19ee1f9c1dbadf202ccd9919e6a7","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________6b306286f9b4d207f81bc9164713243c","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} -{"id":"50|doi_________acf5c0c1a125cd78fd9193589edc9152","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________66f645f86cdd9592953a1106e0dbf191","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} -{"id":"50|doi_________77ff842449308dd907ec46ebf9ccd539","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________a3d8fc73d1ac1308c117a61b4eb14ab5","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________9ff57cb17abfcd338c2ca768c85e452a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"5.39172290649e-09"}]}]} -{"id":"50|doi_________f903c5c3305fc7b1598a0ed989bb4f83","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"4.65008652949e-09"}]}]} -{"id":"50|doi_________2260d2cdf6fbcee10d61d5e0f1428ebd","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________549c5c8f66c36cab609b7221eae37040","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________388a95b886946d771a7f38e86d58f65e","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________4141ef3f730fc811930a66c088486c34","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} -{"id":"50|doi_________e3b32800bba3187fbf7f470df08755ea","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"8.48190886761e-09"}]}]} -{"id":"50|doi_________d8d02aa2c2347ecc628724265c5cae27","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} -{"id":"50|doi_________08f1db95b06e62e34948f2b47ac449af","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} -{"id":"50|doi_________cbbab3f7306b0573664b94615f5c04e2","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} -{"id":"50|doi_________b24ab3e127aa67e2a1017292988d571f","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} -{"id":"50|doi_________eb9a4f92bd6934727e2722d22340884a","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} -{"id":"50|doi_________727484ef97b31ecb0806cc959869aa97","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} -{"id":"50|doi_________224c865ef04d10c117913fa08eeb1be4","scoreList":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09"}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0"}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09"}]}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationmatch.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationmatch.json deleted file mode 100644 index b458cbeb3e..0000000000 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationmatch.json +++ /dev/null @@ -1,6 +0,0 @@ -{"author":[{"fullname":"Tanouayi, Gnon","name":"Gnon","pid":[],"rank":1,"surname":"Tanouayi"},{"fullname":"GNANDI, Kissao","name":"Kissao","pid":[],"rank":2,"surname":"Gnandi"},{"fullname":"Ouro-Sama, Kamilou","name":"Kamilou","pid":[],"rank":3,"surname":"Ouro-Sama"},{"fullname":"Ahoudi, Housséni","name":"Housséni","pid":[],"rank":4,"surname":"Ahoudi"},{"fullname":"Solitoke, Hodabalo Dhéoulaba","name":"Hodabalo Dhéoulaba","pid":[],"rank":5,"surname":"Solitoke"},{"fullname":"Badassan, Tchaa Esso-Essinam","name":"Tchaa Esso-Essinam","pid":[],"rank":6,"surname":"Badassan"},{"fullname":"Nyametso, A. Yawovi","name":"A. Yawovi","pid":[],"rank":7,"surname":"Nyametso"},{"fullname":"Agbéko, Aduayi-Akué Adoté","name":"Aduayi-Akué Adoté","pid":[],"rank":8,"surname":"Agbéko"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Importer of dst articles previously hosted by inist Eid system account"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"dateofcollection":"2021-10-05T19:43:46.821Z","dateoftransformation":"2021-10-05T20:15:55.705Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This study is a contribution to the development of adsorption techniques for the removal of fluoride in natural waters. The work is carried out on a laboratory scale using local geo-materials sorbents, on the one hand the residues from the treatment of natural phosphorite of Hahotoé-Kpogamé and on the other hand the attapulgite clay mineral from the costal basin of Togo. The work carried out concerns the adsorption of fluoride on those sorbents. The following parameters are batch tested on synthetic fluoride solutions: time, solution pH, geo-material dose and fluoride concentration. The fluoride is analyzed by absorption spectrometry. The adsorption yields on the phosphorite treatment residues for aqueous fluoride solutions at an initial concentration of 10 mg/L and an adsorbent concentration of 10 g/L are 49 % at pH 6.5 and 66 % at pH 4.0. In the same experimental conditions, the yields on clay minerals are 28.2 % and 36.3 %. These yields are logically improved by increasing the adsorbent dosage (from 2 to 30 g/L). Additional tests are carried out on natural water at an initial fluoride concentration of 3.76 mg/L."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Cette étude est une contribution au développement de techniques d’adsorption pour l’élimination du fluor dans les eaux naturelles. Les travaux ont été réalisés à l’échelle du laboratoire en utilisant comme sorbants des géo-matériaux locaux, d’une part les résidus du traitement des phosphates naturels de Hahotoé-Kpogamé et d’autre part l’argilite feuilletée du bassin sédimentaire côtier du Togo. Les travaux réalisés concernent l’adsorption du fluor sur les sorbants considérés. Les paramètres suivants ont été testés en batch sur des solutions synthétiques de fluor : le temps, le pH de la solution, la dose du géo-matériau et la concentration du fluor. Le fluor a été dosé par spectromètrie d’absorption. Les rendements d’adsorption sur les résidus de traitement des phosphates pour des solutions aqueuses de fluor à concentration initiale de 10 mg/L et une concentration en adsorbant de 10 g/L ont été de 49 % à pH 6,5 et 66 % à pH de 4,0. Dans les mêmes conditions expérimentales, les rendements sur les argilites ont été de 28,2 % et 36,3 %. Ces rendements ont logiquement été améliorés en augmentant le dosage en adsorbant (de 2 à 30 g/L). Des essais complémentaires ont été réalisés sur une eau naturelle à une concentration initiale en fluor de 3,76 mg/L."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|doi_________b24ab3e127aa67e2a1017292988d571f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.4267/dechets-sciences-techniques.3534"}],"collectedfrom":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://eid.episciences.org/7781"]}],"language":{"classid":"fra/fre","classname":"French","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146800948,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Foai.episciences.org%2F","datestamp":"2017-05-01","harvestDate":"2021-10-05T19:43:46.821Z","identifier":"oai:episciences.org:eid:7781","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:episciences.org:eid:7781","50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"ISSN: 2778-844X"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Environnement, Ingénierie & Développement"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Episciences.org"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"eid:7781 - Environnement, Ingénierie & Développement, 2017-05-01, N°73 - mai 2017"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"clay"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"geo-materials"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphorite of Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"removal of fluoride"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"argilite"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"élimination du fluor"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"géo-matériaux"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphate de Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"[SDE.IE]Environmental Sciences/Environmental Engineering"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Défluoruration des eaux à l’aide des résidus du traitement des phosphates naturels et des argilites feuilletées"}]} -{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"dateofcollection":"2020-06-01T07:11:47.22Z","dateoftransformation":"2020-07-25T07:25:11.051Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The results of treatment of 21 patients with multiple injuries, including 12 (57.2%) patients with lesions of limb bones, 9 (42.8%) patients with injuries of the pelvis treated at the Department of Traumatology number 2, 5 for the period of 2013 to 2014 were analyzed by the authors. Developed gentle immobilization of the lower limbs in patients with multiple injuries provides adequate fixation and extension of the lower limb in the intensive care period and during emergency external fixation by the device of external fixation device shin bone or hip.The introduction of surgical treatment of patients in the acute period of polytrauma, using minimally invasive fracture fixation technology allowed providing the early stabilization of the victim’s condition, to avoid diagnostic errors and obtain positive results of treatment in 98% of cases."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"distributionlocation":"","hostedby":{"key":"10|07b5c0ccd4fe::fae55652d82f1c4f6a6e3d2637b9ebe1","value":"World Science"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://rsglobal.pl/index.php/ws/article/view/895"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2413-1032","issnPrinted":"2414-6404","name":"World Science","sp":"","vol":""},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635147062560,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frsglobal.pl%2Findex.php%2Findex%2Foai","datestamp":"2020-05-23T18:09:46Z","harvestDate":"2020-06-01T07:11:47.22Z","identifier":"oai:ojs2.rsglobal.pl:article/895","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:ojs2.rsglobal.pl:article/895","50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"RS Global Sp. z O.O."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Vol 3 No 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Том 3 № 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2414-6404"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2413-1032"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"}]} -{"author":[{"fullname":"Kemppainen, Mika","name":"Mika","pid":[],"rank":1,"surname":"Kemppainen"},{"fullname":"Virkkunen, Iikka","name":"Iikka","pid":[],"rank":2,"surname":"Virkkunen"},{"fullname":"Pitkänen, Jorma","name":"Jorma","pid":[],"rank":3,"surname":"Pitkänen"},{"fullname":"Paussu, Raimo","name":"Raimo","pid":[],"rank":4,"surname":"Paussu"},{"fullname":"Hänninen, Hannu","name":"Hannu","pid":[],"rank":5,"surname":"Hänninen"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2021-10-04T12:38:49.007Z","dateoftransformation":"2021-10-04T16:48:12.265Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::210c52944502777ba567442480e6a76e","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146785228,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-09-23T08:45:14Z","harvestDate":"2021-10-04T12:38:49.007Z","identifier":"oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::210c52944502777ba567442480e6a76e","oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kemppainen , M , Virkkunen , I , Pitkänen , J , Paussu , R & Hänninen , H 2003 , ' Comparison of realistic artificial cracks and in-service cracks ' , The e-Journal of Nondestructive Testing & Ultrasonics , vol. 8 , no. 3 , 6 . < http://www.ndt.net/article/ecndt02/401/401.htm >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Comparison of realistic artificial cracks and in-service cracks"}]} -{"author":[{"fullname":"Kelhä, Väinö","name":"Väinö","pid":[],"rank":1,"surname":"Kelhä"},{"fullname":"Manninen, M.","name":"M.","pid":[],"rank":2,"surname":"Manninen"},{"fullname":"Oittinen, P.","name":"P.","pid":[],"rank":3,"surname":"Oittinen"},{"fullname":"Tiesmäki, Jarkko","name":"Jarkko","pid":[],"rank":4,"surname":"Tiesmäki"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"dateofcollection":"2021-10-04T12:35:21.081Z","dateoftransformation":"2021-10-04T18:07:22.62Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3754cff043a1700077031ea29f8cc240","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146808523,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-08-31T11:55:56Z","harvestDate":"2021-10-04T12:35:21.081Z","identifier":"oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::3754cff043a1700077031ea29f8cc240","oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kelhä , V , Manninen , M , Oittinen , P & Tiesmäki , J 1974 , ' A parallel plate tackmeter for measuring the splitting resistance of printing inks ' , Surface Coatings International: JOCCA , vol. 57 , pp. 184-188 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A parallel plate tackmeter for measuring the splitting resistance of printing inks"}]} -{"author":[{"fullname":"Mononen, Petri","name":"Petri","pid":[],"rank":1,"surname":"Mononen"},{"fullname":"Innamaa, Satu","name":"Satu","pid":[],"rank":2,"surname":"Innamaa"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"2021-10-04T12:44:18.145Z","dateoftransformation":"2021-10-04T18:11:36.436Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3875365f5052758953b072682e62bc80","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8602dae4-00e8-4f45-828b-65a367eb4730"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146809712,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-09-17T12:01:51Z","harvestDate":"2021-10-04T12:44:18.145Z","identifier":"oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","50|355e65625b88::3875365f5052758953b072682e62bc80"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mononen , P & Innamaa , S 2013 , ' Enhancing journey quality : Field Operational Test of Aftermarket and Nomadic Devices in Vehicles ' , Baltic Transport Journal , pp. 46-47 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Enhancing journey quality:Field Operational Test of Aftermarket and Nomadic Devices in Vehicles"}]} -{"author":[{"fullname":"Tsupari, Eemeli","name":"Eemeli","pid":[],"rank":1,"surname":"Tsupari"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"dateofcollection":"2021-10-04T12:44:48.87Z","dateoftransformation":"2021-10-04T19:47:43.327Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Despite international agreements, global greenhouse gas (GHG) emissions have not decreased according to the targets. Consequently, our generation is creating an enormous problem for future generations. As climate change is a global problem, GHG emissions must decrease globally. Consequently, international policies are needed, actions should be effective and the impacts should be assessed with broad boundaries. In Europe, the cornerstone of climate policy is the EU Emissions Trading Scheme (EU ETS) but the rebound impacts within the EU ETS are often excluded in the assessments. This dissertation examines the impacts of major CO2 emission reduction solutions with different system boundaries, highlighting the importance of boundary selection on the results. In addition, the economic feasibilities of the selected solutions are evaluated.The case examples represent the most important sectors in terms of global CO2 emissions, such as electricity and heat production, the steel industry and transport. The studied technologies include efficient Waste-to-Energy (WtE) concepts with high power-to-heat ratio, utilisation of CO2 Capture and Storage (CCS) in different applications, replacing steel mill blast furnaces with Oxygen Blast Furnaces (OBF), Combined Heat and Power (CHP) and Carbon Capture and Utilisation (CCU) for storable fuels, which can be used for example in transportation. The results highlight the importance of the consequences in the electricity production system as well as the rebound impacts in the EU ETS. For example, the studied concepts to decrease direct GHG emissions of steel mills lead to increased power purchase from markets and consequently increase in emissions of the power system. The impacts of CCU concepts based on electrolysis increase the emissions in electricity production but enable a decrease in the usage of fossil fuels in transportation. In addition, converting electricity to storable fuels enable higher shares of variable solar and wind energy in the power systems. The consequences in the power systems are complex, including for example the impacts on electricity imports and exports, future investments and the EU ETS. Even if these impacts can be recognised by qualitative means, unambiguous quantitative consequences cannot be given. Understanding the decisive impacts of the framework and boundaries is crucial to interpreting different assessments and making effective actions and policy decisions. Solutions which decrease emissions within a narrow system boundary can actually increase the emissions of the broader system."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::54a1c76f520bb2c8da27d12e42891088","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0006","classname":"Doctoral thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d62ac5ef-7347-400f-95b2-59d970ceb505"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146840046,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-05-18T10:33:00Z","harvestDate":"2021-10-04T12:44:48.87Z","identifier":"oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","50|355e65625b88::54a1c76f520bb2c8da27d12e42891088"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aalto University"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tsupari , E 2018 , ' Impact of system boundaries on the effectiveness of climate change mitigation actions : Dissertation ' , Doctor Degree , Aalto University . < http://urn.fi/URN:ISBN:978-952-60-8358-2 >"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"energy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"environmental science"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"climate change mitigation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"greenhouse gases"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"carbon dioxide"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"emissions trading"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"economic feasibility"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/dk/atira/pure/sustainabledevelopmentgoals/climate_action"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"SDG 13 - Climate Action"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Impact of system boundaries on the effectiveness of climate change mitigation actions:Dissertation"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json deleted file mode 100644 index e9d045acab..0000000000 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/bip/publicationnomatch.json +++ /dev/null @@ -1,6 +0,0 @@ -{"author":[{"fullname":"Tanouayi, Gnon","name":"Gnon","pid":[],"rank":1,"surname":"Tanouayi"},{"fullname":"GNANDI, Kissao","name":"Kissao","pid":[],"rank":2,"surname":"Gnandi"},{"fullname":"Ouro-Sama, Kamilou","name":"Kamilou","pid":[],"rank":3,"surname":"Ouro-Sama"},{"fullname":"Ahoudi, Housséni","name":"Housséni","pid":[],"rank":4,"surname":"Ahoudi"},{"fullname":"Solitoke, Hodabalo Dhéoulaba","name":"Hodabalo Dhéoulaba","pid":[],"rank":5,"surname":"Solitoke"},{"fullname":"Badassan, Tchaa Esso-Essinam","name":"Tchaa Esso-Essinam","pid":[],"rank":6,"surname":"Badassan"},{"fullname":"Nyametso, A. Yawovi","name":"A. Yawovi","pid":[],"rank":7,"surname":"Nyametso"},{"fullname":"Agbéko, Aduayi-Akué Adoté","name":"Aduayi-Akué Adoté","pid":[],"rank":8,"surname":"Agbéko"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Importer of dst articles previously hosted by inist Eid system account"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"dateofcollection":"2021-10-05T19:43:46.821Z","dateoftransformation":"2021-10-05T20:15:55.705Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This study is a contribution to the development of adsorption techniques for the removal of fluoride in natural waters. The work is carried out on a laboratory scale using local geo-materials sorbents, on the one hand the residues from the treatment of natural phosphorite of Hahotoé-Kpogamé and on the other hand the attapulgite clay mineral from the costal basin of Togo. The work carried out concerns the adsorption of fluoride on those sorbents. The following parameters are batch tested on synthetic fluoride solutions: time, solution pH, geo-material dose and fluoride concentration. The fluoride is analyzed by absorption spectrometry. The adsorption yields on the phosphorite treatment residues for aqueous fluoride solutions at an initial concentration of 10 mg/L and an adsorbent concentration of 10 g/L are 49 % at pH 6.5 and 66 % at pH 4.0. In the same experimental conditions, the yields on clay minerals are 28.2 % and 36.3 %. These yields are logically improved by increasing the adsorbent dosage (from 2 to 30 g/L). Additional tests are carried out on natural water at an initial fluoride concentration of 3.76 mg/L."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Cette étude est une contribution au développement de techniques d’adsorption pour l’élimination du fluor dans les eaux naturelles. Les travaux ont été réalisés à l’échelle du laboratoire en utilisant comme sorbants des géo-matériaux locaux, d’une part les résidus du traitement des phosphates naturels de Hahotoé-Kpogamé et d’autre part l’argilite feuilletée du bassin sédimentaire côtier du Togo. Les travaux réalisés concernent l’adsorption du fluor sur les sorbants considérés. Les paramètres suivants ont été testés en batch sur des solutions synthétiques de fluor : le temps, le pH de la solution, la dose du géo-matériau et la concentration du fluor. Le fluor a été dosé par spectromètrie d’absorption. Les rendements d’adsorption sur les résidus de traitement des phosphates pour des solutions aqueuses de fluor à concentration initiale de 10 mg/L et une concentration en adsorbant de 10 g/L ont été de 49 % à pH 6,5 et 66 % à pH de 4,0. Dans les mêmes conditions expérimentales, les rendements sur les argilites ont été de 28,2 % et 36,3 %. Ces rendements ont logiquement été améliorés en augmentant le dosage en adsorbant (de 2 à 30 g/L). Des essais complémentaires ont été réalisés sur une eau naturelle à une concentration initiale en fluor de 3,76 mg/L."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.4267/dechets-sciences-techniques.3534"}],"collectedfrom":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://eid.episciences.org/7781"]}],"language":{"classid":"fra/fre","classname":"French","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146800948,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Foai.episciences.org%2F","datestamp":"2017-05-01","harvestDate":"2021-10-05T19:43:46.821Z","identifier":"oai:episciences.org:eid:7781","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:episciences.org:eid:7781","50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"ISSN: 2778-844X"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Environnement, Ingénierie & Développement"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Episciences.org"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"eid:7781 - Environnement, Ingénierie & Développement, 2017-05-01, N°73 - mai 2017"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"clay"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"geo-materials"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphorite of Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"removal of fluoride"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"argilite"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"élimination du fluor"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"géo-matériaux"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"phosphate de Hahotoé-Kpogamé"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"[SDE.IE]Environmental Sciences/Environmental Engineering"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Défluoruration des eaux à l’aide des résidus du traitement des phosphates naturels et des argilites feuilletées"}]} -{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"dateofcollection":"2020-06-01T07:11:47.22Z","dateoftransformation":"2020-07-25T07:25:11.051Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The results of treatment of 21 patients with multiple injuries, including 12 (57.2%) patients with lesions of limb bones, 9 (42.8%) patients with injuries of the pelvis treated at the Department of Traumatology number 2, 5 for the period of 2013 to 2014 were analyzed by the authors. Developed gentle immobilization of the lower limbs in patients with multiple injuries provides adequate fixation and extension of the lower limb in the intensive care period and during emergency external fixation by the device of external fixation device shin bone or hip.The introduction of surgical treatment of patients in the acute period of polytrauma, using minimally invasive fracture fixation technology allowed providing the early stabilization of the victim’s condition, to avoid diagnostic errors and obtain positive results of treatment in 98% of cases."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-02-28"},"distributionlocation":"","hostedby":{"key":"10|07b5c0ccd4fe::fae55652d82f1c4f6a6e3d2637b9ebe1","value":"World Science"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://rsglobal.pl/index.php/ws/article/view/895"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2413-1032","issnPrinted":"2414-6404","name":"World Science","sp":"","vol":""},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635147062560,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frsglobal.pl%2Findex.php%2Findex%2Foai","datestamp":"2020-05-23T18:09:46Z","harvestDate":"2020-06-01T07:11:47.22Z","identifier":"oai:ojs2.rsglobal.pl:article/895","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:ojs2.rsglobal.pl:article/895","50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"RS Global Sp. z O.O."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Vol 3 No 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"World Science; Том 3 № 2(6) (2016): World Science; 43-50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2414-6404"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2413-1032"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Polytrauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"multiple trauma"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"combined injury injury"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the severity of the damage"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"the device of external fixator"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"submersible osteosynthesis"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"«damage control»"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА"}]} -{"author":[{"fullname":"Kemppainen, Mika","name":"Mika","pid":[],"rank":1,"surname":"Kemppainen"},{"fullname":"Virkkunen, Iikka","name":"Iikka","pid":[],"rank":2,"surname":"Virkkunen"},{"fullname":"Pitkänen, Jorma","name":"Jorma","pid":[],"rank":3,"surname":"Pitkänen"},{"fullname":"Paussu, Raimo","name":"Raimo","pid":[],"rank":4,"surname":"Paussu"},{"fullname":"Hänninen, Hannu","name":"Hannu","pid":[],"rank":5,"surname":"Hänninen"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"dateofcollection":"2021-10-04T12:38:49.007Z","dateoftransformation":"2021-10-04T16:48:12.265Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::210c52944502777ba567442480e6a76e","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146785228,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-09-23T08:45:14Z","harvestDate":"2021-10-04T12:38:49.007Z","identifier":"oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::210c52944502777ba567442480e6a76e","oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kemppainen , M , Virkkunen , I , Pitkänen , J , Paussu , R & Hänninen , H 2003 , ' Comparison of realistic artificial cracks and in-service cracks ' , The e-Journal of Nondestructive Testing & Ultrasonics , vol. 8 , no. 3 , 6 . < http://www.ndt.net/article/ecndt02/401/401.htm >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Comparison of realistic artificial cracks and in-service cracks"}]} -{"author":[{"fullname":"Kelhä, Väinö","name":"Väinö","pid":[],"rank":1,"surname":"Kelhä"},{"fullname":"Manninen, M.","name":"M.","pid":[],"rank":2,"surname":"Manninen"},{"fullname":"Oittinen, P.","name":"P.","pid":[],"rank":3,"surname":"Oittinen"},{"fullname":"Tiesmäki, Jarkko","name":"Jarkko","pid":[],"rank":4,"surname":"Tiesmäki"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"dateofcollection":"2021-10-04T12:35:21.081Z","dateoftransformation":"2021-10-04T18:07:22.62Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3754cff043a1700077031ea29f8cc240","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1974-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146808523,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-08-31T11:55:56Z","harvestDate":"2021-10-04T12:35:21.081Z","identifier":"oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::3754cff043a1700077031ea29f8cc240","oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kelhä , V , Manninen , M , Oittinen , P & Tiesmäki , J 1974 , ' A parallel plate tackmeter for measuring the splitting resistance of printing inks ' , Surface Coatings International: JOCCA , vol. 57 , pp. 184-188 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A parallel plate tackmeter for measuring the splitting resistance of printing inks"}]} -{"author":[{"fullname":"Mononen, Petri","name":"Petri","pid":[],"rank":1,"surname":"Mononen"},{"fullname":"Innamaa, Satu","name":"Satu","pid":[],"rank":2,"surname":"Innamaa"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"2021-10-04T12:44:18.145Z","dateoftransformation":"2021-10-04T18:11:36.436Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::3875365f5052758953b072682e62bc80","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8602dae4-00e8-4f45-828b-65a367eb4730"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146809712,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-09-17T12:01:51Z","harvestDate":"2021-10-04T12:44:18.145Z","identifier":"oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","50|355e65625b88::3875365f5052758953b072682e62bc80"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mononen , P & Innamaa , S 2013 , ' Enhancing journey quality : Field Operational Test of Aftermarket and Nomadic Devices in Vehicles ' , Baltic Transport Journal , pp. 46-47 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Enhancing journey quality:Field Operational Test of Aftermarket and Nomadic Devices in Vehicles"}]} -{"author":[{"fullname":"Tsupari, Eemeli","name":"Eemeli","pid":[],"rank":1,"surname":"Tsupari"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"dateofcollection":"2021-10-04T12:44:48.87Z","dateoftransformation":"2021-10-04T19:47:43.327Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Despite international agreements, global greenhouse gas (GHG) emissions have not decreased according to the targets. Consequently, our generation is creating an enormous problem for future generations. As climate change is a global problem, GHG emissions must decrease globally. Consequently, international policies are needed, actions should be effective and the impacts should be assessed with broad boundaries. In Europe, the cornerstone of climate policy is the EU Emissions Trading Scheme (EU ETS) but the rebound impacts within the EU ETS are often excluded in the assessments. This dissertation examines the impacts of major CO2 emission reduction solutions with different system boundaries, highlighting the importance of boundary selection on the results. In addition, the economic feasibilities of the selected solutions are evaluated.The case examples represent the most important sectors in terms of global CO2 emissions, such as electricity and heat production, the steel industry and transport. The studied technologies include efficient Waste-to-Energy (WtE) concepts with high power-to-heat ratio, utilisation of CO2 Capture and Storage (CCS) in different applications, replacing steel mill blast furnaces with Oxygen Blast Furnaces (OBF), Combined Heat and Power (CHP) and Carbon Capture and Utilisation (CCU) for storable fuels, which can be used for example in transportation. The results highlight the importance of the consequences in the electricity production system as well as the rebound impacts in the EU ETS. For example, the studied concepts to decrease direct GHG emissions of steel mills lead to increased power purchase from markets and consequently increase in emissions of the power system. The impacts of CCU concepts based on electrolysis increase the emissions in electricity production but enable a decrease in the usage of fossil fuels in transportation. In addition, converting electricity to storable fuels enable higher shares of variable solar and wind energy in the power systems. The consequences in the power systems are complex, including for example the impacts on electricity imports and exports, future investments and the EU ETS. Even if these impacts can be recognised by qualitative means, unambiguous quantitative consequences cannot be given. Understanding the decisive impacts of the framework and boundaries is crucial to interpreting different assessments and making effective actions and policy decisions. Solutions which decrease emissions within a narrow system boundary can actually increase the emissions of the broader system."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::54a1c76f520bb2c8da27d12e42891088","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0006","classname":"Doctoral thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d62ac5ef-7347-400f-95b2-59d970ceb505"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635146840046,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-05-18T10:33:00Z","harvestDate":"2021-10-04T12:44:48.87Z","identifier":"oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","50|355e65625b88::54a1c76f520bb2c8da27d12e42891088"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aalto University"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tsupari , E 2018 , ' Impact of system boundaries on the effectiveness of climate change mitigation actions : Dissertation ' , Doctor Degree , Aalto University . < http://urn.fi/URN:ISBN:978-952-60-8358-2 >"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"energy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"environmental science"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"climate change mitigation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"greenhouse gases"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"carbon dioxide"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"emissions trading"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"economic feasibility"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/dk/atira/pure/sustainabledevelopmentgoals/climate_action"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"SDG 13 - Climate Action"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Impact of system boundaries on the effectiveness of climate change mitigation actions:Dissertation"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json deleted file mode 100644 index 40ad881e38..0000000000 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/fos_prepared.json +++ /dev/null @@ -1,50 +0,0 @@ -{"doi":"50|doi_________b24ab3e127aa67e2a1017292988d571f","level1":"engineering and technology","level2":"nano-technology","level3":"nanoscience & nanotechnology"} -{"doi":"50|doi_________f648499be6ba8e83226834167f22a7cb","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"} -{"doi":"50|doi_________439b0885db30c66ecf62a2a6a4451116","level1":"social sciences","level2":"psychology and cognitive sciences","level3":"NULL"} -{"doi":"50|doi_________c22f065c9ab34fee87a3952fb79f5ee6","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"50|doi_________9ea3d8140aaa8add5e929be92f0dab13","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"50|doi_________12f446645a131ab55fc5dabf6692da31","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} -{"doi":"50|doi_________550a40c9b57fcc974c127ad69dd60df2","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"environmental sciences"} -{"doi":"50|doi_________6b95eee8d0eff26b7cca8e960968ad62","level1":"medical and health sciences","level2":"clinical medicine","level3":"oncology & carcinogenesis"} -{"doi":"50|doi_________5ff24d570b3984ddcf04f58b771ad635","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} -{"doi":"50|doi_________b909cd0953065bc611bac1d89e96bf6c","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} -{"doi":"50|doi_________d56d9dc21f317b3e009d5b6c8ea87212","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"50|doi_________3ea4d5309ce7934ed281342dd1f70867","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"50|doi_________f23029f6f070b4b2e1c852ce7f9f5f32","level1":"medical and health sciences","level2":"other medical science","level3":"health policy & services"} -{"doi":"50|doi_________2ea2433fb314647e72cab828dd529f00","level1":"natural sciences","level2":"biological sciences","level3":"plant biology & botany"} -{"doi":"50|doi_________ffe10c217b3a96f7584cf09fdad579e1","level1":"engineering and technology","level2":"NULL","level3":"NULL"} -{"doi":"50|doi_________45e682be5a57e2fabea2c02ba0752f1a","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"50|doi_________312f5db945545c66e6f18c62faf6290c","level1":"medical and health sciences","level2":"health sciences","level3":"NULL"} -{"doi":"50|doi_________d53bd3a4e48921415e554a4edc91d6db","level1":"natural sciences","level2":"physical sciences","level3":"NULL"} -{"doi":"50|doi_________2819592482582ff33363069d116abd64","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"50|doi_________e41a47550fed93904e443123b752bc2b","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} -{"doi":"50|doi_________43e8bf6e95cd3043f510771cf0b0c984","level1":"engineering and technology","level2":"mechanical engineering","level3":"mechanical engineering & transports"} -{"doi":"50|doi_________000c1dc14e99b89fc52976533338fe4c","level1":"engineering and technology","level2":"mechanical engineering","level3":"mechanical engineering & transports"} -{"doi":"50|doi_________5cf55e49aebd633f1326d28451d1f6e5","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"computer hardware & architecture"} -{"doi":"50|doi_________5b79bd7bd9f87361b4a4abc3cbb2df75","level1":"natural sciences","level2":"mathematics","level3":"numerical & computational mathematics"} -{"doi":"50|doi_________51a7b4738d332570beb98be13e95d369","level1":"natural sciences","level2":"chemical sciences","level3":"NULL"} -{"doi":"50|doi_________6a9271220296585204ff81d651215d63","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} -{"doi":"50|doi_________a2b8554df106bb29a0035e2ea56046a2","level1":"medical and health sciences","level2":"health sciences","level3":"biochemistry & molecular biology"} -{"doi":"50|doi_________fa2b92d5140f67a6c69c970a9e8d2cf0","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"} -{"doi":"50|doi_________b5f6c78a31abbb806e1e375a73231dd1","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"} -{"doi":"50|doi_________358777f48b491554cdee63c4dcabe6aa","level1":"natural sciences","level2":"chemical sciences","level3":"physical chemistry"} -{"doi":"50|doi_________809f282159a9f46a41f92b6fc8d1095f","level1":"natural sciences","level2":"biological sciences","level3":"marine biology & hydrobiology"} -{"doi":"50|doi_________94f1b0b1509700d4370cbe7571000bfc","level1":"engineering and technology","level2":"industrial biotechnology","level3":"industrial engineering & automation"} -{"doi":"50|doi_________33390d9d5163da63f73acd173ef704e0","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} -{"doi":"50|doi_________3149f0a909641720480f62faf1f886b9","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} -{"doi":"50|doi_________1e29deb6473bdb2c84e5966fc4c557bb","level1":"medical and health sciences","level2":"basic medicine","level3":"NULL"} -{"doi":"50|doi_________effab60be43d653f2cccf1cf6de461df","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"electrical & electronic engineering"} -{"doi":"50|doi_________14a6ce8c28da6e82412720f1330e826d","level1":"natural sciences","level2":"biological sciences","level3":"NULL"} -{"doi":"50|doi_________f59abf7f96244398a465b6e9a7375311","level1":"natural sciences","level2":"earth and related environmental sciences","level3":"NULL"} -{"doi":"50|doi_________b1b7b5c7251fa1901583ec4840e2d3ec","level1":"medical and health sciences","level2":"basic medicine","level3":"biochemistry & molecular biology"} -{"doi":"50|doi_________e024d1b738df3b24bc58fa0228542571","level1":"natural sciences","level2":"physical sciences","level3":"nuclear & particles physics"} -{"doi":"50|doi_________0e03d3592dca1baedfc74f1fbe0b9f22","level1":"natural sciences","level2":"NULL","level3":"NULL"} -{"doi":"50|doi_________0e1777e31f32984e5a261205be28da69","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"} -{"doi":"50|doi_________7b464a5eb02a959aeedc7b051fe0f89f","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"} -{"doi":"50|doi_________32fed3ec7c9ac157b73e050ff1c158d3","level1":"engineering and technology","level2":"electrical engineering, electronic engineering, information engineering","level3":"networking & telecommunications"} -{"doi":"50|doi_________8302f548fe724bc58e53e2bd329cb3c3","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} -{"doi":"50|doi_________44fbef7e57e4123ef44f10bb073f9ef8","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} -{"doi":"50|doi_________c8e7d24b1649024e85bcf9a4c520a65b","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} -{"doi":"50|doi_________877a3c4a72d2a6b4aa60a2da016237df","level1":"medical and health sciences","level2":"health sciences","level3":"genetics & heredity"} -{"doi":"50|doi_________7af7cda00a082fa8624493d74357d3f7","level1":"engineering and technology","level2":"other engineering and technologies","level3":"building & construction"} -{"doi":"50|doi_________20201cc71fd003dae0d58016dd4ef4af","level1":"agricultural and veterinary sciences","level2":"agriculture, forestry, and fisheries","level3":"agronomy & agriculture"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/publicationmatch.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/publicationmatch.json deleted file mode 100644 index a01ce3f687..0000000000 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bypassactionset/fos/publicationmatch.json +++ /dev/null @@ -1,6 +0,0 @@ -{"collectedfrom":[{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":true,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635146800948,"id":"50|doi_________b24ab3e127aa67e2a1017292988d571f","originalId":["oai:episciences.org:eid:7781","50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072"],"pid":[],"dateofcollection":"2021-10-05T19:43:46.821Z","dateoftransformation":"2021-10-05T20:15:55.705Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-10-05T19:43:46.821Z","altered":true,"baseURL":"https%3A%2F%2Foai.episciences.org%2F","identifier":"oai:episciences.org:eid:7781","datestamp":"2017-05-01","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":[{"id":"influence","unit":[{"key":"score","value":"5.91019644836e-09","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity_alt","unit":[{"key":"score","value":"0.0","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity","unit":[{"key":"score","value":"9.88840807598e-09","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]}],"author":[{"fullname":"Tanouayi, Gnon","name":"Gnon","surname":"Tanouayi","rank":1,"pid":[],"affiliation":null},{"fullname":"GNANDI, Kissao","name":"Kissao","surname":"Gnandi","rank":2,"pid":[],"affiliation":null},{"fullname":"Ouro-Sama, Kamilou","name":"Kamilou","surname":"Ouro-Sama","rank":3,"pid":[],"affiliation":null},{"fullname":"Ahoudi, Housséni","name":"Housséni","surname":"Ahoudi","rank":4,"pid":[],"affiliation":null},{"fullname":"Solitoke, Hodabalo Dhéoulaba","name":"Hodabalo Dhéoulaba","surname":"Solitoke","rank":5,"pid":[],"affiliation":null},{"fullname":"Badassan, Tchaa Esso-Essinam","name":"Tchaa Esso-Essinam","surname":"Badassan","rank":6,"pid":[],"affiliation":null},{"fullname":"Nyametso, A. Yawovi","name":"A. Yawovi","surname":"Nyametso","rank":7,"pid":[],"affiliation":null},{"fullname":"Agbéko, Aduayi-Akué Adoté","name":"Aduayi-Akué Adoté","surname":"Agbéko","rank":8,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"fra/fre","classname":"French","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"clay","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"geo-materials","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"phosphorite of Hahotoé-Kpogamé","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"removal of fluoride","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"argilite","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"élimination du fluor","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"géo-matériaux","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"phosphate de Hahotoé-Kpogamé","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"[SDE.IE]Environmental Sciences/Environmental Engineering","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Défluoruration des eaux à l’aide des résidus du traitement des phosphates naturels et des argilites feuilletées","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"This study is a contribution to the development of adsorption techniques for the removal of fluoride in natural waters. The work is carried out on a laboratory scale using local geo-materials sorbents, on the one hand the residues from the treatment of natural phosphorite of Hahotoé-Kpogamé and on the other hand the attapulgite clay mineral from the costal basin of Togo. The work carried out concerns the adsorption of fluoride on those sorbents. The following parameters are batch tested on synthetic fluoride solutions: time, solution pH, geo-material dose and fluoride concentration. The fluoride is analyzed by absorption spectrometry. The adsorption yields on the phosphorite treatment residues for aqueous fluoride solutions at an initial concentration of 10 mg/L and an adsorbent concentration of 10 g/L are 49 % at pH 6.5 and 66 % at pH 4.0. In the same experimental conditions, the yields on clay minerals are 28.2 % and 36.3 %. These yields are logically improved by increasing the adsorbent dosage (from 2 to 30 g/L). Additional tests are carried out on natural water at an initial fluoride concentration of 3.76 mg/L.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Cette étude est une contribution au développement de techniques d’adsorption pour l’élimination du fluor dans les eaux naturelles. Les travaux ont été réalisés à l’échelle du laboratoire en utilisant comme sorbants des géo-matériaux locaux, d’une part les résidus du traitement des phosphates naturels de Hahotoé-Kpogamé et d’autre part l’argilite feuilletée du bassin sédimentaire côtier du Togo. Les travaux réalisés concernent l’adsorption du fluor sur les sorbants considérés. Les paramètres suivants ont été testés en batch sur des solutions synthétiques de fluor : le temps, le pH de la solution, la dose du géo-matériau et la concentration du fluor. Le fluor a été dosé par spectromètrie d’absorption. Les rendements d’adsorption sur les résidus de traitement des phosphates pour des solutions aqueuses de fluor à concentration initiale de 10 mg/L et une concentration en adsorbant de 10 g/L ont été de 49 % à pH 6,5 et 66 % à pH de 4,0. Dans les mêmes conditions expérimentales, les rendements sur les argilites ont été de 28,2 % et 36,3 %. Ces rendements ont logiquement été améliorés en augmentant le dosage en adsorbant (de 2 à 30 g/L). Des essais complémentaires ont été réalisés sur une eau naturelle à une concentration initiale en fluor de 3,76 mg/L.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-05-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[{"value":"ISSN: 2778-844X","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Environnement, Ingénierie & Développement","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Episciences.org","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"eid:7781 - Environnement, Ingénierie & Développement, 2017-05-01, N°73 - mai 2017","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[],"contributor":[{"value":"Importer of dst articles previously hosted by inist Eid system account","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences","dataInfo":null},"url":["https://eid.episciences.org/7781"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::6824b298c96ba906a3e6a70593affbf5","value":"Episciences","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.4267/dechets-sciences-techniques.3534","qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-05-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":null} -{"collectedfrom":[{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635147062560,"id":"50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b","originalId":["oai:ojs2.rsglobal.pl:article/895","50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b"],"pid":[],"dateofcollection":"2020-06-01T07:11:47.22Z","dateoftransformation":"2020-07-25T07:25:11.051Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-06-01T07:11:47.22Z","altered":true,"baseURL":"https%3A%2F%2Frsglobal.pl%2Findex.php%2Findex%2Foai","identifier":"oai:ojs2.rsglobal.pl:article/895","datestamp":"2020-05-23T18:09:46Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":null,"author":[],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Polytrauma","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"multiple trauma","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"combined injury injury","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"the severity of the damage","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"the device of external fixator","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"submersible osteosynthesis","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"«damage control»","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Polytrauma","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"multiple trauma","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"combined injury injury","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"the severity of the damage","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"the device of external fixator","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"submersible osteosynthesis","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"«damage control»","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"ЛЕЧЕНИЯ ПАЦИЕНТОВ С СОЧЕТАННЫМИ И МНОЖЕСТВЕННЫМИ ТРАВМАМИ КОНЕЧНОСТЕЙ И КОСТЕЙ ТАЗА","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The results of treatment of 21 patients with multiple injuries, including 12 (57.2%) patients with lesions of limb bones, 9 (42.8%) patients with injuries of the pelvis treated at the Department of Traumatology number 2, 5 for the period of 2013 to 2014 were analyzed by the authors. Developed gentle immobilization of the lower limbs in patients with multiple injuries provides adequate fixation and extension of the lower limb in the intensive care period and during emergency external fixation by the device of external fixation device shin bone or hip.The introduction of surgical treatment of patients in the acute period of polytrauma, using minimally invasive fracture fixation technology allowed providing the early stabilization of the victim’s condition, to avoid diagnostic errors and obtain positive results of treatment in 98% of cases.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2016-02-28","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"RS Global Sp. z O.O.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"World Science; Vol 3 No 2(6) (2016): World Science; 43-50","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"World Science; Том 3 № 2(6) (2016): World Science; 43-50","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2414-6404","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2413-1032","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"application/pdf","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":{"value":"https://creativecommons.org/licenses/by/4.0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|07b5c0ccd4fe::fae55652d82f1c4f6a6e3d2637b9ebe1","value":"World Science","dataInfo":null},"url":["https://rsglobal.pl/index.php/ws/article/view/895"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::160a261e5d06fd542c2efcac6e17e08c","value":"RS Global Journals","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2016-02-28","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"World Science","issnPrinted":"2414-6404","issnOnline":"2413-1032","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}} -{"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":true,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635146785228,"id":"50|355e65625b88::210c52944502777ba567442480e6a76e","originalId":["50|355e65625b88::210c52944502777ba567442480e6a76e","oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"],"pid":[],"dateofcollection":"2021-10-04T12:38:49.007Z","dateoftransformation":"2021-10-04T16:48:12.265Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-10-04T12:38:49.007Z","altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","identifier":"oai:cris.vtt.fi:publications/66c69ab9-a33b-4817-97cd-1f38b3b31405","datestamp":"2019-09-23T08:45:14Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":null,"author":[{"fullname":"Kemppainen, Mika","name":"Mika","surname":"Kemppainen","rank":1,"pid":[],"affiliation":null},{"fullname":"Virkkunen, Iikka","name":"Iikka","surname":"Virkkunen","rank":2,"pid":[],"affiliation":null},{"fullname":"Pitkänen, Jorma","name":"Jorma","surname":"Pitkänen","rank":3,"pid":[],"affiliation":null},{"fullname":"Paussu, Raimo","name":"Raimo","surname":"Paussu","rank":4,"pid":[],"affiliation":null},{"fullname":"Hänninen, Hannu","name":"Hannu","surname":"Hänninen","rank":5,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"title":[{"value":"Comparison of realistic artificial cracks and in-service cracks","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"2003-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[{"value":"Kemppainen , M , Virkkunen , I , Pitkänen , J , Paussu , R & Hänninen , H 2003 , ' Comparison of realistic artificial cracks and in-service cracks ' , The e-Journal of Nondestructive Testing & Ultrasonics , vol. 8 , no. 3 , 6 . < http://www.ndt.net/article/ecndt02/401/401.htm >","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"url":["https://cris.vtt.fi/en/publications/66c69ab9-a33b-4817-97cd-1f38b3b31405"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2003-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":null} -{"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":true,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635146808523,"id":"50|355e65625b88::3754cff043a1700077031ea29f8cc240","originalId":["50|355e65625b88::3754cff043a1700077031ea29f8cc240","oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"],"pid":[],"dateofcollection":"2021-10-04T12:35:21.081Z","dateoftransformation":"2021-10-04T18:07:22.62Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-10-04T12:35:21.081Z","altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","identifier":"oai:cris.vtt.fi:publications/d2b5302f-004a-407c-8f9c-4312e0dbf679","datestamp":"2021-08-31T11:55:56Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":null,"author":[{"fullname":"Kelhä, Väinö","name":"Väinö","surname":"Kelhä","rank":1,"pid":[],"affiliation":null},{"fullname":"Manninen, M.","name":"M.","surname":"Manninen","rank":2,"pid":[],"affiliation":null},{"fullname":"Oittinen, P.","name":"P.","surname":"Oittinen","rank":3,"pid":[],"affiliation":null},{"fullname":"Tiesmäki, Jarkko","name":"Jarkko","surname":"Tiesmäki","rank":4,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"title":[{"value":"A parallel plate tackmeter for measuring the splitting resistance of printing inks","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"1974-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[{"value":"Kelhä , V , Manninen , M , Oittinen , P & Tiesmäki , J 1974 , ' A parallel plate tackmeter for measuring the splitting resistance of printing inks ' , Surface Coatings International: JOCCA , vol. 57 , pp. 184-188 .","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"url":["https://cris.vtt.fi/en/publications/d2b5302f-004a-407c-8f9c-4312e0dbf679"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"1974-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":null} -{"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635146809712,"id":"50|355e65625b88::3875365f5052758953b072682e62bc80","originalId":["oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","50|355e65625b88::3875365f5052758953b072682e62bc80"],"pid":[],"dateofcollection":"2021-10-04T12:44:18.145Z","dateoftransformation":"2021-10-04T18:11:36.436Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-10-04T12:44:18.145Z","altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","identifier":"oai:cris.vtt.fi:publications/8602dae4-00e8-4f45-828b-65a367eb4730","datestamp":"2021-09-17T12:01:51Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":null,"author":[{"fullname":"Mononen, Petri","name":"Petri","surname":"Mononen","rank":1,"pid":[],"affiliation":null},{"fullname":"Innamaa, Satu","name":"Satu","surname":"Innamaa","rank":2,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"title":[{"value":"Enhancing journey quality:Field Operational Test of Aftermarket and Nomadic Devices in Vehicles","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[],"dateofacceptance":{"value":"2013-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":null,"embargoenddate":null,"source":[{"value":"Mononen , P & Innamaa , S 2013 , ' Enhancing journey quality : Field Operational Test of Aftermarket and Nomadic Devices in Vehicles ' , Baltic Transport Journal , pp. 46-47 .","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"url":["https://cris.vtt.fi/en/publications/8602dae4-00e8-4f45-828b-65a367eb4730"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2013-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":null} -{"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1635146840046,"id":"50|355e65625b88::54a1c76f520bb2c8da27d12e42891088","originalId":["oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","50|355e65625b88::54a1c76f520bb2c8da27d12e42891088"],"pid":[],"dateofcollection":"2021-10-04T12:44:48.87Z","dateoftransformation":"2021-10-04T19:47:43.327Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-10-04T12:44:48.87Z","altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","identifier":"oai:cris.vtt.fi:publications/d62ac5ef-7347-400f-95b2-59d970ceb505","datestamp":"2021-05-18T10:33:00Z","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":null,"author":[{"fullname":"Tsupari, Eemeli","name":"Eemeli","surname":"Tsupari","rank":1,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"energy","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"environmental science","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"climate change mitigation","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"greenhouse gases","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"carbon dioxide","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"emissions trading","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"economic feasibility","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"/dk/atira/pure/sustainabledevelopmentgoals/climate_action","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"SDG 13 - Climate Action","qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Impact of system boundaries on the effectiveness of climate change mitigation actions:Dissertation","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"Despite international agreements, global greenhouse gas (GHG) emissions have not decreased according to the targets. Consequently, our generation is creating an enormous problem for future generations. As climate change is a global problem, GHG emissions must decrease globally. Consequently, international policies are needed, actions should be effective and the impacts should be assessed with broad boundaries. In Europe, the cornerstone of climate policy is the EU Emissions Trading Scheme (EU ETS) but the rebound impacts within the EU ETS are often excluded in the assessments. This dissertation examines the impacts of major CO2 emission reduction solutions with different system boundaries, highlighting the importance of boundary selection on the results. In addition, the economic feasibilities of the selected solutions are evaluated.The case examples represent the most important sectors in terms of global CO2 emissions, such as electricity and heat production, the steel industry and transport. The studied technologies include efficient Waste-to-Energy (WtE) concepts with high power-to-heat ratio, utilisation of CO2 Capture and Storage (CCS) in different applications, replacing steel mill blast furnaces with Oxygen Blast Furnaces (OBF), Combined Heat and Power (CHP) and Carbon Capture and Utilisation (CCU) for storable fuels, which can be used for example in transportation. The results highlight the importance of the consequences in the electricity production system as well as the rebound impacts in the EU ETS. For example, the studied concepts to decrease direct GHG emissions of steel mills lead to increased power purchase from markets and consequently increase in emissions of the power system. The impacts of CCU concepts based on electrolysis increase the emissions in electricity production but enable a decrease in the usage of fossil fuels in transportation. In addition, converting electricity to storable fuels enable higher shares of variable solar and wind energy in the power systems. The consequences in the power systems are complex, including for example the impacts on electricity imports and exports, future investments and the EU ETS. Even if these impacts can be recognised by qualitative means, unambiguous quantitative consequences cannot be given. Understanding the decisive impacts of the framework and boundaries is crucial to interpreting different assessments and making effective actions and policy decisions. Solutions which decrease emissions within a narrow system boundary can actually increase the emissions of the broader system.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2018-10-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Aalto University","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"Tsupari , E 2018 , ' Impact of system boundaries on the effectiveness of climate change mitigation actions : Dissertation ' , Doctor Degree , Aalto University . < http://urn.fi/URN:ISBN:978-952-60-8358-2 >","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0006","classname":"Doctoral thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"url":["https://cris.vtt.fi/en/publications/d62ac5ef-7347-400f-95b2-59d970ceb505"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2018-10-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":null} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index bf6301ec47..602aaf6e66 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -331,7 +331,6 @@ public class DumpJobTest { Assertions .assertEquals( Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode()); - Assertions.assertEquals(null, gr.getBestaccessright().getOpenAccessRoute()); Assertions.assertEquals("One Ecosystem", gr.getContainer().getName()); Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline()); From a7763d249205f71a6ae274eb3e4b7c3f53619aaf Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 12 Nov 2021 09:56:45 +0100 Subject: [PATCH 42/60] removed alternate identifier in resolutionMap --- .../resolution/SparkResolveRelation.scala | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala index 9e44e017ce..cd517dd5e2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveRelation.scala @@ -96,6 +96,21 @@ object SparkResolveRelation { .text(s"$graphBasePath/relation") } + def extractInstanceCF(input: String): List[(String, String)] = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: json4s.JValue = parse(input) + val result: List[(String, String)] = for { + JObject(iObj) <- json \ "instance" + JField("collectedfrom", JObject(cf)) <- iObj + JField("instancetype", JObject(instancetype)) <- iObj + JField("value", JString(collectedFrom)) <- cf + JField("classname", JString(classname)) <- instancetype + } yield (classname, collectedFrom) + + result + + } + def extractPidsFromRecord(input: String): (String, List[(String, String)]) = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats @@ -108,14 +123,7 @@ object SparkResolveRelation { JField("classid", JString(pidType)) <- qualifier } yield (pidValue, pidType) - val alternateIds: List[(String, String)] = for { - JObject(pids) <- json \\ "alternateIdentifier" - JField("value", JString(pidValue)) <- pids - JField("qualifier", JObject(qualifier)) <- pids - JField("classid", JString(pidType)) <- qualifier - } yield (pidValue, pidType) - - (id, result ::: alternateIds) + (id, result) } From 1f2a3d1af073d79b56499db89956b4fd8fae7096 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 12 Nov 2021 10:15:11 +0100 Subject: [PATCH 43/60] depending on dhp-schemas:2.8.22 (release) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 02bc5d8d4e..274192c676 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.8.21] + [2.8.22] [4.0.3] [6.0.5] [3.1.6] From 716021546e8a13cac61996fbc07d11e4997d5a4f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 12 Nov 2021 10:18:01 +0100 Subject: [PATCH 44/60] [Bypass Action Set] minor fix --- .../PrepareBipFinder.java | 2 +- .../PrepareFOSSparkJob.java | 2 +- .../distribute_fos_parameters.json | 21 ------------- .../oozie_app/workflow.xml | 31 +++++++++++++++---- ...arameters.json => prepare_parameters.json} | 0 .../createunresolvedentities/PrepareTest.java | 2 +- 6 files changed, 28 insertions(+), 30 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/distribute_fos_parameters.json rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/{bip_prepare_parameters.json => prepare_parameters.json} (100%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index af87094ffb..48e1cd8ca9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -45,7 +45,7 @@ public class PrepareBipFinder implements Serializable { .toString( PrepareBipFinder.class .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip_prepare_parameters.json")); + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index a84990fd34..839df13d05 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -35,7 +35,7 @@ public class PrepareFOSSparkJob implements Serializable { .toString( PrepareFOSSparkJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/distribute_fos_parameters.json")); + "/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/distribute_fos_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/distribute_fos_parameters.json deleted file mode 100644 index 17b1a95c0a..0000000000 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/distribute_fos_parameters.json +++ /dev/null @@ -1,21 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"out", - "paramLongName":"outputPath", - "paramDescription": "the output path", - "paramRequired": true - }, - - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml index 31c0186186..d53504fe63 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/oozie_app/workflow.xml @@ -1,3 +1,4 @@ + @@ -49,6 +50,25 @@ + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + @@ -79,7 +99,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${bipScorePath} - --outputPath${workingDir}/prepared/bip + --outputPath${workingDir}/prepared @@ -102,7 +122,7 @@ yarn cluster Produces the unresolved from FOS! - eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob + eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareFOSSparkJob dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -115,8 +135,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/input/fos - - --outputPath${workingDir}/prepared/fos + --outputPath${workingDir}/prepared @@ -132,7 +151,7 @@ yarn cluster Saves the result produced for bip and fos by grouping results with the same id - eu.dnetlib.dhp.actionmanager.bipfinder.CollectAndSave + eu.dnetlib.dhp.actionmanager.createunresolvedentities.SparkSaveUnresolved dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -144,7 +163,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --inputPath${workingDir}/prepared + --sourcePath${workingDir}/prepared --outputPath${outputPath} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip_prepare_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/bip_prepare_parameters.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/createunresolvedentities/prepare_parameters.json diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index 2f7a171b3c..a9b67e85c9 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -143,7 +143,7 @@ public class PrepareTest { } @Test - void getFOSFileTest() throws CollectorException, IOException, ClassNotFoundException { + void getFOSFileTest() throws IOException, ClassNotFoundException { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/createunresolvedentities/fos/h2020_fos_sbs.csv") From 47ccb53c4f3a130aee2f027b2556722bba029b24 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 12 Nov 2021 10:54:09 +0100 Subject: [PATCH 45/60] [Bypass Action Set] modification for comment https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/157#issuecomment-4915 --- .../createunresolvedentities/PrepareBipFinder.java | 3 ++- .../createunresolvedentities/PrepareFOSSparkJob.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index 48e1cd8ca9..40d420ec4c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -10,6 +10,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import eu.dnetlib.dhp.utils.DHPUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.hdfs.client.HdfsUtils; import org.apache.spark.SparkConf; @@ -93,7 +94,7 @@ public class PrepareBipFinder implements Serializable { .map((MapFunction) v -> { Result r = new Result(); - r.setId(getUnresolvedDoiIndentifier(v.getId())); + r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), "doi")); r.setMeasures(getMeasure(v)); return r; }, Encoders.bean(Result.class)) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 839df13d05..d77fb38bc7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -8,6 +8,7 @@ import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.utils.DHPUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; @@ -78,7 +79,7 @@ public class PrepareFOSSparkJob implements Serializable { }, Encoders.bean(FOSDataModel.class)) .map((MapFunction) value -> { Result r = new Result(); - r.setId(getUnresolvedDoiIndentifier(value.getDoi())); + r.setId(DHPUtils.generateUnresolvedIdentifier(value.getDoi(), "doi")); r.setSubject(getSubjects(value)); return r; }, Encoders.bean(Result.class)) From 881113743fefb575331909baeaf48f5b14562e91 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 12 Nov 2021 10:55:50 +0100 Subject: [PATCH 46/60] [Bypass Action Set] refactoring --- .../createunresolvedentities/Constants.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java index cae4d6d438..47a2359105 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java @@ -11,8 +11,7 @@ import org.apache.spark.sql.SparkSession; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Qualifier; + public class Constants { @@ -28,10 +27,12 @@ public class Constants { public static final String FOS_CLASS_ID = "FOS"; public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification"; - public final static String NULL = "NULL"; + public static final String NULL = "NULL"; public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private Constants(){} + public static Boolean isSparkSessionManaged(ArgumentApplicationParser parser) { return Optional .ofNullable(parser.get("isSparkSessionManaged")) @@ -47,9 +48,5 @@ public class Constants { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } - public static String getUnresolvedDoiIndentifier(String doi) { - StringBuilder sb = new StringBuilder(); - sb.append(UNREOSLVED_PREFIX).append(doi).append(UNREOSLVED_POSTFIX_DOI); - return sb.toString(); - } + } From 92d0e18b557e90a86816243df828664883013d15 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 12 Nov 2021 10:56:58 +0100 Subject: [PATCH 47/60] [Bypass Action Set] used constant DOI instead of "doi" --- .../dhp/actionmanager/createunresolvedentities/Constants.java | 2 -- .../createunresolvedentities/PrepareBipFinder.java | 2 +- .../createunresolvedentities/PrepareFOSSparkJob.java | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java index 47a2359105..b86adb27e4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java @@ -15,8 +15,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; public class Constants { - public static final String UNREOSLVED_PREFIX = "unresolved:"; - public static final String UNREOSLVED_POSTFIX_DOI = ":doi"; public static final String DOI = "doi"; public static final String UPDATE_DATA_INFO_TYPE = "update"; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index 40d420ec4c..b1f6710314 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -94,7 +94,7 @@ public class PrepareBipFinder implements Serializable { .map((MapFunction) v -> { Result r = new Result(); - r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), "doi")); + r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), DOI)); r.setMeasures(getMeasure(v)); return r; }, Encoders.bean(Result.class)) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index d77fb38bc7..17631e3962 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -79,7 +79,7 @@ public class PrepareFOSSparkJob implements Serializable { }, Encoders.bean(FOSDataModel.class)) .map((MapFunction) value -> { Result r = new Result(); - r.setId(DHPUtils.generateUnresolvedIdentifier(value.getDoi(), "doi")); + r.setId(DHPUtils.generateUnresolvedIdentifier(value.getDoi(), DOI)); r.setSubject(getSubjects(value)); return r; }, Encoders.bean(Result.class)) From 157d33ebf9818530518c30d04fb88b6d1124e834 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 15 Nov 2021 09:58:48 +0100 Subject: [PATCH 48/60] [Bypass Action Set] Refactoring --- .../actionmanager/createunresolvedentities/Constants.java | 5 ++--- .../createunresolvedentities/PrepareBipFinder.java | 2 +- .../createunresolvedentities/PrepareFOSSparkJob.java | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java index b86adb27e4..c508d4dbca 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java @@ -12,7 +12,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; - public class Constants { public static final String DOI = "doi"; @@ -29,7 +28,8 @@ public class Constants { public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private Constants(){} + private Constants() { + } public static Boolean isSparkSessionManaged(ArgumentApplicationParser parser) { return Optional @@ -46,5 +46,4 @@ public class Constants { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } - } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index b1f6710314..3d68db27bf 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -10,7 +10,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import eu.dnetlib.dhp.utils.DHPUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.hdfs.client.HdfsUtils; import org.apache.spark.SparkConf; @@ -34,6 +33,7 @@ import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Measure; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareBipFinder implements Serializable { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 17631e3962..5ae2f8c882 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -8,7 +8,6 @@ import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.utils.DHPUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; @@ -26,6 +25,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareFOSSparkJob implements Serializable { private static final Logger log = LoggerFactory.getLogger(PrepareFOSSparkJob.class); From 6f1a434e90f6fb87ed14b555a11dae09ce286962 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 15 Nov 2021 09:59:23 +0100 Subject: [PATCH 49/60] [Bypass Action Set] Fixed test to consider the new identifier utils --- .../createunresolvedentities/PrepareTest.java | 6 +++--- .../createunresolvedentities/ProduceTest.java | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java index a9b67e85c9..c48ccc8c2d 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareTest.java @@ -93,7 +93,7 @@ public class PrepareTest { Assertions.assertEquals(86, tmp.count()); - String doi1 = "unresolved:10.0000/096020199389707:doi"; + String doi1 = "unresolved::10.0000/096020199389707::doi"; Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count()); Assertions.assertEquals(3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getMeasures().size()); @@ -193,7 +193,7 @@ public class PrepareTest { .textFile(workingDir.toString() + "/work/fos") .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - String doi1 = "unresolved:10.3390/s18072310:doi"; + String doi1 = "unresolved::10.3390/s18072310::doi"; assertEquals(50, tmp.count()); assertEquals(1, tmp.filter(row -> row.getId().equals(doi1)).count()); @@ -220,7 +220,7 @@ public class PrepareTest { .collect() .contains("nanoscience & nanotechnology")); - String doi = "unresolved:10.1111/1365-2656.12831:doi"; + String doi = "unresolved::10.1111/1365-2656.12831::doi"; assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count()); assertTrue( tmp diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index b02761750b..b77b5bb36f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -109,12 +109,12 @@ public class ProduceTest { Assertions.assertEquals(135, tmp.count()); - Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi")).count()); + Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")).count()); Assertions .assertEquals( 3, tmp - .filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi")) + .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) .collect() .get(0) .getSubject() @@ -123,14 +123,14 @@ public class ProduceTest { Assertions .assertEquals( 3, tmp - .filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi")) + .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) .collect() .get(0) .getMeasures() .size()); List sbjs = tmp - .filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi")) + .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) .flatMap(row -> row.getSubject().iterator()) .collect(); @@ -178,7 +178,7 @@ public class ProduceTest { sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("nanoscience & nanotechnology")); List measures = tmp - .filter(row -> row.getId().equals("unresolved:10.3390/s18072310:doi")) + .filter(row -> row.getId().equals("unresolved::10.3390/s18072310::doi")) .flatMap(row -> row.getMeasures().iterator()) .collect(); Assertions @@ -217,7 +217,7 @@ public class ProduceTest { Assertions .assertEquals( 49, tmp - .filter(row -> !row.getId().equals("unresolved:10.3390/s18072310:doi")) + .filter(row -> !row.getId().equals("unresolved::10.3390/s18072310::doi")) .filter(row -> row.getSubject() != null) .count()); @@ -225,7 +225,7 @@ public class ProduceTest { .assertEquals( 85, tmp - .filter(row -> !row.getId().equals("unresolved:10.3390/s18072310:doi")) + .filter(row -> !row.getId().equals("unresolved::10.3390/s18072310::doi")) .filter(r -> r.getMeasures() != null) .count()); From 975b10b71159c979c16d13dd0f9b3f339ff49e34 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 15 Nov 2021 12:31:45 +0100 Subject: [PATCH 50/60] [actionmanager] increased spark.sql.shuffle.partitions to 5000 --- .../dhp/actionmanager/wf/publication/oozie_app/workflow.xml | 4 ++-- .../dhp/actionmanager/wf/relation/oozie_app/workflow.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml index 2450bdad74..b1c8e7c852 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml @@ -107,7 +107,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=2560 + --conf spark.sql.shuffle.partitions=5000 --inputGraphTablePath${inputGraphRootPath}/publication --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication @@ -159,7 +159,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=2560 + --conf spark.sql.shuffle.partitions=5000 --inputGraphTablePath${workingDir}/publication --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml index a7dce8f2f7..20ffe26d3c 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml @@ -99,7 +99,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=2560 + --conf spark.sql.shuffle.partitions=5000 --inputGraphTablePath${inputGraphRootPath}/relation --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Relation From d2c787d41632e168735b4a88f5c22d458dbef36d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 15 Nov 2021 14:31:15 +0100 Subject: [PATCH 51/60] [graph resolution] fixed sequence of the workflow steps --- .../eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index 4773fc87c6..e3b3fb52d3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -37,7 +37,7 @@ --graphBasePath${graphBasePath} --workingPath${workingDir} - + @@ -67,6 +67,6 @@ - + \ No newline at end of file From 48923e46a15d83a0428dc8a08ac4731b407e5ee0 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 15 Nov 2021 14:32:01 +0100 Subject: [PATCH 52/60] added documentation to Pubmed Class and also added mvn site for dhp-aggregations --- dhp-workflows/dhp-aggregation/pom.xml | 7 + .../dnetlib/dhp/sx/bio/pubmed/PMArticle.java | 171 +++++++++++++++++- .../dnetlib/dhp/sx/bio/pubmed/PMAuthor.java | 30 +++ .../eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java | 45 +++++ .../dnetlib/dhp/sx/bio/pubmed/PMJournal.java | 55 ++++++ .../dnetlib/dhp/sx/bio/pubmed/PMParser.scala | 6 + .../dnetlib/dhp/sx/bio/pubmed/PMSubject.java | 43 +++++ .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 99 ++++++++-- .../src/site/markdown/datacite.md | 0 .../src/site/markdown/index.md | 9 + .../src/site/markdown/introduction.md | 7 + .../src/site/markdown/mappings.md | 18 ++ .../src/site/markdown/pubmed.md | 62 +++++++ .../src/site/resources/images/openaire.png | Bin 0 -> 21694 bytes .../dhp-aggregation/src/site/site.xml | 32 ++++ pom.xml | 2 +- 16 files changed, 563 insertions(+), 23 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/site/markdown/datacite.md create mode 100644 dhp-workflows/dhp-aggregation/src/site/markdown/index.md create mode 100644 dhp-workflows/dhp-aggregation/src/site/markdown/introduction.md create mode 100644 dhp-workflows/dhp-aggregation/src/site/markdown/mappings.md create mode 100644 dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md create mode 100644 dhp-workflows/dhp-aggregation/src/site/resources/images/openaire.png create mode 100644 dhp-workflows/dhp-aggregation/src/site/site.xml diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 98e22d8a35..c89cc9d1dc 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -29,6 +29,13 @@ testCompile + + scala-doc + process-resources + + doc + + ${scala.version} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java index 8815284255..af0d5169d3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java @@ -5,94 +5,249 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; +/** + * This class represent an instance of Pubmed Article extracted from the native XML + * + * @author Sandro La Bruzzo + */ + public class PMArticle implements Serializable { + /** + * the Pubmed Identifier + */ private String pmid; + /** + * the DOI + */ private String doi; + /** + * the Pubmed Date extracted from Specifies a date significant to either the article's history or the citation's processing. + * All dates will have a , , and elements. Some may have an , , and element(s). + */ private String date; + /** + * This is an 'envelop' element that contains various elements describing the journal cited; i.e., ISSN, Volume, Issue, and PubDate and author name(s), however, it does not contain data itself. + */ private PMJournal journal; + /** + * The full journal title (taken from NLM cataloging data following NLM rules for how to compile a serial name) is exported in this element. Some characters that are not part of the NLM MEDLINE/PubMed Character Set reside in a relatively small number of full journal titles. The NLM journal title abbreviation is exported in the element. + */ private String title; + /** + * English-language abstracts are taken directly from the published article. + * If the article does not have a published abstract, the National Library of Medicine does not create one, + * thus the record lacks the and elements. However, in the absence of a formally + * labeled abstract in the published article, text from a substantive "summary", "summary and conclusions" or "conclusions and summary" may be used. + */ private String description; + /** + * the language in which an article was published is recorded in . + * All entries are three letter abbreviations stored in lower case, such as eng, fre, ger, jpn, etc. When a single + * record contains more than one language value the XML export program extracts the languages in alphabetic order by the 3-letter language value. + * Some records provided by collaborating data producers may contain the value und to identify articles whose language is undetermined. + */ private String language; + + /** + * NLM controlled vocabulary, Medical Subject Headings (MeSH®), is used to characterize the content of the articles represented by MEDLINE citations. * + */ private final List subjects = new ArrayList<>(); + /** + * This element is used to identify the type of article indexed for MEDLINE; + * it characterizes the nature of the information or the manner in which it is conveyed as well as the type of + * research support received (e.g., Review, Letter, Retracted Publication, Clinical Conference, Research Support, N.I.H., Extramural). + */ private final List publicationTypes = new ArrayList<>(); + /** + * Personal and collective (corporate) author names published with the article are found in . + */ private List authors = new ArrayList<>(); - public List getPublicationTypes() { - return publicationTypes; - } - + /** + * contains the research grant or contract number (or both) that designates financial support by any agency of the United States Public Health Service + * or any institute of the National Institutes of Health. Additionally, beginning in late 2005, grant numbers are included for many other US and non-US funding agencies and organizations. + */ private final List grants = new ArrayList<>(); - public List getGrants() { - return grants; - } - + /** + * get the DOI + * @return a DOI + */ public String getDoi() { return doi; } + /** + * Set the DOI + * @param doi a DOI + */ public void setDoi(String doi) { this.doi = doi; } + /** + * get the Pubmed Identifier + * @return the PMID + */ public String getPmid() { return pmid; } + /** + * set the Pubmed Identifier + * @param pmid the Pubmed Identifier + */ public void setPmid(String pmid) { this.pmid = pmid; } + /** + * the Pubmed Date extracted from Specifies a date significant to either the article's history or the citation's processing. + * All dates will have a , , and elements. Some may have an , , and element(s). + * + * @return the Pubmed Date + */ public String getDate() { return date; } + /** + * Set the pubmed Date + * @param date + */ public void setDate(String date) { this.date = date; } + /** + * The full journal title (taken from NLM cataloging data following NLM rules for how to compile a serial name) is exported in this element. + * Some characters that are not part of the NLM MEDLINE/PubMed Character Set reside in a relatively small number of full journal titles. + * The NLM journal title abbreviation is exported in the element. + * + * @return the pubmed Journal Extracted + */ public PMJournal getJournal() { return journal; } + /** + * Set the mapped pubmed Journal + * @param journal + */ public void setJournal(PMJournal journal) { this.journal = journal; } + /** + * English-language abstracts are taken directly from the published article. + * If the article does not have a published abstract, the National Library of Medicine does not create one, + * thus the record lacks the and elements. However, in the absence of a formally + * labeled abstract in the published article, text from a substantive "summary", "summary and conclusions" or "conclusions and summary" may be used. + * + * @return the extracted pubmed Title + */ public String getTitle() { return title; } + /** + * set the pubmed title + * @param title + */ public void setTitle(String title) { this.title = title; } + /** + * English-language abstracts are taken directly from the published article. + * If the article does not have a published abstract, the National Library of Medicine does not create one, + * thus the record lacks the and elements. However, in the absence of a formally + * labeled abstract in the published article, text from a substantive "summary", "summary and conclusions" or "conclusions and summary" may be used. + * + * @return the Mapped Pubmed Article Abstracts + */ public String getDescription() { return description; } + /** + * Set the Mapped Pubmed Article Abstracts + * @param description + */ public void setDescription(String description) { this.description = description; } + /** + * Personal and collective (corporate) author names published with the article are found in . + * + * @return get the Mapped Authors lists + */ public List getAuthors() { return authors; } + /** + * Set the Mapped Authors lists + * @param authors + */ public void setAuthors(List authors) { this.authors = authors; } + /** + * This element is used to identify the type of article indexed for MEDLINE; + * it characterizes the nature of the information or the manner in which it is conveyed as well as the type of + * research support received (e.g., Review, Letter, Retracted Publication, Clinical Conference, Research Support, N.I.H., Extramural). + * + * @return the mapped Subjects + */ public List getSubjects() { return subjects; } + /** + * + * the language in which an article was published is recorded in . + * All entries are three letter abbreviations stored in lower case, such as eng, fre, ger, jpn, etc. When a single + * record contains more than one language value the XML export program extracts the languages in alphabetic order by the 3-letter language value. + * Some records provided by collaborating data producers may contain the value und to identify articles whose language is undetermined. + * + * @return The mapped Language + */ public String getLanguage() { return language; } + /** + * + * Set The mapped Language + * + * @param language the mapped Language + */ public void setLanguage(String language) { this.language = language; } + + /** + * This element is used to identify the type of article indexed for MEDLINE; + * it characterizes the nature of the information or the manner in which it is conveyed as well as the type of + * research support received (e.g., Review, Letter, Retracted Publication, Clinical Conference, Research Support, N.I.H., Extramural). + * + * @return the mapped Publication Type + */ + public List getPublicationTypes() { + return publicationTypes; + } + + /** + * contains the research grant or contract number (or both) that designates financial support by any agency of the United States Public Health Service + * or any institute of the National Institutes of Health. Additionally, beginning in late 2005, grant numbers are included for many other US and non-US funding agencies and organizations. + * @return the mapped grants + */ + + public List getGrants() { + return grants; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java index cef92d0031..68ef6459e0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java @@ -3,27 +3,57 @@ package eu.dnetlib.dhp.sx.bio.pubmed; import java.io.Serializable; +/** + * The type Pubmed author. + * + * @author Sandro La Bruzzo + */ public class PMAuthor implements Serializable { private String lastName; private String foreName; + /** + * Gets last name. + * + * @return the last name + */ public String getLastName() { return lastName; } + /** + * Sets last name. + * + * @param lastName the last name + */ public void setLastName(String lastName) { this.lastName = lastName; } + /** + * Gets fore name. + * + * @return the fore name + */ public String getForeName() { return foreName; } + /** + * Sets fore name. + * + * @param foreName the fore name + */ public void setForeName(String foreName) { this.foreName = foreName; } + /** + * Gets full name. + * + * @return the full name + */ public String getFullName() { return String .format("%s, %s", this.foreName != null ? this.foreName : "", this.lastName != null ? this.lastName : ""); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java index ce9420cc13..abb9084834 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java @@ -1,41 +1,86 @@ package eu.dnetlib.dhp.sx.bio.pubmed; +/** + * The type Pm grant. + * + * @author Sandro La Bruzzo + */ public class PMGrant { private String grantID; private String agency; private String country; + /** + * Instantiates a new Pm grant. + */ public PMGrant() { } + /** + * Instantiates a new Pm grant. + * + * @param grantID the grant id + * @param agency the agency + * @param country the country + */ public PMGrant(String grantID, String agency, String country) { this.grantID = grantID; this.agency = agency; this.country = country; } + /** + * Gets grant id. + * + * @return the grant id + */ public String getGrantID() { return grantID; } + /** + * Sets grant id. + * + * @param grantID the grant id + */ public void setGrantID(String grantID) { this.grantID = grantID; } + /** + * Gets agency. + * + * @return the agency + */ public String getAgency() { return agency; } + /** + * Sets agency. + * + * @param agency the agency + */ public void setAgency(String agency) { this.agency = agency; } + /** + * Gets country. + * + * @return the country + */ public String getCountry() { return country; } + /** + * Sets country. + * + * @param country the country + */ public void setCountry(String country) { this.country = country; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMJournal.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMJournal.java index 863a23bd50..731648839e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMJournal.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMJournal.java @@ -3,6 +3,11 @@ package eu.dnetlib.dhp.sx.bio.pubmed; import java.io.Serializable; +/** + * The type Pm journal. + * + * @author Sandro La Bruzzo + */ public class PMJournal implements Serializable { private String issn; @@ -11,42 +16,92 @@ public class PMJournal implements Serializable { private String date; private String title; + /** + * Gets issn. + * + * @return the issn + */ public String getIssn() { return issn; } + /** + * Sets issn. + * + * @param issn the issn + */ public void setIssn(String issn) { this.issn = issn; } + /** + * Gets volume. + * + * @return the volume + */ public String getVolume() { return volume; } + /** + * Sets volume. + * + * @param volume the volume + */ public void setVolume(String volume) { this.volume = volume; } + /** + * Gets issue. + * + * @return the issue + */ public String getIssue() { return issue; } + /** + * Sets issue. + * + * @param issue the issue + */ public void setIssue(String issue) { this.issue = issue; } + /** + * Gets date. + * + * @return the date + */ public String getDate() { return date; } + /** + * Sets date. + * + * @param date the date + */ public void setDate(String date) { this.date = date; } + /** + * Gets title. + * + * @return the title + */ public String getTitle() { return title; } + /** + * Sets title. + * + * @param title the title + */ public void setTitle(String title) { this.title = title; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala index 80cb0667cb..c6d5fdf74c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala @@ -2,6 +2,12 @@ package eu.dnetlib.dhp.sx.bio.pubmed import scala.xml.MetaData import scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader} + + +/** + * + * @param xml + */ class PMParser(xml:XMLEventReader) extends Iterator[PMArticle] { var currentArticle:PMArticle = generateNextArticle() diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMSubject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMSubject.java index 862d39a940..e3829bb7be 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMSubject.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMSubject.java @@ -1,40 +1,83 @@ package eu.dnetlib.dhp.sx.bio.pubmed; +/** + * The type Pubmed subject. + */ public class PMSubject { private String value; private String meshId; private String registryNumber; + /** + * Instantiates a new Pm subject. + */ public PMSubject() { } + /** + * Instantiates a new Pm subject. + * + * @param value the value + * @param meshId the mesh id + * @param registryNumber the registry number + */ public PMSubject(String value, String meshId, String registryNumber) { this.value = value; this.meshId = meshId; this.registryNumber = registryNumber; } + /** + * Gets value. + * + * @return the value + */ public String getValue() { return value; } + /** + * Sets value. + * + * @param value the value + */ public void setValue(String value) { this.value = value; } + /** + * Gets mesh id. + * + * @return the mesh id + */ public String getMeshId() { return meshId; } + /** + * Sets mesh id. + * + * @param meshId the mesh id + */ public void setMeshId(String meshId) { this.meshId = meshId; } + /** + * Gets registry number. + * + * @return the registry number + */ public String getRegistryNumber() { return registryNumber; } + /** + * Sets registry number. + * + * @param registryNumber the registry number + */ public void setRegistryNumber(String registryNumber) { this.registryNumber = registryNumber; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index 13f38408ed..ecef322020 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -8,6 +8,9 @@ import scala.collection.JavaConverters._ import java.util.regex.Pattern +/** + * + */ object PubMedToOaf { val SUBJ_CLASS = "keywords" @@ -15,7 +18,17 @@ object PubMedToOaf { "pmid" -> "https://pubmed.ncbi.nlm.nih.gov/", "doi" -> "https://dx.doi.org/" ) + val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9") + val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") + + + /** + * Cleaning the DOI Applying regex in order to + * remove doi starting with URL + * @param doi input DOI + * @return cleaned DOI + */ def cleanDoi(doi: String): String = { val regex = "^10.\\d{4,9}\\/[\\[\\]\\-\\<\\>._;()\\/:A-Z0-9]+$" @@ -30,6 +43,15 @@ object PubMedToOaf { null } + /** + * + * Create an instance of class extends Result + * starting from OAF instanceType value + * + * @param cobjQualifier OAF instance type + * @param vocabularies All dnet vocabularies + * @return the correct instance + */ def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { val result_typologies = getVocabularyTerm(ModelConstants.DNET_RESULT_TYPOLOGIES, vocabularies, cobjQualifier.getClassid) result_typologies.getClassid match { @@ -42,6 +64,12 @@ object PubMedToOaf { } } + /** + * Mapping the Pubmedjournal info into the OAF Journale + * + * @param j the pubmedJournal + * @return the OAF Journal + */ def mapJournal(j: PMJournal): Journal = { if (j == null) return null @@ -49,6 +77,7 @@ object PubMedToOaf { journal.setDataInfo(dataInfo) journal.setName(j.getTitle) + journal.setConferencedate(j.getDate) journal.setVol(j.getVolume) journal.setIssnPrinted(j.getIssn) journal.setIss(j.getIssue) @@ -57,25 +86,43 @@ object PubMedToOaf { } - + /** + * + * Find vocabulary term into synonyms and term in the vocabulary + * + * @param vocabularyName the input vocabulary name + * @param vocabularies all the vocabularies + * @param term the term to find + * + * @return the cleaned term value + */ def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier = { val a = vocabularies.getSynonymAsQualifier(vocabularyName, term) val b = vocabularies.getTermAsQualifier(vocabularyName, term) if (a == null) b else a } - val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9") - val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") + /** + * Map the Pubmed Article into the OAF instance + * + * + * @param article the pubmed articles + * @param vocabularies the vocabularies + * @return The OAF instance if the mapping did not fail + */ def convert(article: PMArticle, vocabularies: VocabularyGroup): Result = { if (article.getPublicationTypes == null) return null - val i = new Instance + + + // MAP PMID into pid with classid = classname = pmid val pidList: List[StructuredProperty] = List(OafMapperUtils.structuredProperty(article.getPmid, PidType.pmid.toString, PidType.pmid.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)) if (pidList == null) return null + // MAP //ArticleId[./@IdType="doi"] into alternateIdentifier with classid = classname = doi var alternateIdentifier: StructuredProperty = null if (article.getDoi != null) { val normalizedPid = cleanDoi(article.getDoi) @@ -83,43 +130,64 @@ object PubMedToOaf { alternateIdentifier = OafMapperUtils.structuredProperty(normalizedPid, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo) } + // INSTANCE MAPPING + //-------------------------------------------------------------------------------------- + // If the article contains the typology Journal Article then we apply this type //else We have to find a terms that match the vocabulary otherwise we discard it val ja = article.getPublicationTypes.asScala.find(s => "Journal Article".equalsIgnoreCase(s.getValue)) + val pubmedInstance = new Instance if (ja.isDefined) { val cojbCategory = getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) - i.setInstancetype(cojbCategory) + pubmedInstance.setInstancetype(cojbCategory) } else { val i_type = article.getPublicationTypes.asScala .map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)) .find(q => q != null) if (i_type.isDefined) - i.setInstancetype(i_type.get) + pubmedInstance.setInstancetype(i_type.get) else return null } - val result = createResult(i.getInstancetype, vocabularies) + val result = createResult(pubmedInstance.getInstancetype, vocabularies) if (result == null) return result result.setDataInfo(dataInfo) - i.setPid(pidList.asJava) + pubmedInstance.setPid(pidList.asJava) if (alternateIdentifier != null) - i.setAlternateIdentifier(List(alternateIdentifier).asJava) - result.setInstance(List(i).asJava) - i.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection.breakOut) + pubmedInstance.setAlternateIdentifier(List(alternateIdentifier).asJava) + result.setInstance(List(pubmedInstance).asJava) + pubmedInstance.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection.breakOut) + //CREATE URL From pmid val urlLists: List[String] = pidList .map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue)) .filter(t => t._1.nonEmpty) .map(t => t._1 + t._2) if (urlLists != null) - i.setUrl(urlLists.asJava) - i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) - i.setCollectedfrom(collectedFrom) + pubmedInstance.setUrl(urlLists.asJava) + + //ASSIGN DateofAcceptance + pubmedInstance.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) + //ASSIGN COLLECTEDFROM + pubmedInstance.setCollectedfrom(collectedFrom) result.setPid(pidList.asJava) + + //END INSTANCE MAPPING + //-------------------------------------------------------------------------------------- + + + // JOURNAL MAPPING + //-------------------------------------------------------------------------------------- if (article.getJournal != null && result.isInstanceOf[Publication]) result.asInstanceOf[Publication].setJournal(mapJournal(article.getJournal)) result.setCollectedfrom(List(collectedFrom).asJava) + //END JOURNAL MAPPING + //-------------------------------------------------------------------------------------- + + + // RESULT MAPPING + //-------------------------------------------------------------------------------------- result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) if (article.getTitle == null || article.getTitle.isEmpty) @@ -159,6 +227,9 @@ object PubMedToOaf { result.setId(article.getPmid) + + // END RESULT MAPPING + //-------------------------------------------------------------------------------------- val id = IdentifierFactory.createIdentifier(result) if (article.getPmid.equalsIgnoreCase(id)) return null diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/datacite.md b/dhp-workflows/dhp-aggregation/src/site/markdown/datacite.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md new file mode 100644 index 0000000000..c0c7560826 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md @@ -0,0 +1,9 @@ +##DHP-Aggregation + +This module defines a set of oozie workflows for the **collection** and **transformation** of metadata records. + +Both workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure +the consistency of the read/write operations on the data as the MdSM in fact keeps track of the logical-physical mapping +of each MDStore. + +It defines [mappings](mappings.md) for transformation of different datasource (See mapping section). \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/introduction.md b/dhp-workflows/dhp-aggregation/src/site/markdown/introduction.md new file mode 100644 index 0000000000..9da46a27e0 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/introduction.md @@ -0,0 +1,7 @@ +##DHP-Aggregation + +This module defines a set of oozie workflows for the **collection** and **transformation** of metadata records. + +Both workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure +the consistency of the read/write operations on the data as the MdSM in fact keeps track of the logical-physical mapping +of each MDStore. \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/mappings.md b/dhp-workflows/dhp-aggregation/src/site/markdown/mappings.md new file mode 100644 index 0000000000..576c4b6bee --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/mappings.md @@ -0,0 +1,18 @@ +DHP Aggregation +=============== + +DHP-Aggregations contains different mappings from original data format into OAF Data Format, +which converge in the graph in different ways: + +- Via Action Manager +- Direct in the MdStore on Hadoop + +Below the list of the implemented mapping + + +Mappings +======= + +1. [PubMed](pubmed.md) +2. [Datacite](datacite.md) + diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md new file mode 100644 index 0000000000..f6327a51b1 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md @@ -0,0 +1,62 @@ +#Pubmed Mapping +This section describes the mapping implemented for [MEDLINE/PubMed](https://pubmed.ncbi.nlm.nih.gov/). + +Collection +--------- +The native data is collected from [ftp baseline](https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/) containing XML with +the following [shcema](https://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html) + + +Parsing +------- +The resposible class of parsing is [PMParser](./scaladocs/#eu.dnetlib.dhp.sx.bio.pubmed.PMParser) that generates +an intermediate mapping of PubMed Article defined [here](/apidocs/eu/dnetlib/dhp/sx/bio/pubmed/package-summary.html) + + +Mapping +------- + +The table below describes the mapping from the XML Native to the OAF mapping + + + + + +| Xpath Source | Oaf Field | Notes | +| ----------- | ----------- | ----------- | +| //PMID | pid | classid = classname = pmid +| | **Instance Mapping** | | +|//PublicationType | InstanceType | If the article contains the typology **Journal Article** then we apply this type else We have to find a terms that match the vocabulary otherwise we discard it +|//PMID | instance/PID | Map the pmid also in the pid in the instance | +| //ArticleId[./@IdType="doi" | instance/alternateIdentifier |classid = classname = doi +|//PMID | instance/URL | prepend to the PMId the base url https://pubmed.ncbi.nlm.nih.gov/ +| //PubmedPubDate | instance/Dateofacceptance | apply the function GraphCleaningFunctions.cleanDate before assign it +| FOR ALL INSTANCE | CollectedFrom | datasourceName: *Europe PubMed Central* DatasourceId: +| | **Journal Mapping** | | +|//Journal/PubDate| Journal/Conferencedate | map the date of the Journal +|//Journal/Title| Journal/Name | | +|//Journal/Volume| Journal/Vol | | +|//Journal/ISSN| Journal/issPrinted | | +|//Journal/Issue| Journal/Iss | | +| | **Publication Mapping** | | +| //PubmedPubDate | Dateofacceptance | apply the function GraphCleaningFunctions.cleanDate before assign it +| //Title | title | with qualifier ModelConstants.MAIN_TITLE_QUALIFIER +| //AbstractText | Description || +|//Language| Language| cleaning vocabulary -> dnet:languages +|//DescriptorName| Subject | classId, className = keyword +| | **Author Mapping** | | +|//Author/LastName| author.Surname| | +|//Author/ForeName| author.Forename| | +|//Author/FullName| author.Forename| Concatenation of forname + lastName if exist | +|FOR ALL AUTHOR | author.rank| sequential number starting from 1| + + + + + + + + + + + diff --git a/dhp-workflows/dhp-aggregation/src/site/resources/images/openaire.png b/dhp-workflows/dhp-aggregation/src/site/resources/images/openaire.png new file mode 100644 index 0000000000000000000000000000000000000000..00d320c39ba8ca5d352b2874397417b259c2beef GIT binary patch literal 21694 zcmV)UK(N1wP) zaB^>EX>4U6ba`-PAZ2)IW&i+q+U1;Cl4Lh>MgOsiUIOM}IgZIpJJ9lVE+SaPp~)s? z`V)(+OlC&J!voy!;BY*5{`E93f{^FMpOuBN^P`-ow zeo%^^pNH@NyYYRWimFc~$@~81%;rsUf4E}U(aj{~X z)tvlUE{qVF!5ebe!NLvaeO+NO#2im-e2sC%^nTV;izBYLC|Q5PjV+C|9cvWRV$~8q zjz7i{-gU>j-U^*7@4zc#;KKs%_`@If?|%5d`Sat0?$z9e;QNocVq8%%%`mib`kPm= zAmM)Jw7mI!f84L)#{X(z6DwHWG1m>}`KE}vn%5ky+p_I6TyR1!0mBr~%*XBJfyw}DOKR*UqScsBp zYAnr2#$x4Mh@X}_F=Qo`>`E!Mw9?BcR#VNj)LL5=Mp;WOx6*2Bt+&yvr=EN1wYT2; z=r-a=Kv)@dG&lMf!;Fg>A8owbc!k-FGtE59th3EN$FxQGth~yqtF6AqvK@EYr2nqF zx!w2JcETZ)PCn(-(@sC**d^9(y7`t{Z@c}DYd==)yQ;rkweY)@`|DK;-&NC!<@dVw zbJh59YrlL%5S$dP87mfZV8xqOfPju#Gv7nbQ7fl4^COZJC9-OvXmNJXim`(EgjjC) zv35UK?l0HPmHdy^E&kccIW67)#mYG?-FM~wxNiSu)wVrGNpFFag_={JF2KeMPPf=i zob=DX&XV%Jo)OJ$YtPAWnS@xakalr`o+C2ca!T=A;Fy`A1Ooe$_JSH~*`Z95^n`SS zi*=0JBCo%ILvObfO-sLFo;uF5+;?a(`!K55D&`(;-{fh$dCtv1N}3IXocYYExtz1t zO=#q?GZTaiX#JxI-Zvo z&U}1nwrbzU@9o7+1FV$jp%#7QE7HIznlJ)z6pVIswW(}B5appw)^+_}nHe3n`FK{QlH|R&<1&lVz2@fZ6}TgYIf?WwY+%^2-Vx8) zm8+i#)DPYW=CqAy921mc?%OP`XVMS-Q@IuXoL;Omg{dFbn#7(YJ+}f>j03I=K5)(^ zsYdd(9?DIm>I#k_-(l(c$-FM;qzG16ZYDIBz-4{dIlONr^#1It`_mWX0l}rhOy*L_ z|By$@H%{P)Al-SUG=p@W4{OBiZeU$A<0PkITY=kOR%@_vJ2Qo}++mZ{%i380Q{CUQnONa?6d52ZY#5tP9_3EcdZ)w*gb?hSKqEiu+jYS zM@HayqBb^Qc9w>JN~Bv0qz)^y3D882RbXojKyY|g>NIZ#jF5x7DAI5v)#muw++i*T64HT8NerNvsjMXvYvGod zH<{b-D)BO=Jw(1YCxC6Dv(ue7YzKb&%{!nlv?9>lc%u<>N$8KTG?S7#BtJk?<7wcj zi4#H=)65jdx;46X2Kqr5F&deLpX2VEhCi*%KsFKEXRICdJ^<86rm@Ow75i|(*Bldh z;4*o@be)k<4(mdkBNjf3d1PfGFs2W$ELLR7;wT^$pie|0IPe23(*VDVJ2Q+3mG(<+ z*daLO{4ReJTLd?2S;#dI9q-37b-?SenvvPOVy-nZjE&G@=65gkOnnFCm6p&SF*g|O zKf>A1ukL+=W<&8984@+|$erSfsxAL03OX7YbgDM$1{I8rH{ku1s2 z$}%j7simN_i_ARV%p6(RLktZ-ZS@M-ISgWEw5IzkR|2ymlrl>ZYh@UaC2{2M0$2^* zXqT)2=IT1&*F;g);p1KI@$W=)2J~Dpl+KCqV(EK^!Y4c$ZS#>(7ot(Z0lET|54Qy- zEF|hRA@T?Lwuc4uL(f$rw;^+%{fJ(6w4az%)XyhQ$J-wX1S%lc2V)GinP`V_wzM@SnZe9<8(_j1(6`qor2_)XJG%)&Mikb&k$ha#;JCpJfc14UVY^t^ ztkfWW!8$bjW?65vfF?s5kd0QrqX^{V&>2Gl95ff@7Kt)gqg?_5=0XpUWG5pFEr(AY zp&^l9_JPGz@cp-NK|X~16KsP?pm@koQEk5RayWzpUQL^xaW)hTXfXhWgNj`a zc>)Cyy>K~-7OAyGv#Q^I7&TJI-r*pxPL!z` z@Y!ki%r|J}aRR&CNS5v#Ap7YZA>cohk2v)DS-4KOd3;AbLYIIV;fF7qc%<2YNU5N{ zTc02;Ghh;UurQDbnM1xgqsR||?nu#%?!Z$qDG9;8rJOsy2bl_$z&$M4zzHNsY{*9r z8NUq>p>JZo`+Q`LI3E?VIzj|z0olMAQho^X)=o86yuo&?ERg(6T$E@cR#>==jN|vY zIV37ZV3E)Ia0^~@$?Vi#5IS565yPi4gqmayO~3++1XM8?K@NG#AcLzFNx-!cql7FW zf;9oS6x6HP<#1Wi-?`x{!QmB{(BG5u@sjP*Ea1|cs%19{0&>W*@ z%m^!bq=azvU@r8MMh!P3p2;3Y-b#`c%Ono&FseM~z_`H_fP>Chk~$WVM=K)I$Qwk&NO;8+?zzxvM}Yxzcc{#xJE(Ap6`US;!6miY z2CIpf1v*$sY&JORdQhtJVP2)UTUgNE=B{nG0_|e?7o+sZDFdb3hEZSmE-QU^8Be~>iJ$8{qMWP^Po`r*t@>t}(V|naDy}*qLj8KFf{OrIr z>aHOTZ+gyV<1Ql6V!RyTO*w1oU+K$~xfdtRCgnjO)6oeUNf%VeV5UN2aKty_#FHi5+58aPXMEP5mBfS zu8rSk;Bm56KC>F5qHaVP_&}7JDH!0Vn=?ZCuxdrM3}W7Qkpj*;hzm%KBUY1!#wgX( zS&a_Jpqm5vLDnp;LtsMS2$|}L`Bs_H4l}Mq-ZqaA2MrH8Lp7ZxW#>^5i$#{)KKIobz`f?F$!4XiS2hdh|lWgUJU zMi?JU$g)ewIPmTgTu0jh-V+5zguG|~qB4?D){m5fq+(=d5mv267UZo6=7sQsrGjHY z8gNd8fX`rcuO8Kwh{RNa zpYX@^uS#jgM`>~J5Uh?SWTsbJsl_!Af%rQr(ajgIq=RY&|FGKXe);CA7;30>2=8`s z4GLxrp?Pu&ZAA?7ZwJi@?X+1;)F)F(T0(xR`jOHRDXhZkBK%(Y5pczNs$ABgD=?Qd zY}^MzToFAVs6^2Q7apjaab1UlA9YvaP#00PRfF--{mJ0Voe@r3*rsMQM825SiDo}=g>8Z2oBzLb3#|L~Y& zL}l0=&5v3G0P5KR1X*f>JH9Xn7ZmWdR6DK<6nhEzS)Y`$`( zXS7@cvl2D#i{L2nIu-3>SCs%icbO4VfH=a1UFlD$O;Sgjj$U;r@F!{AfY}+D>a4AZ z(#miLD<5Sbn12vGpmh@H-0YTeGW2IQ5~=Uw^CtM@jMzic9(*G@2lc|5!Tn8A94s4? zJ1QBCmbMC@@FL0=h+~eI3%UD*%W@ zLkus7IYQNl*$fb&l;LsJv`jTAB33&i%^^35?3N{1YsSbR`h8p$wJU&Z#MT+O)wN%S z!64bReF?3=2dtfmpk(xkqp>@AhyY?>JCHZzI&w9rEPIwQU#uzr7XgzB&sgTJ^Ffwqh?UEDrRKVE&f?$cLtq3R286?0llHGB+r28fwUI9Z-*~s zvG9JdEx=E_=Yut3KCuHZG{R(=vUh1A3YrA^VaSYV1*uvUl~HG|5b6Xy;x>Pn5Y_!7 zk!=C?1m@dugK6+a5gU%*+e^aFjAy#5Hf&20SsizANS6R6ws??O-WxCz1wGK^m&p1mILzp${j% z@Y<*crtuO<$=Jk}ijabW=;&&dlB@Q#wDnjJsY4rB*!xwxGsp(O~oL7;X=Xel~IMMbL z8OmBAn~t2Fsfs_4Aa@wvVY)f%>y*I-zrEv|iT#TA77CbjiYK@KN+f;0y8nzq&^gO_ z$Q=|LOV;1!!~u@e8QLJ~q)vB)S4&rbxMn3kOq(%abCmSQ*avMek{Fo!d=dfOT1Ied zmylJE=EPQ$DNY4il`RJl)0!9_fX#5GilY9CCA|6zQD za6^>v?_iSP#R$=9Ggqn|0fhg>{#@5Gtb8`e=C8kK2!+#y=x{9;gz#Sb2HF-vN5sAq zaP`rqrha_(n@a=yBLRKRKzO4b;7RF!SQ243JEBX#d9@<|z8=;?n+>X|$=rNuH7iK% zzQQ;lzma}O8(jPbPas`%LP6i~hcGw@STwS>7}A9z7ZEd&nxK#BBDju>7=8SME++mm za^H|&Q^?N%@UwgdmKx~;69Pn#B908%5YT(t0V0_vDM^YjLckwj^Z5!?;f&M;cgQuR z#lG6cF(qjsU~co!kP*}v@kv~&Tt+B6(~4nkfops0ngB|#QVG^)cWr{8B+IK#V`f#- z2M?L#&WM!Ve0cV8JKz*zk$~{u&_#5@*9XsP2m%N8TcVJhBb?!MrGK#1ATYAhJQ@lib13JlV%`k{3Q=An#U%Gp3|1 zY+*A}T%I_;&a*1{LZiQ5tH&~34slMsuFG*sd%fYIYbyyfD)Bh7oL}Vx^U0t z=QlLHmt=_-JHqFo8v(iWd53*=9)J55bsvI83A2K<#x`e^VpT^6c>;vg8bNkohKveC zSLlA85~|9qiLUnNp!KSn*DY8Z`kRfGB1IZm}BVZ%a)ipV;ip*+Ig;`(VRs4ds zT5#dGqTN_BWH)qFjVOf>5qszX3ogV0jUGk;nb0( z|FrEx((?_g47#uDKx_3UY*1(f)u^+4SEt(li2~yy_z@Qt4wp*sQ|Kx~5 zlH+BZ)z^V>n_6&0ke&iy6`gU5pDrO5(PGJY0mtITLnq<8JMqrfM&J z$%PaD(t#v>8c5o?QP)Wz)blAMK5Th2FtssVkOyJQ>rEX|JH&-{LL3xB`9N)`MuG$4 zA_#f}KD4VhARzQgTJp3>v4mk7XJ+6RV$RW_SD1W;4?Ia|U0juUqV*K@9}THn)s$2a zM;34&L~kdDqD^chb9%YSVvf=A5OFD!=4!x1n_(eX zZO|+rMR9mXRn)eXFQh+w7UV0%%m{2`D9~~t17?hQ$B20Cn&X}zNym~6-jY>?)fVmT zC-m0|>eU}98G_$nR4P`YBCdAo!z(@rRxky4tkWZc{le|kD3j7_!PKv8l z^r8;bBLYt$m9zu-qD5OdYS2OdM<|=gT$&RRmJ}l{|LyZ(e1S%kgl0e@8Pw;{Q?=9G)iTNT*^ZYLRi z`8D4m2@L!Jr*?d@3bX64y-jLaow;PgK`+&+QA$}GT*zN&Hfd)I|5vYgt(2PP4m%U$ z>MO14bmaadhfN{}6wvl{$8xAlCKCZXB}N7JgT9!I zc3yk1ks{p23chus1`Oy^;|FYE?5&3>}$We_0uV#5@_YGuI+BpikWp8SddFs zjn1ogPTRD(!Zim=!;nmAz^X6oEPjIzS3*KYNe*lmK3_5G5Rd9xN6e|Xm$lgfHfw%5 zWY!7B;$-Qs`N&MeKTK;&#D*t$NKI>920;7`eIg>3e%sU`m68t;hy z3~*n-a0dlDYc;eRh}y(bRU;rn zYJ)%l+Y!zUT+ZaA^fHLsl?2i4myM*=Rj~u9as0wNs5@-MHMNJWT~L)ew1tl<^DHWY z<8>aY%bHmN&)k5UW0Ss2#J#F?Q@B)u%FwU>SoIi43DqyiC)Af{c8F)eLQ|;=#pI^i zveW{r(+QgEg9R{yJsJi)sgS9f5JaHvA+N2FF;LSgpZY1bLPh3t1U@xJ-zb^hR{-{BB%`D!6JwWN7NM7Up@3F z<0c@6=g6-b+hpi`OPs1%8DW=p(o5~b|=E(l5 zEeUAoO%VD^waP%mtD}+Jta*{q7$S8M>OwLi;&S!TUn9;JRjLMv9 zyU?qBj?mxI-J~<%-AZ)C6_yCF>&y^&5bR|gC zdF^9FcpeB+BOxjl3Lw6@EmUwrZmDPBx!YTPop0nj_^UG!Vd(!x2+6_v7igGrX`_G= z_IXppmFp(gtCgy0iPwNL;4!)n??~3blCKWAqc))?Gk*_wPhz(4;0)b@T^MRwe+5un z#E{YJIaKvD0btlAai1vKmd@f~vp_XW1%AGa`C*2HpHr&yp7+6cHnskpr0vNf-n;L( zlyBH9^{MwV9M^>VulAC|RIf&yQO(exS;!gGj|fkh(D20Ri1*Vwp%Ss~1pIuLaPX*( z+-X-%lCA|P+)snOdp1cY7U*@xlg`rLUh<0@lsuWJMMUP%sXjN~5imeg)x=kG>K=E* zJD~3+0io@)Q&VG35gzzs?bWinh;=Z$0~6XYa-nPgiu}v%bs=F|Jn(vDv%L-qNzSnD zuyhBE;1rYt3r;wOfG$0`JaTtU2PVi2t{xITTdk>QuA&2C4P~^oD)pT%VxSi@6pQdI zMlEJi0Vk?;2oYg8Q-mO?lNzbaf-fLB3x^6fmZNr)NlaN7wQf#D`p&PvN!6zrR5Eo_ z&9_s{HQK#ITg$R@G40IZ3{;R%0{}(@N%O}~3_5AhAs?rML!?QT`v>9cLYpVGV@Clv zrb;+hp;IOUBZbrC(f}j4+FOScwat1V$RWQo4X38idyIbu7Jtl z*0!q1Fou3Ufx^l?uts>rI*+=`R7YH(WI&VX3>^@uLV!*U9@xSZ(34!dNryBWmJIMr zDtE)Kier2bF&)m336)<%U4e!85mrcreh+X)CG2TBkH|Q`|bT&rVxT; z_aNDgbQs#t$WTL(o>*55gDAmRKy=wbX-h>T?JOoNmO8}myzN@ku9VtyEiJOA>R1GG zd@*;P1?(5;hI%I{NxkhYl2Q{Bm9cKm#Qdd(N zyq?sAG`5=(z5Z(VTE~ds$$;jXE$bX#(QbH9%f1Q|5n0D&c~Mcic_sVRNj!8w(kW;O zD#piR25qwt*N}zUoOJFA;i#Q}re5M0RVK%mahEYj6>U^PIXW)v7+vVv-AB(2c`l^1 z&M@J}=D?r_RiraSr`m48F2V)sW<7DJJ4+7e$!Z5cd;pyJH1$eL3Zf&r0Rh|?DEUek}I;mO31Es1D-liI)lxO`lo+!-^F>=u(0Gk2h z6PtnlLZfQa05Mf{Qnodv4p3qcgJq}(G`3sp_@CBOI} z#JbMv0r3ZAv$;iV)6Oc{5OetFMGO#ASO%4Mw3FQG>xxicA4n#Fh=!~>a3~-@qxS*` z>qL#tM<5>|aF%ToEi7bHxD+PqsMvV5z+t}7BU0veT@=W`)#JzgT2W`uTsk|a%4uei zHkl>O56$$Y^Tx;pZQf!G@#A8vbES`(^@~Ghz+1i4+6-FwD0FBfQ2}9fP|KtwosvcD zOl8O%`d|WPs0e@<^=GRy%6FZau2>?NL|b1`r%5rbLx($$Bx}4A9fRg$YH!0ntAJ9# zcpVzA+?tt=E^jrNA0z*ebl3jqA%k48Np@37&DO1+a@0RAZQd2Oh+x4U;x>~*AW$KAXSBm)k6Hyk^%{B8EF3|>sXwr z%%n(Uj^Y{c7q}INcDvr$1s#ykV=k&Ubhli{=Ma_GIdO6VLB#|;Hz5_mEDG2LAd2Ix z???>frvkSc*YymPrc*%n&LY_x6+VY`eKmp8KVKuBeK_rD4(Y6lH@Q0EGBaTewyOx@ zKPBlFi3SglK_VB0p<~*eS@#OGf^6K|X;0v`(VuNh>Jh23M3}K)3E4P=llDr;h=C3V zZzCx4&>>MhL!ijn^#wwjH7xdRI`98rhQLsPPyOMlzv|o?gx$oQQOzBy>Z*ef&fGDg z8nm10HVKqTU?@xEhPuS=o}^*EE)kUOwsdj}iGzh-4@_W9|J1wol(r7dyvrTwh zi*N~~s`Fh}8$d2;1E!#^RAWKiHBh^#HSkJtDLAn_5;hPZAtNO$tHG*YS*ER~1l-_s zL05am7umY6gsGw&WI-tIt3CgyPDQaNZc2!FR2Cw8)zi;cBpU%ErsXm!wC;`-P!Eg5 zFG7)sP#v>jV|O;y3+x<~5esvbJQRge(}RoDDmGkFRZ-=(S9&UHV^Jvw<9DZxcf#W% z;01Tj^i59!VD>uTx?qqcorLl~;W>OgPXH-Jq{%M+U2pxWKyFs{yEDe`kaXd;&d5{S8U5 z-K)+8sBn;xBodKAeTE2H}uE6k&Zj$jT+_SX?jEN{51 z%Qt~PtX_1)Fc4Y2PMo~wXvmZxpCzhHCnLjr?m1 zyqv@bgVcbfGbp1-(5aIV>HxaTx<{%tUsDE{VPLlaG{7ZkcbSwUz3w~zhw8>1pd*T` z+lifkcxNWDf@f6|e5xBoM|ruRxvn-O9e)}XH$0b)k_U11(IdBdf7Z?U}xMNKzXy^9^C9pt?5P_>P@ksZ7aO2ozBu zbByk&UxQ0xsqiKtXmnH03s`2$FpCDita#Kk*R zVLBHX!2}j5sR&S`>Y?g&RpCzCO|M%!YhOXnIRV%t_tEz+USDJ6v%V@<6Bg>&QUM4q znrbMhpJZBUok5yN0HWnnNBJivE>`#{87ai2@<^bj^12a~dVYl3`?Le7*8JSIh^X=% zqD>7kLB}o4>FCf^Uy(XDaDj0GwV$&fL#M%XsseQNi1embd9ns;dQ{m$$f*msF34e` zMqn=dA>_iqn+a+GX=JsY^02F1RR7Q-*@ciK{&HP?Lg)UvsiBYG!o(z)Box0U8#V_~<<~j&ZDQ-yy^-OaA z`}Nf82`>dA(5?la$*k!J1jBeAUL7lTZz$+Ml?%C>vB@r?{-SO7mOn#Q>8K|bLy!H^ z_GZ#Sctnp}ZP7tw?0Qw#WgFUjQg_WN+8*qzi(1UC9_V9rZ9YadBAEQ0 zM}-g4(D{);m7`-_=a}S}8VW7~7#MiN*1>I}ZIcLU)isj(SrvvV0sIQC1DCCeXr53E z_B31R9esJQKBP>0o5aAWS2KR^xTS{sR&hi+7_3g}3DVPJP$Ps-1=PJB#DEY+MnS?E zo@8{zR9@kJ^z1Z*3S6%PFej>u0nB|ZAXP`kfZBtA#O9*QF-#)BRMpeH5IXs1m@E~W zRfKH_kk5{qcFR6H?g%dENNKER*fc!AV2HYTo~*Wj44sX}2{C+M|DeAdfrxvfJE zNL`?qPEs?xI;I2vbJVb)9atoZp3oydn~;uO`>+j)Zii69OcXtLM@FQjLsCk^V2b))VA^ z&%w(jVRlL5sr@DcIebJk>Tt-?2D%CdHnqzFc%ZG#$Af8wjDb?+Ce)kRwEIY8 z!s8u_f`i0TR!qL0!pDzQijXDM6P?tSh=D3^Z~e@yBhW|_=}_%v*qYkr5tLvaoOr5j zCbpn+)Ra&^+w=*d(W$jq^kfG;hGae)sD^kjiWxjJ=s78Ay~Zb}f;g04)y%!_eA~TI zJzXF-_C}^y{oNB&U{Y!-Q00}%$0jb7Ou%GY=%cFkAk{Qu(ZS`#M*(ESvQA-l9X4TH zi16{3Yv_Kvh<{ka*W-E+L1k^75+JLC&eQ>b+-AGzTaY6uvUS3aY{9akPE8NC^5Ljb zsYHl6avEMIgc^hjh{8-ZLU}#!~%BpIVxF76|C{w0qMr z5B2iBjnV92ht&1ZlB7Dz&Tm7uv!qeye)Rkxfep9EuD!;s z9#_9r^S^vx2G&&9QkVVK<6;83IB&)qE(t~gEGSzw;Abb*qp8CtYP*AR6L#_QlsI)f zy}czpA1#M>eo-3_nGx#7B`LkzI@$T)ar1lA1KygRsaLce%B?h&K~;Z3<`Q{os8|e2 zx&_O*`#bp&{n2J7)Yx>cr0G#5_dVVKS@(`u!Q*caMI{b z$92lUbfO-KRUsg7LNdaBX={H%yFI<=X&oljf7HPvN#bq6q)`uAQz{bKIy1v!Y#olc zM-R@c@3D)j3@Vsre~0p=_f%e2`kZk(2$)`*b;@}A=I+GXsAJq;be6i9u-XBdyU#Js z`~-3XI)=E1w-oho3BngXbz>ybl!ly0Pa#qwGs1f`M}ND}=o?G#;b5k@QL*{RX)w;L zCywfY0ORN(g?f-dS#FE>_19n~SSMnVloF{FudcUN5+u3wXusq2xJEtEW2^>pnTnkS z+hCJ`B=AdO+C4F?LY0yFLH5O7D;g$2eJhsrG^++uV0$Buw_t^qnhmfU%!nUwKg!Mr zIMEn9R$DC^K__`IQ%pWI(oh*yPq)d@>xn=3o1-nIp0gg@1On=epPy2f1UZq$>}#u1 zZSVDRTkjbxT_}m4f?Z)fVaC&-I(f?N{eTPiPw_&}576n2hT1TkzD~;N7>6q=Z_ZM- zpLnEtEma+Q~XqbqL#MWm<$ z!dVM8wTBB|A+zHO+JdMD#bu_5fg7fQRY8sD#!W?( zD=P?R6_3c@-TgSdwb#$nSX0jmR8?P%19=A>M-1<<%@}$%4bZN$Mx_&}^R=qUit5Qt zZ)cp)fi_O}Q#-WiZ88bZP$d>8?^ide+Tct(!VU4qa;kWyBR|op`dK7M-$M~U&m(^*07*naRCwC$oq2eh#kK!G zBgtEKNFY&IW!V8jfU=kU5@?+gmU5wGDYq=4rG)}rfCwlNZvv@8DDW$k+x`kIP?kcW zKp+G`fv{5=mO#jASpgE1gzSlT>z(_@j2q>(9NVvCTg!Z&$Ip{kepk(R=A1KU&IlFU>-1E2$sJ%*7ZYzw&}Op> z@Ezbh;80+M7DIQDz$Rcm@D%X0N3e2E0gA;4q_{?lQNUHeg}~@6o_YiDIB;h)t8 zV#qgP3<7=(TnCKo#Y=4j{#Og-Ya7Q!sx2hos!t1+<*{lj&bR*4+E#nXX^7O@eCp^uGy$??GS9R5xPaG8YjPrEH)1Y+n@zhfwH50L97ObKvj zi%qo=h@o%%ju{C7+z9NSC$~Htm{Ai+l;p$B`b-n+UxEQ^08jx821@m>TYxP>P+waY zZraiDQnD{_d5-!WopCB~67XWK->3=m_A&<^Fpc`H_D;ZVz&KzmPz~&@e>V{D>Yw%z z-3F5SXA6)3mIKRx1^VZ!z0N&rQ_0Zb_oG6(S)+Qmmmi%pgzC|ytX$X1L$7V&{S~d* zel>Fiocvk`N5seebNFw!|y?(wjM1DyRKxBai}O;>6ooys*AKh5Veo&yf<@Au{@Ex}hYs8C+-NK}CcGlJwB z)j`W^+e`%R*5*`h^lAgk@RpVp%4;5`1(GlV$*)mkAaFBqC6H6C(NkW6r_|k)%7_6T zzISK^LkFfG1ifCNB3TYDIA##dE$O4Njm?s0K5XRkO|HVgrS$GPc_WbCFsXotP!X1# zOX2XARe%7IwOh}{b@bQ)JN*2R>I$xVWF4#PT}^2W$kn!Be2|XfI1qRV zl`E8+`AshGYWJZYf}=}&<(5Y6?Bk%_UFRPfA0*c~{IFYDzpm|A;4xtLLVU)C7D-E+ zq^uy}RLf)MVB0KDcBfupX6}y9oFkSc@v$iLADB!{RDS{_tR)%Yx))&^b!t}iJO^A<0i(%UQ1<^q$* zucfx>cq^B0t|$d!+nj?+Q_oi8S^?Z71drC%gos5Ri(_=2AUj*Z2`5A>lp(Ax{Vmgir`65tafklZq@ZbPjP&y;3hnh=~Fi-$k& zr$>4~i!^K0w@6wYYtL;7O|89RF+O_Y-X#r2AUVKM-<*zZb2e~V7EZSrnB?&=wJ%4F zH2f3D>W)sWNy%&T>q*+%#PR;(23Bp&`m*s2z=NT4N91(7DVnM#dhPf}sB$}*upT%s z77xFa3(r*(Nn8!wmCbW{JuqRPGO9`P_~Qb|yU-uucM^xJyc`2M&wH z!;AC%J=8=J71|HG2^iG}m)s2e4!B*<(O?9UV;pnzbXFx=Zv(D%d5XWs;^FTX(mSb% zB=*$r;}UwO4pl4gEO2v~m(ONh>o17+R*)vv>Q${?;J;iRouOb zcPlFy$jRm=8dj}XE0f7%1)LiV`3puI1v`QS1J+333*2pawoIE>n<-#wB8lO^vA_h> z%F5k=@*Q@XQX*}eWJ@#68`jeB<>xefv4Yn1Tbh8~WAX61LVHhznrd=5YKt#7yjtku z&?Cw^Z4y1J1+(}11s~idllJ#~d?==$gS(AmoyuJw+8x{^6U%3V$}x=7TyHlHhW+v^1B34(Fmjh7~=BSS`0Cl z#|_r+-UuYe8H~I1k~VU<%6K2L8Z}wkQ%Z3+?_>nh4+3#R5|zPQ>@>L)2&nBEj6e#M zR@B;sVrP>cO$|mMhE7IOXLu3R?$tARaIK08FUUdFwY1ie;oC?4E&IBQnG&$`X2`Y<6ItICj?&@f#eA# zC87;@#pU4@zz>ShygDCMox}}u%gRL32qaHfv}B3|Uc!CS-37sbHMCfi5e!%(fb%l& zKVJH5(G+O}l3%=o`?QmhWWJ|iJ~0XRYdO~Gca0H99#L8*R-h`3yK-ZI%Yy-H*Fsl@ zu0nYQ?(2xXTT$Wp$_OOi%&f&zB=Bekr`QX)YX7m*M4`ST{cJx6_Q>EgkLY3#Mj&}e z6v$-zv@4n#?ShSHH8tAffXg#D#aFlwh8ThLyO~ug)&fsuaEf8TT~(DqP`RZZxc9R~ zH%TLqJZ87u%N7AoXK)Gu+|b-Czf~yjq@_hp0B*>@F$b76?tno{3%&gm>=taP1OwKg z!0TjI`R@y0LM$F$l;`ha9I6WYZW)dqC&c364+_2Q6h+Ynua}P?x3{N#5I5QRx$Mfv@1KQL)D8o~229Aiu!%K>x z-4spPAD>SwMn!Eh?GX+Fo(=}A>f?_6P0l~}AqU?=Fkp=Zp2>nZByfk%CzcjVyD1uh zELk>10yA-6z1%61*S=cWa$K%I_oj`>6M&a-zmztW1;Fk4T^0pJV`8P48YcnIk)2B9 zn}I1p@WN{r-IW?>9{Z1J)QRc?!5aOX2|do!852#nxVmZUoVz$RJc2%F$Uo?RsDu za9=DQ-r7&kH3axEa6RtVmZ#$lREWFL2&8yKQjN+Z+9Qjnm8d=b?gXCbi|wS3JYqU) zziu4@d>{BFurHZS06O^`7$1v=>x!|R6sIUxS?&Q`1Y7`AWmy=YDpZ~ki-%t=);3bq0@00ZlJ-e@Yj>muJyOjA-VlO#ZC$w4 zrQ=;HB|+d=;J<<6fdRdA-_5{f<>lg`LR~Fr1kw%msh%p@+T^vGS2SF0*#5(al8_;K z7xhI<2+?~Vee_O5i7qil7lP;}1`#bf6Fs_7GI<3-5TlpTqeSm5`1boh`X79!-_h(@ zYp=D}^X&V+uitg;=NY!@IBfd13Z0f7h{mZ7G?3LZHAd)PWjs3WIBz6t0hc?EKSwiO z&qKT>39VO-;)3J(i`|KEWd9F$=VI^0otjPTIydcXuW|*C@JXyK4YGloAqml`-Sgs! zicOhM6I7q3$C7+_ zoa?|8O{KXG>_0yigoUUb{vcaKnalnyyf|&{-G+U9_eiDV$(`G~?HbR$U-CyZM;xfU z3rdpwNs3!Zab&PB=wA-FzkPth-hwJSKmGztj6sjFE^S}2EbY0Ffei<;k>)&lm~V|H zm|>EXJGiJ%(>QsQ`_NxvP6!*S$V~^X5m_jw()1?%P3eiqMQO`Iu3N%Yx-i=cJpyzg9rl#JD@`F@v)u4Ast z*kWCkLK6E)Z4FH!6A)`hWni4&#XgUj@BbhKP4P%&<3A|EP5a+fFQ$SryZL6-L_|?? zx63~Vvu&pz$+wKO>u^y<7Tk)oniB)s@y(vdL6{+efn1&AfBh>=D8i7aNdE5U=1AeE zKl#7Ec&Za{MZnO04+t9}&uMGbNlnKmGPA0$p}O3#2SmsI)W~;#lNdjF|5F}&>P#SL zOMbEA5SU@(d}L~7!6wsZi>9#DQ!`;=|Kxo91-3ceh&{rtusu0&YJ5H`HZ4!v3&C=s ztEh;|&+&l(R|#gU%TTq@n;gER-oK6D2YlMI!3AQ2bh`&7>)J?9{74vC$i1I;KLE7$ z!E5dXtq;h}cU&h!wwZo3n~mbaY{!5jXyu|03*HRuyCzFX}StSIQhOYw)NzI*GAzjoR z8?u~U&BK0gkk>RR`F)?bGU1rwhQ|L2My|M%aTX?QuUifn%~G;62(%A^Epe5I-2cSXku_WJF0W>bO_4>88VxX%gd^M!K%4u4Cf~p zBDt}oh8EB2Ry4<-HxdrA#Yk!uuRd6GliqC_Bl@dy#o2aW3A?hSiHs?V;>gc!*#qgj z9`!SlpJ%4c8JW1Ie?-g)cJ@*lkN4_z8&N~I(`q`O`bd|z8`^P1^lHmKUa0nej}nX+ zV3CU;vIX%5%Z4;W+QgWG^i4^s$~?0p!)K58*P($ww2K|5TfU4sL(BtGUB@jDO7J@+ zMi~z!L5)=^c86JrptgVSZ&yX$Z}U5pFdJ8W2v!Av8!x#agH@(Kn=Q0SDEstv=&R>B zphI2ZpOYV%{2}dGHJ7zs$gOuL2xTBQFDp2`zKUMq(Q)-?SSP#{_mq~=iRG3J8l$&4 z{#T(Qj++gj?of;82fttbxXorujOvPDoZ*Jc5AR|{u=RM!aW0^~<~<~pjZG|_FunfL zD`#ZihT8;_jU5hriYRr6Z#MRIi3ysw+?ncLfLW0Ds!DM^O>*E_DbjNUL_r(KtVsUq zXr$oc{$_^;DzJH4CXwj*gouM%AgMvsnAP8nSdwI9aw>{xw4wfK(E0H{JI6b=>a=OT zmv8@DFpm@j#f$$^%JpBqG*U*W6#d$W6~WvhLI?K#c}xh@>3z3{WrHKse~YDGW*TSw z)If&9Xjxu`yg38CQ8cmFagz11XOp$`i&q?XV@O@;)Z5 z{XR>^;noX}?2ZV`soh3hu15a933~wT>+a;S9wu2$HSkLjKBH=67szRaef@!yV>5l6 za1t`nDey9|WN;TV0-Pv5)3*Oa8M0J*eiU}4Ln|co+RSqzabz7Iuhn-U%tasxEjYtb-nClq45RuYsNFd)xu_ z;$f&|^CxwT?L@GmWuNf3-ak)4tl3czSMCAEO6Xg7qo7+wgfBd*YdX@S!P6GNh)o_I zhnr=uZCv?LzaC-OXPH>Ion}W`J1e=lsI2W8g3r@=yVFR`x1J?3Yq0v!wY6b=S_l)l z`B(b4y>p#g;(ct`%v?#dRm13&nLTFZXfze2=(e7wofBk5XlyRQnm4?u^85-XDW{A6 zW!)601U~z$mV;C_;nL9Rhp+yd?RJj{WGcGPV9*%p3kG$PeL)nK-fV)|R)fI(x>Eho=93234iki3!v z|DZ>#e8gKYe)%2QQr)>Rvc}wSZN0qA^OH*VYbl=O*-z%$6@#$iVbb?#N)LYqTc9oR zk=o5cM0Qf$>qRSK*XqbJqpK&>qVCaY+;DVtZm40NY*JC6qyW8#vBVQ2f}d$9oAS34 zX)|KJ;z44U%x=l=#8UFqJ?qk>=0~5DII5bIoTS3#I@waZ?vxe>CbJ$0Ek@g8rMc+9 z{%smHaoogyxLDhVl=`nuJKqZg9$WV=*v#ZO@(K`qkzll@hQiOTnBQhqWLC<^@m0VA z&G1V>o~-bUVxhnqUQ~-1(_xL4A1+r)dca?p@nf7KsyO6T79Jz{IEORYbP>AffN$aZ z)6?>J5G`pjV=e5zt1)}Uoc_T~LnCL|95k0Wb9o_bTzP%ZK8`E@RZ5!m#ng)$LD7LZ26@U&w(6~Y(Xe9-%9vb;05Wwo)3%dx1HM6Cf?Q8-5&kCc{VDt+9v2T68}u7 zqY@lO@c7T#FyR=yYhQ2Z#6WCS|2V&%Wg;?f38Q=6!`S`0lwIt!$LgW-`_kq=Zo`SE zwvJN!LdY0jhl70((3f5STz*r)z_SoD-jfs}C+*ETn1BhyX=Iu;Y0g?pPcW7WL>y2F zyjui-+Dt$u1Ay_F8}&Z4cfsyzi+P@0QoW==@%z$aZ8^goJ|ez7>AF>AyEsWR%$C%%+|=VDvCb(&j+rIAU}~UJuia4*^o^}VOU7Np`ABt4QOX(UIypGI zD5t~IDDN&ezRLbDlcXRW_|3-O#j55Sl9xbnDoY=qRiq*PNg!@}R6rO~%Z4ygYbBR# ztH*x?R`kvui!nz!^p+mCXKaiV=88_q52_Hpp?jM*J`tVG^3_{%bK^4x-@t5#iH)scQ^ zN8C_=Wj1OEw%SjY5+$M0{e>*O`grH%5B9ytttvP?>|8bUVW9Jv$h)B0XsI+e}_57%ub}il?g%ZJX&gwCf z<=_XQ6YB<6T43$X+T;m&Z5R9|l?OY-+a+1wW8up4OfA@a+WAP9b#xw?_9x5x)UarF zCMmg=JLxl0T*U>@x++NZ702eNiF6S`K>ggnq+Ucwe3qRig(usuNFU3y<402e=x+GJ z=wk-QMGtHacY`G;$<1pBB(o-W3drF@skt(qBlAB_8m*bEtz7&U)# zMX=y>RqN*;r~r^p&gS4%c=J?PSfQ~@-Ja8hTJGk)68FA3eedHxOV2uS^5gk5)DC7H z#7?SN@0JjOXw5i|606!#JPj+fSIwCVvELgkBQ44j_KVT$EA!{O3-<}9=f;E16IkEuYpBd0_A_NVat))8ITa&zEpzozIpKC} zG7~+W@TzmYju;)=KBUb}WXm+XXI<@5dzCXic|mZK)}uj~P6#Kyj^YEPQ_)j8PwP(0 zM)85@(@rd7*)84T8=T~idLU7nloNKxbls7LP+emhjK|8pL^kIu$9RQ(f*uV8rPF1n zpi9a+y3ee3h6dZ_d4Zv&I(S*O^_}m`jQDl%i%TxIR%f+-m&qqciH&h^KalEwlfMc15^$e8G+!E5yXbD6d~U_j0{foT)1J?bX5Zx&-C z6rm6(`*^p9{{b0s<(Dr3EdwQ~fI+LUb8tKN1;gkY@IDM#7U}99ml=v$M~#&E{AGt8WqF3QyWUtYIwz&6bgCo^raRMQVGz) zbE|)-7wzr!gzc|6fLVf5d6hW2>?8fJU-{qL4$=7}fy zIOxn>;{s;%87dFqfY4%dk&_S2WuJ4?HRT{1D8LkV}31ShJV5oH_1gDt=n1(j(^G>U&U`U@; z?2ylT5<-AD+&$O_c`L+IP_8A2%hY^JQ1P&3cqw*v=D=MdVNJVF5N)x6fB=NvTH@qz zTG+j5w2B12bxl-y$r*t6uC}OzHQ7z3&~e;aFn4)bS5A9Te3bLa&r5F@|8H9j{ZJEK z?gLf3v$5M(oS_aDD%l|nCPtnz19UA-yr_@V6c=sO9GMo>(78W{;u8gRKg+z=e|<<# zXwJ%BEy0kyD%H>@sdjT~SO;ps+E~*^H~ty}7MR{s<*<3+2lBCi#%*jWKL!h{X+0n% zuKcWQAV?%+xSWn6K)-KL>xKT9jJ8Kyy>Z+6x9VHCQvq-HOmLYJcEK{siJPJJX5#?( z3I_AlN&gMyWsKLl*RG`S;)7C<0gYg59U&=^w`GY-$>OMrfpb^4Hn1(~6jn3;!6;9J7Z{ zPJf8LkS_l$qwXQ)(+-mIv;Bi`j=E7XCqTTDQCuC_G^l7~sh5_SIvrg3@C@vzD)g)-CdezZAC54ev+EMYEYk=oiqY zzdx%mmu>&98}n9+q*Wn_LNQliGl;{tFpPxkeL8^xfUS=nj0&8Ns zhd}u7j#R>r#D`k*=24HD{b7Nn?TDF=6(`B@u` z5#01ER2s4`#Y(^Il%SjIJPBZ`ld{hT#LNwJn0IF;+$^F#dQm51`Htpgh(kG|)Xe{+ z87;uJ0?Zy}>Ct8o(3hq)`g4_bH-Ejv_vv<6J{gE$kM?nCCC$pU?{+3z#YSJ@wJo&< zRJjDk`NnGkdJkOmrBu_1y!J|2=S}qSZf-^L*<<2t zbv1NRw0F-GLt7V)gl)O_B+etHCvabrvK0CdQ=!ztzA;>pwoTF%(l6z_XSZW!ZWb9l zaogXUq%BV{{z>k$=YhzW?&*($=5-$NvdjBc?B>l)p5ks4-}x^uLGUh{XaU%9?4+Qr zjpb}3Su=c^hU_O`bPEli!m>q33YBL;a4w4)jUtH%z7Y9?O_USsaOfNvSJwQ;wo{TB zoms~7)JehlouPUmOWFU%;Pz^N zdJtdf4BUYxR6x_be*x15mx7Pu^haYoRRGwaD?eZCM5~Tv$7Cd=Lv*nKfE@UaMeUBD zi+h7ZKJA1Sz5KBt`v}!yUY>S+-KQ72NVnK(2u^65WB=Y4u~G`N^KO<=EaB(#m15ZJSSxCq zWv_1tG4#v6UGtzd113*@xFfq+lrgKO{#~3E+>C=*#m?bgGvogs$I#R(VzR_U8RCsN zlm{_odRm@uw`qTgGgvncBr!{@{TyUDOlngv;048i^shpelg`#{ga#)ag&D|+Bfr`I fAO8G^x&`IaE6Gy%+(ZILR_;L6^i->r?ce?nUh-Rm literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/site/site.xml b/dhp-workflows/dhp-aggregation/src/site/site.xml new file mode 100644 index 0000000000..da5da0f1e7 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/site/site.xml @@ -0,0 +1,32 @@ + + + + org.apache.maven.skins + maven-fluido-skin + 1.8 + + + + + + + + +

+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 02bc5d8d4e..d8773642ee 100644 --- a/pom.xml +++ b/pom.xml @@ -550,7 +550,7 @@ org.apache.maven.plugins maven-site-plugin - 3.7.1 + 3.9.1 From 7c804acda824db34454e0564a7ed28fd43052a79 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 15 Nov 2021 14:42:43 +0100 Subject: [PATCH 53/60] [graph resolution] minor --- .../eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index e3b3fb52d3..e1341eb3fb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -45,7 +45,7 @@ yarn cluster - Resolve Relations in raw graph + Resolve Entities in raw graph eu.dnetlib.dhp.oa.graph.resolution.SparkResolveEntities dhp-graph-mapper-${projectVersion}.jar From 7d0a03f607d0b41b221c5b62010b27f10f05b7b6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 15 Nov 2021 14:45:54 +0100 Subject: [PATCH 54/60] [graph resolution] minor --- .../eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml index e1341eb3fb..ceb13c5e8b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + graphBasePath From 668ac25224c093b6b06bbb7e6c51227f0a5c6c14 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 15 Nov 2021 17:02:45 +0100 Subject: [PATCH 55/60] [graph resolution] using existing argument parser file name --- .../dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala index afd195ed04..316b8afed9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/resolution/SparkResolveEntities.scala @@ -19,7 +19,7 @@ object SparkResolveEntities { def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/resolution/resolve_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession From bafa2990f31b4c39af95be9393f153e1b4823cfc Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 15 Nov 2021 17:07:16 +0100 Subject: [PATCH 56/60] code formatting --- .../CreateActionSetSparkJob.java | 8 ++-- .../CreateOpenCitationsASTest.java | 38 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index eeb86a8ff5..ea5fea96f7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -65,10 +65,10 @@ public class CreateActionSetSparkJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath {}", outputPath); - final boolean shouldDuplicateRels = - Optional.ofNullable(parser.get("shouldDuplicateRels")) - .map(Boolean::valueOf) - .orElse(Boolean.FALSE); + final boolean shouldDuplicateRels = Optional + .ofNullable(parser.get("shouldDuplicateRels")) + .map(Boolean::valueOf) + .orElse(Boolean.FALSE); SparkConf conf = new SparkConf(); runWithSparkSession( diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java index 7567f855ba..5a04dcefe8 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateOpenCitationsASTest.java @@ -84,8 +84,8 @@ public class CreateOpenCitationsASTest { new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-shouldDuplicateRels", - Boolean.TRUE.toString(), + "-shouldDuplicateRels", + Boolean.TRUE.toString(), "-inputPath", inputPath, "-outputPath", @@ -101,7 +101,7 @@ public class CreateOpenCitationsASTest { assertEquals(60, tmp.count()); - // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); } @@ -109,31 +109,31 @@ public class CreateOpenCitationsASTest { void testNumberofRelations2() throws Exception { String inputPath = getClass() - .getResource( - "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles") + .getPath(); CreateActionSetSparkJob - .main( - new String[] { - "-isSparkSessionManaged", - Boolean.FALSE.toString(), - "-inputPath", - inputPath, - "-outputPath", - workingDir.toString() + "/actionSet" - }); + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + inputPath, + "-outputPath", + workingDir.toString() + "/actionSet" + }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) - .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) - .map(aa -> ((Relation) aa.getPayload())); + .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Relation) aa.getPayload())); assertEquals(44, tmp.count()); - // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); + // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); } From 0a727d325d5d0ebd4fb9ed3f29482cf6e472b7c2 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 16 Nov 2021 08:43:41 +0100 Subject: [PATCH 57/60] [dedup] increased number of partitions in the consistency phase --- .../dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml index 83a47ea6c8..4ea0039263 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/consistency/oozie_app/workflow.xml @@ -89,7 +89,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 + --conf spark.sql.shuffle.partitions=15000 --graphBasePath${graphBasePath} --o${graphOutputPath} @@ -114,7 +114,7 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 + --conf spark.sql.shuffle.partitions=15000 --graphInputPath${graphBasePath} --outputPath${workingPath}/grouped_entities From 49f897ef29721af14960de650e187a71cd485d99 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 16 Nov 2021 15:24:23 +0100 Subject: [PATCH 58/60] [cleaning wf] fixed regex used to spot garbage in result titles; adjusted threshold for filtering titles --- .../dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index d8b1cded8a..43413b311e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -27,8 +27,8 @@ public class GraphCleaningFunctions extends CleaningFunctions { public static final int ORCID_LEN = 19; public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)"; public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*"; - public static final String TITLE_FILTER_REGEX = "[.*test.*\\W\\d]"; - public static final int TITLE_FILTER_RESIDUAL_LENGTH = 10; + public static final String TITLE_FILTER_REGEX = "(test)|\\W|\\d"; + public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5; public static T fixVocabularyNames(T value) { if (value instanceof Datasource) { From 6d4a1c57eebc04b475f4dfd6c10aa3c105553e96 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 17 Nov 2021 12:41:52 +0100 Subject: [PATCH 59/60] [Resolve Entities] Change test dataset to mirror the modification in the creation of the map between the pids and the unresolved --- .../dhp/oa/graph/resolution/ResolveEntitiesTest.scala | 3 --- .../resources/eu/dnetlib/dhp/oa/graph/resolution/dataset | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala index 9a142d3c0e..46bf48974b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/resolution/ResolveEntitiesTest.scala @@ -52,8 +52,6 @@ class ResolveEntitiesTest extends Serializable { } - - def generateUpdates(spark:SparkSession):Unit = { val template = Source.fromInputStream(this.getClass.getResourceAsStream("updates")).mkString @@ -152,7 +150,6 @@ class ResolveEntitiesTest extends Serializable { - val pubDS:Dataset[Result] = spark.read.text(s"$workingDir/work/resolvedGraph/publication").as[String].map(s => SparkResolveEntities.deserializeObject(s, EntityType.publication)) val t = pubDS.filter(p => p.getTitle!=null && p.getSubject!=null).filter(p => p.getTitle.asScala.exists(t => t.getValue.equalsIgnoreCase("FAKETITLE"))).count() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset index 05c875148c..c22dc94e34 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/resolution/dataset @@ -1,3 +1,3 @@ -{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} -{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} -{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} \ No newline at end of file +{"author":[{"affiliation":[],"fullname":"Greenough, B","name":"B","pid":[],"rank":1,"surname":"Greenough"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:55:00.639Z","dateoftransformation":"2021-09-25T11:00:04.201Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Heritage Education"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-59-cjhf"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17632/96bpgw5j9d.1"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17632/96bpgw5j9d.1"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434801681,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T15:29:45Z","harvestDate":"2021-09-25T10:55:00.639Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323","metadataNamespace":""}},"originalId":["50|DansKnawCris::09821844208a5cd6300b2bfb13bca1b9","oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:211323"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Interdisciplinary sciences"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Heritage Education"}]} +{"author":[{"affiliation":[],"fullname":"Keijers, D.M.G.","name":"D.M.G.","pid":[],"rank":1,"surname":"Keijers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:41:59.767Z","dateoftransformation":"2021-09-25T11:00:19.238Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"onderzoeksrapport"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-das-fkq"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xsw-qtnx"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-xsw-qtnx"]}],"language":{"classid":"dut/nld","classname":"Dutch; Flemish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434847381,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T13:53:29Z","harvestDate":"2021-09-25T10:41:59.767Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:20759","50|DansKnawCris::0dd644304b7116e8e58da3a5e3adc37a"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"PROSPECTIE"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Lange Ekker te Vessem, gemeente Eersel"}]} +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofcollection":"2021-09-25T10:43:13.768Z","dateoftransformation":"2021-09-25T11:01:22.863Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This find is registered at Portable Antiquities of the Netherlands with number PAN-00054604"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-a7-hwgy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-x3z-fsq5"}],"collectedfrom":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"hostedby":{"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"alternateIdentifier":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["","http://dx.doi.org/10.17026/dans-x3z-fsq5"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1635434508886,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2021-08-16T14:01:37Z","harvestDate":"2021-09-25T10:43:13.768Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","metadataNamespace":""}},"originalId":["oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:129566","50|DansKnawCris::203a27996ddc0fd1948258e5b7dec61c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"early medieval enamelled disc brooch variant A9"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: disc brooches"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Broader Match: schijffibula - geemailleerd"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"metal"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"copper alloy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages C"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: Early Middle Ages D"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Temporal coverage: 800 until 1000"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"PAN-00054604 - early medieval enamelled disc brooch variant A9"}]} From 82a4e4efaedee1e3796f0edf783a40c97339734d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 17 Nov 2021 14:17:22 +0100 Subject: [PATCH 60/60] [cleaning wf] fixed methodology to rule out invalid result titles, based on https://support.openaire.eu/issues/7206 --- .../oaf/utils/GraphCleaningFunctions.java | 19 ++++-- .../clean/GraphCleaningFunctionsTest.java | 15 +++- .../eu/dnetlib/dhp/oa/graph/clean/result.json | 68 ++++++++++++++++++- 3 files changed, 95 insertions(+), 7 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 43413b311e..592580ab8b 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -27,7 +27,10 @@ public class GraphCleaningFunctions extends CleaningFunctions { public static final int ORCID_LEN = 19; public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)"; public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*"; - public static final String TITLE_FILTER_REGEX = "(test)|\\W|\\d"; + + public static final String TITLE_TEST = "test"; + public static final String TITLE_FILTER_REGEX = String.format("(%s)|\\W|\\d", TITLE_TEST); + public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5; public static T fixVocabularyNames(T value) { @@ -195,10 +198,16 @@ public class GraphCleaningFunctions extends CleaningFunctions { final String title = sp .getValue() .toLowerCase(); - final String residual = Unidecode - .decode(title) - .replaceAll(TITLE_FILTER_REGEX, ""); - return residual.length() > TITLE_FILTER_RESIDUAL_LENGTH; + final String decoded = Unidecode.decode(title); + + if (StringUtils.contains(decoded, TITLE_TEST)) { + return decoded + .replaceAll(TITLE_FILTER_REGEX, "") + .length() > TITLE_FILTER_RESIDUAL_LENGTH; + } + return !decoded + .replaceAll("\\W|\\d", "") + .isEmpty(); }) .map(GraphCleaningFunctions::cleanValue) .collect(Collectors.toList())); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index 42d9f226cd..aa9535ef7f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -8,6 +8,7 @@ import java.io.IOException; import java.util.Collection; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.io.IOUtils; @@ -137,9 +138,21 @@ public class GraphCleaningFunctionsTest { .stream() .anyMatch(s -> s.getValue().equals("10.1009/qwerty"))); + assertEquals(5, p_out.getTitle().size()); + Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out); - assertEquals(1, p_cleaned.getTitle().size()); + assertEquals(3, p_cleaned.getTitle().size()); + + List titles = p_cleaned + .getTitle() + .stream() + .map(StructuredProperty::getValue) + .collect(Collectors.toList()); + assertTrue(titles.contains("omic")); + assertTrue( + titles.contains("Optical response of strained- and unstrained-silicon cold-electron bolometers test")); + assertTrue(titles.contains("「マキャベリ的知性と心の理論の進化論」 リチャード・バーン, アンドリュー・ホワイトゥン 編/藤田和生, 山下博志, 友永雅巳 監訳")); assertEquals("CLOSED", p_cleaned.getBestaccessright().getClassid()); assertNull(p_out.getPublisher()); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index 6795ccf1ba..b3e3024746 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -864,7 +864,7 @@ "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title" }, - "value": "Optical response of strained- and unstrained-silicon cold-electron bolometers" + "value": "Optical response of strained- and unstrained-silicon cold-electron bolometers test" }, { "dataInfo": { @@ -887,6 +887,72 @@ "schemename": "dnet:dataCite_title" }, "value": "test test 123 test" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "omic" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "「マキャベリ的知性と心の理論の進化論」 リチャード・バーン, アンドリュー・ホワイトゥン 編/藤田和生, 山下博志, 友永雅巳 監訳" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "-" } ] } \ No newline at end of file