From 50cc21d92e09805934c5b73060bd10803b35a77e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 29 Jun 2021 18:35:28 +0200 Subject: [PATCH 01/37] Added method to normalize DOI values (lower-case, remove everything preceding 10., filter out DOIs not starting with 10.) --- .../doiboost/DoiBoostMappingUtil.scala | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index a6101c07e..1baf55b89 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -38,6 +38,9 @@ object DoiBoostMappingUtil { val OPENAIRE_PREFIX = "openaire____" val SEPARATOR = "::" + val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)" + val DOI_PREFIX = "10." + val invalidName = List(",", "none none", "none, none", "none &na;", "(:null)", "test test test", "test test", "test", "&na; &na;") def toActionSet(item:Oaf) :(String, String) = { @@ -352,5 +355,26 @@ object DoiBoostMappingUtil { } + def isEmpty(x: String) = x == null || x.trim.isEmpty + + def normalizeDoi(input : String) :String ={ + val replaced = input.replaceAll("(?:\\n|\\r|\\t|\\s)", "").toLowerCase.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX) + if (isEmpty(replaced)) + return null + + if(replaced.indexOf("10.") < 0) + return null + + val ret = replaced.substring(replaced.indexOf("10.")) + + if (!ret.startsWith(DOI_PREFIX)) + return null + + return ret + + + } + + } From 8b8ffe82dcf81fa5858b049773064023360d1fd5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 29 Jun 2021 18:41:39 +0200 Subject: [PATCH 02/37] added step of normalization for the doi --- .../main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index a7f97aaf8..cc758bcae 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.uw import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory import eu.dnetlib.dhp.schema.oaf.{AccessRight, Instance, OpenAccessRoute, Publication} +import eu.dnetlib.doiboost.DoiBoostMappingUtil import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse @@ -53,7 +54,10 @@ object UnpayWallToOAF { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - val doi = (json \"doi").extract[String] + val doi = DoiBoostMappingUtil.normalizeDoi((json \"doi").extract[String]) + + if(doi == null) + return null val is_oa = (json\ "is_oa").extract[Boolean] From 06074ea7d3d1d62a7a7e0b751cb3794fcbd4937e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 29 Jun 2021 18:46:08 +0200 Subject: [PATCH 03/37] added normalization step to the doi --- .../src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala index 0b5edfb19..0f6dc4885 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala @@ -83,8 +83,9 @@ object ORCIDToOAF { JObject(extIds) <- json \ "workDetail" \"extIds" JField("type", JString(typeValue)) <- extIds JField("value", 
JString(value)) <- extIds - if "doi".equalsIgnoreCase(typeValue) - } yield (typeValue, value) + normalized_value: String = DoiBoostMappingUtil.normalizeDoi(value) + if "doi".equalsIgnoreCase(typeValue) && normalized_value != null + } yield (typeValue, normalized_value) if (doi.nonEmpty) { return doi.map(l =>OrcidWork(oid, l._2)) } From a74de1cda251c83519166a824815f11cd3b4b197 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 29 Jun 2021 18:51:11 +0200 Subject: [PATCH 04/37] added normalization step to the doi --- .../doiboost/mag/SparkProcessMAG.scala | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index 173e33360..ecb389af8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -2,6 +2,7 @@ package eu.dnetlib.doiboost.mag import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.Publication +import eu.dnetlib.doiboost.DoiBoostMappingUtil import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD @@ -12,6 +13,23 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ object SparkProcessMAG { + + def getDistinctResults (d:Dataset[MagPapers]):Dataset[MagPapers]={ + d.where(col("Doi").isNotNull) + .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING) + .reduceGroups((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2)) + .map(_._2)(Encoders.product[MagPapers]) + .map(mp => { + new MagPapers(mp.PaperId, mp.Rank, DoiBoostMappingUtil.normalizeDoi(mp.Doi), + mp.DocType, mp.PaperTitle, mp.OriginalTitle, + mp.BookTitle, mp.Year, mp.Date, mp.Publisher: String, + 
mp.JournalId, mp.ConferenceSeriesId, mp.ConferenceInstanceId, + mp.Volume, mp.Issue, mp.FirstPage, mp.LastPage, + mp.ReferenceCount, mp.CitationCount, mp.EstimatedCitation, + mp.OriginalVenue, mp.FamilyId, mp.CreatedDate) + })(Encoders.product[MagPapers]) + } + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) @@ -33,17 +51,11 @@ object SparkProcessMAG { implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication] implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs) - logger.info("Phase 1) make uninque DOI in Papers:") + logger.info("Phase 1) make uninue DOI in Papers:") val d: Dataset[MagPapers] = spark.read.load(s"$sourcePath/Papers").as[MagPapers] // Filtering Papers with DOI, and since for the same DOI we have multiple version of item with different PapersId we get the last one - val result: RDD[MagPapers] = d.where(col("Doi").isNotNull) - .rdd - .map{ p: MagPapers => Tuple2(p.Doi, p) } - .reduceByKey((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2)) - .map(_._2) - - val distinctPaper: Dataset[MagPapers] = spark.createDataset(result) + val distinctPaper: Dataset[MagPapers] = getDistinctResults(d) distinctPaper.write.mode(SaveMode.Overwrite).save(s"$workingPath/Papers_distinct") From 801763a0fa854ba5bb74d6b4c2c958089f2d8f70 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 29 Jun 2021 19:07:23 +0200 Subject: [PATCH 05/37] there is no more the need to lower case the doi since it is done in the first step. 
Also changed the creation of the id by using the factory --- .../eu/dnetlib/doiboost/mag/MagDataModel.scala | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala index e8c283a7a..fd9629024 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala @@ -196,8 +196,8 @@ case object ConversionUtil { val authors = inputParams._2 val pub = new Publication - pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava) - pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava) + pub.setPid(List(createSP(paper.Doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava) + pub.setOriginalId(List(paper.PaperId.toString, paper.Doi).asJava) //IMPORTANT //The old method result.setId(generateIdentifier(result, doi)) @@ -258,11 +258,14 @@ case object ConversionUtil { val description = inputParams._2 val pub = new Publication - pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava) - pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava) + pub.setPid(List(createSP(paper.Doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava) + pub.setOriginalId(List(paper.PaperId.toString, paper.Doi).asJava) - //Set identifier as 50 | doiboost____::md5(DOI) - pub.setId(generateIdentifier(pub, paper.Doi.toLowerCase)) + //IMPORTANT + //The old method result.setId(generateIdentifier(result, doi)) + //will be replaced using IdentifierFactory + + pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub)) val mainTitles = createSP(paper.PaperTitle, "main title", ModelConstants.DNET_DATACITE_TITLE) val originalTitles = createSP(paper.OriginalTitle, "alternative title", 
ModelConstants.DNET_DATACITE_TITLE) From cf758f4f91654745d08bc5dce7cd47ecd1334260 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 30 Jun 2021 10:03:15 +0200 Subject: [PATCH 06/37] added normalization step for the doi --- .../java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 6 ++++-- .../java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala | 3 ++- .../dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 3f6a26c46..15a321431 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -16,9 +16,10 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex import eu.dnetlib.dhp.schema.scholexplorer.OafUtils - import java.util +import eu.dnetlib.doiboost.DoiBoostMappingUtil + case class CrossrefDT(doi: String, json:String, timestamp: Long) {} case class mappingAffiliation(name: String) {} @@ -89,7 +90,7 @@ case object Crossref2Oaf { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats //MAPPING Crossref DOI into PID - val doi: String = (json \ "DOI").extract[String] + val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) result.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava) //MAPPING Crossref DOI into OriginalId @@ -101,6 +102,7 @@ case object Crossref2Oaf { val originalIds = new util.ArrayList(tmp.filter(id => id != null).asJava) result.setOriginalId(originalIds) + // Add DataInfo result.setDataInfo(generateDataInfo()) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala index 4a39a2987..159b817c7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.doiboost.DoiBoostMappingUtil import org.apache.commons.io.IOUtils import org.apache.hadoop.io.{IntWritable, Text} import org.apache.spark.SparkConf @@ -21,7 +22,7 @@ object CrossrefDataset { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val ts:Long = (json \ "indexed" \ "timestamp").extract[Long] - val doi:String = (json \ "DOI").extract[String] + val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) CrossrefDT(doi, input, ts) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala index b11e2d8de..526ff7b3a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/GenerateCrossrefDataset.scala @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.doiboost.DoiBoostMappingUtil import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass import org.apache.hadoop.io.{IntWritable, Text} @@ -27,7 +28,7 @@ object GenerateCrossrefDataset { def crossrefElement(meta: String): CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = 
parse(meta) - val doi:String = (json \ "DOI").extract[String] + val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String]) val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long] CrossrefDT(doi, meta, timestamp) From 1299bfb35708bd00088255bc9984b6dc66764bec Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 30 Jun 2021 12:53:27 +0200 Subject: [PATCH 07/37] Added class to test the normalization of doi --- .../dhp/doiboost/NormalizeDoiTest.scala | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala new file mode 100644 index 000000000..a9a841ee9 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/NormalizeDoiTest.scala @@ -0,0 +1,46 @@ +package eu.dnetlib.dhp.doiboost + +import eu.dnetlib.doiboost.DoiBoostMappingUtil +import org.junit.jupiter.api.Test + +class NormalizeDOITest { + + @Test + def doiDSLowerCase():Unit = { + val doi ="10.1042/BCJ20160876" + + assert(DoiBoostMappingUtil.normalizeDoi(doi).equals(doi.toLowerCase())) + + } + + + @Test + def doiFiltered():Unit = { + val doi = "0.1042/BCJ20160876" + + assert(DoiBoostMappingUtil.normalizeDoi(doi) == null) + } + + @Test + def doiFiltered2():Unit = { + val doi = "https://doi.org/0.1042/BCJ20160876" + + assert(DoiBoostMappingUtil.normalizeDoi(doi) == null) + } + + + @Test + def doiCleaned():Unit = { + val doi = "https://doi.org/10.1042/BCJ20160876" + + assert(DoiBoostMappingUtil.normalizeDoi(doi).equals("10.1042/BCJ20160876".toLowerCase())) + } + + @Test + def doiCleaned1():Unit = { + val doi = "https://doi.org/10.1042/ BCJ20160876" + + assert(DoiBoostMappingUtil.normalizeDoi(doi).equals("10.1042/BCJ20160876".toLowerCase())) + } + +} \ No 
newline at end of file From 1503ccbbb5f2da6e79b00ed5bc974b8020bc1f87 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 30 Jun 2021 12:55:37 +0200 Subject: [PATCH 08/37] added tests for the normalization of the dois --- .../crossref/CrossrefMappingTest.scala | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index cb543b4d7..0fa34d88e 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -461,5 +461,37 @@ class CrossrefMappingTest { // }) } + @Test + def testNormalizeDOI(): Unit = { + val template = Source.fromInputStream(getClass.getResourceAsStream("article_funder_template.json")).mkString + val line :String = "\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}]," + val json = template.replace("%s", line) + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + assertTrue(resultList.nonEmpty) + val items = resultList.filter(p => p.isInstanceOf[Publication]) + val result: Result = items.head.asInstanceOf[Publication] + + result.getPid.asScala.foreach(pid => assertTrue(pid.getQualifier.getClassid.equals("doi"))) + assertTrue(result.getPid.size() == 1) + result.getPid.asScala.foreach(pid => assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase()))) + + } + + @Test + def testNormalizeDOI2(): Unit = { + val template = Source.fromInputStream(getClass.getResourceAsStream("article.json")).mkString + + val resultList: List[Oaf] = Crossref2Oaf.convert(template) + assertTrue(resultList.nonEmpty) + val items = resultList.filter(p => p.isInstanceOf[Publication]) + val result: Result = items.head.asInstanceOf[Publication] + + 
result.getPid.asScala.foreach(pid => assertTrue(pid.getQualifier.getClassid.equals("doi"))) + assertTrue(result.getPid.size() == 1) + result.getPid.asScala.foreach(pid => assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase()))) + + } + + } From e487b5544c9c81ea348441a8e4f4456bb1c5a800 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 30 Jun 2021 12:57:11 +0200 Subject: [PATCH 09/37] added tests for the normalization of the dois --- .../dnetlib/doiboost/mag/MAGMappingTest.scala | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala index 88b1669f4..7eb50665e 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala @@ -4,7 +4,7 @@ import java.sql.Timestamp import eu.dnetlib.dhp.schema.oaf.Publication import org.apache.htrace.fasterxml.jackson.databind.SerializationFeature -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.api.java.function.MapFunction import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig} @@ -62,6 +62,55 @@ class MAGMappingTest { logger.debug(description) } + @Test + def normalizeDoiTest():Unit = { + + import org.json4s.jackson.Serialization.write + import org.json4s.DefaultFormats + + implicit val formats = DefaultFormats + + val conf = new SparkConf().setAppName("test").setMaster("local[2]") + val sc = new SparkContext(conf) + val spark = SparkSession.builder.config(sc.getConf).getOrCreate() + val path = getClass.getResource("magPapers.json").getPath + + import org.apache.spark.sql.Encoders + val schema = 
Encoders.product[MagPapers].schema + + import spark.implicits._ + val magPapers :Dataset[MagPapers] = spark.read.option("multiline",true).schema(schema).json(path).as[MagPapers] + val ret :Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers) + assertTrue(ret.count == 10) + ret.take(10).foreach(mp => assertTrue(mp.Doi.equals(mp.Doi.toLowerCase()))) + + spark.close() + } + + @Test + def normalizeDoiTest2():Unit = { + + import org.json4s.jackson.Serialization.write + import org.json4s.DefaultFormats + + implicit val formats = DefaultFormats + + val conf = new SparkConf().setAppName("test").setMaster("local[2]") + val sc = new SparkContext(conf) + val spark = SparkSession.builder.config(sc.getConf).getOrCreate() + val path = getClass.getResource("duplicatedMagPapers.json").getPath + + import org.apache.spark.sql.Encoders + val schema = Encoders.product[MagPapers].schema + + import spark.implicits._ + val magPapers :Dataset[MagPapers] = spark.read.option("multiline",true).schema(schema).json(path).as[MagPapers] + val ret :Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers) + assertTrue(ret.count == 8) + ret.take(8).foreach(mp => assertTrue(mp.Doi.equals(mp.Doi.toLowerCase()))) + spark.close() + //ret.take(8).foreach(mp => println(write(mp))) + } } From 149f85ddf59f1e13faf2ca63b1bdfd3609b40459 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 30 Jun 2021 13:00:52 +0200 Subject: [PATCH 10/37] added tests for the normalization of the dois --- .../orcid/MappingORCIDToOAFTest.scala | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala index cdda3b2af..7628fb853 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ 
b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -12,6 +12,8 @@ import org.slf4j.{Logger, LoggerFactory} import java.nio.file.Path import scala.io.Source +import scala.collection.JavaConversions._ + class MappingORCIDToOAFTest { val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass) val mapper = new ObjectMapper() @@ -63,9 +65,26 @@ class MappingORCIDToOAFTest { } + @Test + def testExtractDat1():Unit ={ + val aList: List[OrcidAuthor] = List(OrcidAuthor("0000-0002-4335-5309", Some("Lucrecia"), Some("Curto"), null, null, null ), + OrcidAuthor("0000-0001-7501-3330", Some("Emilio"), Some("Malchiodi"), null, null, null ), OrcidAuthor("0000-0002-5490-9186", Some("Sofia"), Some("Noli Truant"), null, null, null )) + + val orcid:ORCIDItem = ORCIDItem("10.1042/BCJ20160876", aList) + + val oaf = ORCIDToOAF.convertTOOAF(orcid) + assert(oaf.getPid.size() == 1) + oaf.getPid.toList.foreach(pid => assert(pid.getQualifier.getClassid.equals("doi"))) + oaf.getPid.toList.foreach(pid => assert(pid.getValue.equals("10.1042/BCJ20160876".toLowerCase()))) + //println(mapper.writeValueAsString(ORCIDToOAF.convertTOOAF(orcid))) + + + } + + } From f8eec0ca9a8a9bf761bfe73c63889d42c52d47ff Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 30 Jun 2021 13:19:54 +0200 Subject: [PATCH 11/37] added resource to test the normalization of doi during the import of MAG --- .../eu/dnetlib/doiboost/mag/duplicatedMagPapers.json | 10 ++++++++++ .../resources/eu/dnetlib/doiboost/mag/magPapers.json | 10 ++++++++++ 2 files changed, 20 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/mag/duplicatedMagPapers.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/mag/magPapers.json diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/mag/duplicatedMagPapers.json 
b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/mag/duplicatedMagPapers.json new file mode 100644 index 000000000..6b591a592 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/mag/duplicatedMagPapers.json @@ -0,0 +1,10 @@ +[{"PaperId":2866429360,"Rank":1,"Doi":"10.5465/AMBPP.2018.12619SYMPOSIUM","DocType":null,"PaperTitle":"new directions in research on conflict dynamics","OriginalTitle":"New Directions in Research on Conflict Dynamics","BookTitle":null,"Year":2018,"Date":"2018-07-09T00:00:00Z","Publisher":"Academy of Management Briarcliff Manor, NY 10510","JournalId":null,"Volume":"2018","Issue":"1","FirstPage":"12619","LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Academy of Management Proceedings","CreatedDate":"2018-07-19T00:00:00Z"}, + {"PaperId":2871494677,"Rank":2,"Doi":"10.1007/978-981-10-8971-8_33","DocType":null,"PaperTitle":"wild flame detection using weight adaptive particle filter from monocular video","OriginalTitle":"Wild Flame Detection Using Weight Adaptive Particle Filter from Monocular Video","BookTitle":null,"Year":2019,"Date":"2019-01-01T00:00:00Z","Publisher":"Springer, Singapore","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"357","LastPage":"365","ReferenceCount":14,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-07-19T00:00:00Z"}, + {"PaperId":2883520096,"Rank":3,"Doi":"10.5465/AMBPP .2018.12619SYMPOSIUM","DocType":"Journal","PaperTitle":"elaboracion de un corpus cacografico desde la disponibilidad lexica en estudiantes sevillanos un analisis para la ensenanza de la lengua","OriginalTitle":"Elaboración de un corpus cacográfico desde la disponibilidad léxica en estudiantes sevillanos. 
Un análisis para la enseñanza de la lengua","BookTitle":null,"Year":2018,"Date":"2018-07-13T00:00:00Z","Publisher":"Poli papers","JournalId":2738339871,"Volume":"13","Issue":"1","FirstPage":"119","LastPage":"131","ReferenceCount":28,"CitationCount":2,"EstimatedCitation":2,"OriginalVenue":"Revista de Lingüística y Lenguas Aplicadas","CreatedDate":"2018-08-03T00:00:00Z"}, + {"PaperId":2883800636,"Rank":4,"Doi":"10.1007/978-3-319-92513-4_4","DocType":null,"PaperTitle":"cognitive advantage of bilingualism and its criticisms","OriginalTitle":"Cognitive Advantage of Bilingualism and Its Criticisms","BookTitle":null,"Year":2018,"Date":"2018-01-01T00:00:00Z","Publisher":"Springer, Cham","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"67","LastPage":"89","ReferenceCount":74,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-08-03T00:00:00Z"}, + {"PaperId":2885023064,"Rank":5,"Doi":"10.1097/NNA.0000000000000647","DocType":"Journal","PaperTitle":"enhancing and advancing shared governance through a targeted decision making redesign","OriginalTitle":"Enhancing and Advancing Shared Governance Through a Targeted Decision-Making Redesign.","BookTitle":null,"Year":2018,"Date":"2018-09-01T00:00:00Z","Publisher":"J Nurs Adm","JournalId":194945867,"Volume":"48","Issue":"9","FirstPage":"445","LastPage":"451","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Journal of Nursing Administration","CreatedDate":"2018-08-22T00:00:00Z"}, + {"PaperId":2885607541,"Rank":1,"Doi":"10.1007/S10465-018-9283-7","DocType":"Journal","PaperTitle":"dance movement therapists attitudes and actions regarding lgbtqi and gender nonconforming communities","OriginalTitle":"Dance/Movement Therapists’ Attitudes and Actions Regarding LGBTQI and Gender Nonconforming Communities","BookTitle":null,"Year":2018,"Date":"2018-08-07T00:00:00Z","Publisher":"Springer 
US","JournalId":104993962,"Volume":"40","Issue":"2","FirstPage":"202","LastPage":"223","ReferenceCount":40,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"American Journal of Dance Therapy","CreatedDate":"2018-08-22T00:00:00Z"}, + {"PaperId":2886182429,"Rank":2,"Doi":"10.13039/501100003329","DocType":null,"PaperTitle":"caracteres de adaptacion en judia comun phaseolus vulgaris l aproximacion genetica e identificacion de qtls","OriginalTitle":"Caracteres de adaptación en judía común (Phaseolus vulgaris L.): aproximación genética e identificación de QTLs","BookTitle":null,"Year":2017,"Date":"2017-06-15T00:00:00Z","Publisher":"CSIC - Misión Biológica de Galicia (MBG)","JournalId":null,"Volume":null,"Issue":null,"FirstPage":null,"LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":null,"CreatedDate":"2018-08-22T00:00:00Z"}, + {"PaperId":2887149460,"Rank":3,"Doi":"10.1093/FEMSLE/FNY192","DocType":"Journal","PaperTitle":"small extracellular particles with big potential for horizontal gene transfer membrane vesicles and gene transfer agents","OriginalTitle":"Small extracellular particles with big potential for horizontal gene transfer: membrane vesicles and gene transfer agents.","BookTitle":null,"Year":2018,"Date":"2018-10-01T00:00:00Z","Publisher":"Narnia","JournalId":34954451,"Volume":"365","Issue":"19","FirstPage":null,"LastPage":null,"ReferenceCount":124,"CitationCount":13,"EstimatedCitation":13,"OriginalVenue":"Fems Microbiology Letters","CreatedDate":"2018-08-22T00:00:00Z"}, + {"PaperId":2887446149,"Rank":4,"Doi":"10.5465/ambpp.2018.12619symposium","DocType":"Journal","PaperTitle":"notes from the field toxigenic vibrio cholerae o141 in a traveler to florida nebraska 2017","OriginalTitle":"Notes from the Field: Toxigenic Vibrio cholerae O141 in a Traveler to Florida — Nebraska, 2017","BookTitle":null,"Year":2018,"Date":"2018-08-03T00:00:00Z","Publisher":"Centers for Disease Control MMWR 
Office","JournalId":183158886,"Volume":"67","Issue":"30","FirstPage":"838","LastPage":"839","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Morbidity and Mortality Weekly Report","CreatedDate":"2018-08-22T00:00:00Z"}, + {"PaperId":2889180499,"Rank":5,"Doi":"10.1007/S10924-018-1299-Z","DocType":"Journal","PaperTitle":"hybrid adsorbent materials obtained by the combination of poly ethylene alt maleic anhydride with lignin and lignosulfonate","OriginalTitle":"Hybrid Adsorbent Materials Obtained by the Combination of Poly(ethylene-alt-maleic anhydride) with Lignin and Lignosulfonate","BookTitle":null,"Year":2018,"Date":"2018-08-30T00:00:00Z","Publisher":"Springer US","JournalId":193665811,"Volume":"26","Issue":"11","FirstPage":"4293","LastPage":"4302","ReferenceCount":29,"CitationCount":5,"EstimatedCitation":5,"OriginalVenue":"Journal of Polymers and The Environment","CreatedDate":"2018-09-07T00:00:00Z"}] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/mag/magPapers.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/mag/magPapers.json new file mode 100644 index 000000000..738f5a96c --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/mag/magPapers.json @@ -0,0 +1,10 @@ +[{"PaperId":2866429360,"Rank":1,"Doi":"10.5465/AMBPP.2018.12619SYMPOSIUM","DocType":null,"PaperTitle":"new directions in research on conflict dynamics","OriginalTitle":"New Directions in Research on Conflict Dynamics","BookTitle":null,"Year":2018,"Date":"2018-07-09T00:00:00Z","Publisher":"Academy of Management Briarcliff Manor, NY 10510","JournalId":null,"Volume":"2018","Issue":"1","FirstPage":"12619","LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Academy of Management Proceedings","CreatedDate":"2018-07-19T00:00:00Z"}, 
+{"PaperId":2871494677,"Rank":2,"Doi":"10.1007/978-981-10-8971-8_33","DocType":null,"PaperTitle":"wild flame detection using weight adaptive particle filter from monocular video","OriginalTitle":"Wild Flame Detection Using Weight Adaptive Particle Filter from Monocular Video","BookTitle":null,"Year":2019,"Date":"2019-01-01T00:00:00Z","Publisher":"Springer, Singapore","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"357","LastPage":"365","ReferenceCount":14,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-07-19T00:00:00Z"}, +{"PaperId":2883520096,"Rank":3,"Doi":"10.4995/RLYLA.2018.9176","DocType":"Journal","PaperTitle":"elaboracion de un corpus cacografico desde la disponibilidad lexica en estudiantes sevillanos un analisis para la ensenanza de la lengua","OriginalTitle":"Elaboración de un corpus cacográfico desde la disponibilidad léxica en estudiantes sevillanos. Un análisis para la enseñanza de la lengua","BookTitle":null,"Year":2018,"Date":"2018-07-13T00:00:00Z","Publisher":"Poli papers","JournalId":2738339871,"Volume":"13","Issue":"1","FirstPage":"119","LastPage":"131","ReferenceCount":28,"CitationCount":2,"EstimatedCitation":2,"OriginalVenue":"Revista de Lingüística y Lenguas Aplicadas","CreatedDate":"2018-08-03T00:00:00Z"}, +{"PaperId":2883800636,"Rank":4,"Doi":"10.1007/978-3-319-92513-4_4","DocType":null,"PaperTitle":"cognitive advantage of bilingualism and its criticisms","OriginalTitle":"Cognitive Advantage of Bilingualism and Its Criticisms","BookTitle":null,"Year":2018,"Date":"2018-01-01T00:00:00Z","Publisher":"Springer, Cham","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"67","LastPage":"89","ReferenceCount":74,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-08-03T00:00:00Z"}, +{"PaperId":2885023064,"Rank":5,"Doi":"10.1097/NNA.0000000000000647","DocType":"Journal","PaperTitle":"enhancing and advancing shared governance through a targeted decision making 
redesign","OriginalTitle":"Enhancing and Advancing Shared Governance Through a Targeted Decision-Making Redesign.","BookTitle":null,"Year":2018,"Date":"2018-09-01T00:00:00Z","Publisher":"J Nurs Adm","JournalId":194945867,"Volume":"48","Issue":"9","FirstPage":"445","LastPage":"451","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Journal of Nursing Administration","CreatedDate":"2018-08-22T00:00:00Z"}, +{"PaperId":2885607541,"Rank":1,"Doi":"10.1007/S10465-018-9283-7","DocType":"Journal","PaperTitle":"dance movement therapists attitudes and actions regarding lgbtqi and gender nonconforming communities","OriginalTitle":"Dance/Movement Therapists’ Attitudes and Actions Regarding LGBTQI and Gender Nonconforming Communities","BookTitle":null,"Year":2018,"Date":"2018-08-07T00:00:00Z","Publisher":"Springer US","JournalId":104993962,"Volume":"40","Issue":"2","FirstPage":"202","LastPage":"223","ReferenceCount":40,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"American Journal of Dance Therapy","CreatedDate":"2018-08-22T00:00:00Z"}, +{"PaperId":2886182429,"Rank":2,"Doi":"10.13039/501100003329","DocType":null,"PaperTitle":"caracteres de adaptacion en judia comun phaseolus vulgaris l aproximacion genetica e identificacion de qtls","OriginalTitle":"Caracteres de adaptación en judía común (Phaseolus vulgaris L.): aproximación genética e identificación de QTLs","BookTitle":null,"Year":2017,"Date":"2017-06-15T00:00:00Z","Publisher":"CSIC - Misión Biológica de Galicia (MBG)","JournalId":null,"Volume":null,"Issue":null,"FirstPage":null,"LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":null,"CreatedDate":"2018-08-22T00:00:00Z"}, +{"PaperId":2887149460,"Rank":3,"Doi":"10.1093/FEMSLE/FNY192","DocType":"Journal","PaperTitle":"small extracellular particles with big potential for horizontal gene transfer membrane vesicles and gene transfer agents","OriginalTitle":"Small extracellular particles with big potential 
for horizontal gene transfer: membrane vesicles and gene transfer agents.","BookTitle":null,"Year":2018,"Date":"2018-10-01T00:00:00Z","Publisher":"Narnia","JournalId":34954451,"Volume":"365","Issue":"19","FirstPage":null,"LastPage":null,"ReferenceCount":124,"CitationCount":13,"EstimatedCitation":13,"OriginalVenue":"Fems Microbiology Letters","CreatedDate":"2018-08-22T00:00:00Z"}, +{"PaperId":2887446149,"Rank":4,"Doi":"10.15585/MMWR.MM6730A7","DocType":"Journal","PaperTitle":"notes from the field toxigenic vibrio cholerae o141 in a traveler to florida nebraska 2017","OriginalTitle":"Notes from the Field: Toxigenic Vibrio cholerae O141 in a Traveler to Florida — Nebraska, 2017","BookTitle":null,"Year":2018,"Date":"2018-08-03T00:00:00Z","Publisher":"Centers for Disease Control MMWR Office","JournalId":183158886,"Volume":"67","Issue":"30","FirstPage":"838","LastPage":"839","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Morbidity and Mortality Weekly Report","CreatedDate":"2018-08-22T00:00:00Z"}, +{"PaperId":2889180499,"Rank":5,"Doi":"10.1007/S10924-018-1299-Z","DocType":"Journal","PaperTitle":"hybrid adsorbent materials obtained by the combination of poly ethylene alt maleic anhydride with lignin and lignosulfonate","OriginalTitle":"Hybrid Adsorbent Materials Obtained by the Combination of Poly(ethylene-alt-maleic anhydride) with Lignin and Lignosulfonate","BookTitle":null,"Year":2018,"Date":"2018-08-30T00:00:00Z","Publisher":"Springer US","JournalId":193665811,"Volume":"26","Issue":"11","FirstPage":"4293","LastPage":"4302","ReferenceCount":29,"CitationCount":5,"EstimatedCitation":5,"OriginalVenue":"Journal of Polymers and The Environment","CreatedDate":"2018-09-07T00:00:00Z"}] \ No newline at end of file From 03767ea8e6cbf5f987ef46f678f8dc0d1b857eec Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 30 Jun 2021 13:21:24 +0200 Subject: [PATCH 12/37] slight modification of the resource to accomodate also doi normalization tests --- 
.../test/resources/eu/dnetlib/doiboost/crossref/article.json | 2 +- .../eu/dnetlib/doiboost/crossref/article_funder_template.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/article.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/article.json index 69424d0ad..5bdf9b3f3 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/article.json +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/article.json @@ -1,5 +1,5 @@ { - "DOI": "10.26850/1678-4618eqj.v35.1.2010.p41-46", + "DOI": " 10.26850/1678-4618eqj.v35.1.2010.p41-46", "issued": { "date-parts": [ [ diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/article_funder_template.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/article_funder_template.json index 1a49109ec..5ab0544d1 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/article_funder_template.json +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/article_funder_template.json @@ -1,5 +1,5 @@ { - "DOI": "10.26850/1678-4618eqj.v35.1.2010.p41-46", + "DOI": "10.26850/1678-4618EQJ.v35.1.2010.p41-46", "issued": { "date-parts": [ [ From 86f47afcc7736ed9c7eaf63aa52df159fd39d9ea Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 30 Jun 2021 14:36:49 +0200 Subject: [PATCH 13/37] slight modification of the resource to accomodate also doi normalization tests --- .../src/test/resources/eu/dnetlib/doiboost/uw/input.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/uw/input.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/uw/input.json index 33d4dbc3c..68b22b1d6 100644 --- 
a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/uw/input.json +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/uw/input.json @@ -1,6 +1,6 @@ -{"doi": "10.1038/2211089b0", "year": 1969, "genre": "journal-article", "is_oa": true, "title": "Planning: Trees in Danger", "doi_url": "https://doi.org/10.1038/2211089b0", "updated": "2020-02-06T13:51:15.164623", "oa_status": "bronze", "publisher": "Springer Nature", "z_authors": [{"name": "Our Planning Correspondent"}], "is_paratext": false, "journal_name": "Nature", "oa_locations": [{"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0028-0836,1476-4687", "journal_issn_l": "0028-0836", "published_date": "1969-03-01", "best_oa_location": {"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false} -{"doi": "10.1021/acs.bioconjchem.8b00058.s001", "year": null, "genre": "component", "is_oa": true, "title": "Engineering Reversible CellCell Interactions with Lipid Anchored Prosthetic Receptors", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "updated": "2020-04-04T21:15:41.966773", "oa_status": "bronze", "publisher": "American Chemical 
Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false} -{"doi": "10.1021/acs.bioconjchem.8b00086.s001", "year": null, "genre": "component", "is_oa": true, "title": "Rapid, Stoichiometric, Site-Specific Modification of Aldehyde-Containing Proteins Using a Tandem Knoevenagel-Intra Michael Addition Reaction", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "updated": "2020-04-04T21:24:50.688286", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, 
"repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false} +{"doi": "10.1038/221 1089b0", "year": 1969, "genre": "journal-article", "is_oa": true, "title": "Planning: Trees in Danger", "doi_url": "https://doi.org/10.1038/2211089b0", "updated": "2020-02-06T13:51:15.164623", "oa_status": "bronze", "publisher": "Springer Nature", "z_authors": [{"name": "Our Planning Correspondent"}], "is_paratext": false, "journal_name": "Nature", "oa_locations": [{"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0028-0836,1476-4687", "journal_issn_l": "0028-0836", "published_date": "1969-03-01", "best_oa_location": {"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", 
"repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false} +{"doi": "10.1021/acs.bioconjchem.8b00058. s001", "year": null, "genre": "component", "is_oa": true, "title": "Engineering Reversible CellCell Interactions with Lipid Anchored Prosthetic Receptors", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "updated": "2020-04-04T21:15:41.966773", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false} +{"doi": "10.1021/acs.bioconjCHEM.8b00086.s001", "year": null, "genre": "component", "is_oa": true, "title": "Rapid, Stoichiometric, Site-Specific Modification of Aldehyde-Containing Proteins Using a Tandem Knoevenagel-Intra Michael Addition Reaction", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "updated": "2020-04-04T21:24:50.688286", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, 
"is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false} {"doi": "10.1192/bjp.89.375.270", "year": 1943, "genre": "journal-article", "is_oa": false, "title": "Unusual Pituitary Activity in a Case of Anorexia Nervosa", "doi_url": "https://doi.org/10.1192/bjp.89.375.270", "updated": "2020-03-09T08:54:12.827623", "oa_status": "closed", "publisher": "Royal College of Psychiatrists", "z_authors": [{"given": "M.", "family": "Reiss", "sequence": "first"}], "is_paratext": false, "journal_name": "Journal of Mental Science", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0368-315X,2514-9946", "journal_issn_l": "0368-315X", "published_date": "1943-04-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false} {"doi": "10.1016/s0167-7012(99)00056-1", "year": 1999, "genre": "journal-article", "is_oa": false, "title": "Development of radiographic and microscopic techniques for the characterization of bacterial transport in intact 
sediment cores from Oyster, Virginia", "doi_url": "https://doi.org/10.1016/s0167-7012(99)00056-1", "updated": "2020-04-05T11:15:40.634599", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Hailiang", "family": "Dong", "sequence": "first"}, {"given": "Tullis C.", "family": "Onstott", "sequence": "additional"}, {"given": "Mary F.", "family": "DeFlaun", "sequence": "additional"}, {"given": "Mark E.", "family": "Fuller", "sequence": "additional"}, {"given": "Kathleen M.", "family": "Gillespie", "sequence": "additional"}, {"given": "James K.", "family": "Fredrickson", "sequence": "additional"}], "is_paratext": false, "journal_name": "Journal of Microbiological Methods", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0167-7012", "journal_issn_l": "0167-7012", "published_date": "1999-08-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false} {"doi": "10.1086/mp.1905.2.issue-3", "year": 1905, "genre": "journal-issue", "is_oa": false, "title": null, "doi_url": "https://doi.org/10.1086/mp.1905.2.issue-3", "updated": "2020-02-07T15:51:44.560109", "oa_status": "closed", "publisher": "University of Chicago Press", "z_authors": null, "is_paratext": false, "journal_name": "Modern Philology", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0026-8232,1545-6951", "journal_issn_l": "0026-8232", "published_date": "1905-01-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false} @@ -38,7 +38,7 @@ {"doi": "10.1016/s1067-991x(03)70006-6", "year": 2003, "genre": "journal-article", "is_oa": false, "title": "Use of the autolaunch method of dispatching a helicopter", "doi_url": "https://doi.org/10.1016/s1067-991x(03)70006-6", "updated": "2020-03-12T07:24:35.659404", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Kathleen S.", "family": "Berns", "sequence": "first"}, {"given": "Jeffery J.", "family": 
"Caniglia", "sequence": "additional"}, {"given": "Daniel G.", "family": "Hankins", "sequence": "additional"}, {"given": "Scott P.", "family": "Zietlow", "sequence": "additional"}], "is_paratext": false, "journal_name": "Air Medical Journal", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "1067-991X", "journal_issn_l": "1067-991X", "published_date": "2003-05-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false} {"doi": "10.1016/j.clinimag.2015.12.002", "year": 2016, "genre": "journal-article", "is_oa": false, "title": "Imaging findings, diagnosis, and clinical outcomes in patients with mycotic aneurysms: single center experience", "doi_url": "https://doi.org/10.1016/j.clinimag.2015.12.002", "updated": "2020-03-12T17:56:16.049536", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Amy R.", "family": "Deipolyi", "sequence": "first"}, {"given": "Alexander", "family": "Bailin", "sequence": "additional"}, {"given": "Ali", "family": "Khademhosseini", "sequence": "additional"}, {"ORCID": "http://orcid.org/0000-0003-4984-1778", "given": "Rahmi", "family": "Oklu", "sequence": "additional", "authenticated-orcid": false}], "is_paratext": false, "journal_name": "Clinical Imaging", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0899-7071", "journal_issn_l": "0899-7071", "published_date": "2016-05-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false} {"doi": "10.1016/j.biocel.2013.05.012", "year": 2013, "genre": "journal-article", "is_oa": false, "title": "MAVS-mediated host cell defense is inhibited by Borna disease virus", "doi_url": "https://doi.org/10.1016/j.biocel.2013.05.012", "updated": "2020-03-09T20:49:25.975316", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Yujun", "family": "Li", "sequence": "first"}, {"given": "Wuqi", "family": "Song", "sequence": "additional"}, {"given": 
"Jing", "family": "Wu", "sequence": "additional"}, {"given": "Qingmeng", "family": "Zhang", "sequence": "additional"}, {"given": "Junming", "family": "He", "sequence": "additional"}, {"given": "Aimei", "family": "Li", "sequence": "additional"}, {"given": "Jun", "family": "Qian", "sequence": "additional"}, {"given": "Aixia", "family": "Zhai", "sequence": "additional"}, {"given": "Yunlong", "family": "Hu", "sequence": "additional"}, {"given": "Wenping", "family": "Kao", "sequence": "additional"}, {"given": "Lanlan", "family": "Wei", "sequence": "additional"}, {"given": "Fengmin", "family": "Zhang", "sequence": "additional"}, {"given": "Dakang", "family": "Xu", "sequence": "additional"}], "is_paratext": false, "journal_name": "The International Journal of Biochemistry & Cell Biology", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "1357-2725", "journal_issn_l": "1357-2725", "published_date": "2013-08-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false} -{"doi": "10.1021/acsami.8b01074.s004", "year": null, "genre": "component", "is_oa": false, "title": "Solution Coating of Pharmaceutical Nanothin Films and Multilayer Nanocomposites with Controlled Morphology and Polymorphism", "doi_url": "https://doi.org/10.1021/acsami.8b01074.s004", "updated": "2020-04-04T21:02:07.815195", "oa_status": "closed", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false} +{"doi": "10.1021/acsami.8b01074 .s004", "year": null, "genre": "component", "is_oa": false, "title": "Solution Coating of Pharmaceutical Nanothin Films and Multilayer Nanocomposites with Controlled Morphology and Polymorphism", "doi_url": 
"https://doi.org/10.1021/acsami.8b01074.s004", "updated": "2020-04-04T21:02:07.815195", "oa_status": "closed", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false} {"doi": "10.1093/nar/18.18.5552", "year": 1990, "genre": "journal-article", "is_oa": true, "title": "Nucleotide sequence of LTR-gag region of Rous sarcoma virus adapted to semi-permissive host", "doi_url": "https://doi.org/10.1093/nar/18.18.5552", "updated": "2020-02-07T07:59:06.754183", "oa_status": "green", "publisher": "Oxford University Press (OUP)", "z_authors": [{"given": "Vladimir I.", "family": "Kashuba", "sequence": "first"}, {"given": "Serge V.", "family": "Zubak", "sequence": "additional"}, {"given": "Vadim M.", "family": "Kavsan", "sequence": "additional"}, {"given": "Alla V.", "family": "Rynditch", "sequence": "additional"}, {"given": "Ivo", "family": "Hlozanek", "sequence": "additional"}], "is_paratext": false, "journal_name": "Nucleic Acids Research", "oa_locations": [{"url": "http://europepmc.org/articles/pmc332244?pdf=render", "pmh_id": "oai:pubmedcentral.nih.gov:332244", "is_best": true, "license": null, "updated": "2017-10-22T11:38:23.025497", "version": "publishedVersion", "evidence": "oa repository (via OAI-PMH doi match)", "host_type": "repository", "endpoint_id": "pubmedcentral.nih.gov", "url_for_pdf": "http://europepmc.org/articles/pmc332244?pdf=render", "url_for_landing_page": "http://europepmc.org/articles/pmc332244", "repository_institution": "pubmedcentral.nih.gov"}, {"url": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC332244", "pmh_id": null, "is_best": false, "license": null, "updated": "2020-04-24T18:18:02.810779", "version": "publishedVersion", "evidence": "oa repository (via pmcid lookup)", 
"host_type": "repository", "endpoint_id": null, "url_for_pdf": null, "url_for_landing_page": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC332244", "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0305-1048,1362-4962", "journal_issn_l": "0305-1048", "published_date": "1990-01-01", "best_oa_location": {"url": "http://europepmc.org/articles/pmc332244?pdf=render", "pmh_id": "oai:pubmedcentral.nih.gov:332244", "is_best": true, "license": null, "updated": "2017-10-22T11:38:23.025497", "version": "publishedVersion", "evidence": "oa repository (via OAI-PMH doi match)", "host_type": "repository", "endpoint_id": "pubmedcentral.nih.gov", "url_for_pdf": "http://europepmc.org/articles/pmc332244?pdf=render", "url_for_landing_page": "http://europepmc.org/articles/pmc332244", "repository_institution": "pubmedcentral.nih.gov"}, "journal_is_in_doaj": false, "has_repository_copy": true} {"doi": "10.1021/acsami.8b01294.s001", "year": null, "genre": "component", "is_oa": true, "title": "Highly Elastic Biodegradable Single-Network Hydrogel for Cell Printing", "doi_url": "https://doi.org/10.1021/acsami.8b01294.s001", "updated": "2020-04-04T22:12:53.813586", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acsami.8b01294.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T22:11:06.757648", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acsami.8b01294.s001", "url_for_landing_page": null, "repository_institution": null}, {"url": "http://europepmc.org/articles/pmc5876623?pdf=render", "pmh_id": "oai:pubmedcentral.nih.gov:5876623", "is_best": false, "license": "acs-specific: authorchoice/editors choice usage agreement", "updated": "2020-02-19T13:50:59.876849", "version": 
"publishedVersion", "evidence": "oa repository (via OAI-PMH title match)", "host_type": "repository", "endpoint_id": "ac9de7698155b820de7", "url_for_pdf": "http://europepmc.org/articles/pmc5876623?pdf=render", "url_for_landing_page": "http://europepmc.org/articles/pmc5876623", "repository_institution": "National Institutes of Health (USA) - US National Library of Medicine"}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acsami.8b01294.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T22:11:06.757648", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acsami.8b01294.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": true} {"doi": "10.1097/scs.0b013e3181ef67ba", "year": 2010, "genre": "journal-article", "is_oa": false, "title": "Anomaly of the Internal Carotid Artery Detected During Tonsillectomy", "doi_url": "https://doi.org/10.1097/scs.0b013e3181ef67ba", "updated": "2020-02-10T19:05:26.462040", "oa_status": "closed", "publisher": "Ovid Technologies (Wolters Kluwer Health)", "z_authors": [{"given": "Serdar", "family": "Ceylan", "sequence": "first"}, {"given": "Serkan", "family": "Salman", "sequence": "additional"}, {"given": "Fatih", "family": "Bora", "sequence": "additional"}], "is_paratext": false, "journal_name": "Journal of Craniofacial Surgery", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "1049-2275", "journal_issn_l": "1049-2275", "published_date": "2010-09-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false} From bc3434764361747df2fb35ed68ac4dc75d058ced Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 30 Jun 2021 14:37:08 +0200 Subject: [PATCH 14/37] 
added assertions to verify doi normalization --- .../dnetlib/doiboost/uw/UnpayWallMappingTest.scala | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala index 6688fc616..fa696fffc 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/uw/UnpayWallMappingTest.scala @@ -20,16 +20,26 @@ class UnpayWallMappingTest { val Ilist = Source.fromInputStream(getClass.getResourceAsStream("input.json")).mkString - + var i:Int = 0 for (line <-Ilist.lines) { val p = UnpayWallToOAF.convertToOAF(line) if(p!= null) { assertTrue(p.getInstance().size()==1) + if (i== 0){ + assertTrue(p.getPid.get(0).getValue.equals("10.1038/2211089b0")) + } + if (i== 1){ + assertTrue(p.getPid.get(0).getValue.equals("10.1021/acs.bioconjchem.8b00058.s001")) + } + if (i== 2){ + assertTrue(p.getPid.get(0).getValue.equals("10.1021/acs.bioconjchem.8b00086.s001")) + } logger.info(s"ID : ${p.getId}") } assertNotNull(line) assertTrue(line.nonEmpty) + i = i+1 } @@ -39,7 +49,9 @@ class UnpayWallMappingTest { val item = UnpayWallToOAF.convertToOAF(l) assertEquals(item.getInstance().get(0).getAccessright.getOpenAccessRoute, OpenAccessRoute.bronze) + logger.info(mapper.writeValueAsString(item)) + } } From 0892cad4e86cab1fdbdcf9a48bd2b26669cce8a1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 5 Jul 2021 16:21:42 +0200 Subject: [PATCH 15/37] the normalization of the content of value was not visible outside the block. 
Moved doi normalization operation while returning value --- .../main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala index 0f6dc4885..1cd3f7028 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala @@ -83,9 +83,8 @@ object ORCIDToOAF { JObject(extIds) <- json \ "workDetail" \"extIds" JField("type", JString(typeValue)) <- extIds JField("value", JString(value)) <- extIds - normalized_value: String = DoiBoostMappingUtil.normalizeDoi(value) - if "doi".equalsIgnoreCase(typeValue) && normalized_value != null - } yield (typeValue, normalized_value) + if "doi".equalsIgnoreCase(typeValue) + } yield (typeValue, DoiBoostMappingUtil.normalizeDoi(value)) if (doi.nonEmpty) { return doi.map(l =>OrcidWork(oid, l._2)) } @@ -103,7 +102,7 @@ object ORCIDToOAF { def convertTOOAF(input:ORCIDItem) :Publication = { val doi = input.doi val pub:Publication = new Publication - pub.setPid(List(createSP(doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava) + pub.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava) pub.setDataInfo(generateDataInfo()) pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub)) From 7177c252616dd1e48aafb85ebfb5dff229d72444 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 5 Jul 2021 16:22:38 +0200 Subject: [PATCH 16/37] added check for null value during doi normalization --- .../src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 1baf55b89..12e4ac379 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -358,6 +358,8 @@ object DoiBoostMappingUtil { def isEmpty(x: String) = x == null || x.trim.isEmpty def normalizeDoi(input : String) :String ={ + if(input == null) + return null val replaced = input.replaceAll("(?:\\n|\\r|\\t|\\s)", "").toLowerCase.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX) if (isEmpty(replaced)) return null From 238d692a0a89ca4aaf74e00532d07a9d9754dd02 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 5 Jul 2021 16:23:33 +0200 Subject: [PATCH 17/37] apply specific AuthorMerger for doiboost --- .../main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index b77de13b9..ec9ae455e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -104,7 +104,7 @@ object SparkGenerateDoiBoost { val otherPub = item._2._2 if (otherPub != null) { crossrefPub.mergeFrom(otherPub) - crossrefPub.setAuthor(AuthorMerger.mergeAuthor(crossrefPub.getAuthor, otherPub.getAuthor)) + crossrefPub.setAuthor(DoiBoostAuthorMerger.mergeAuthor(crossrefPub.getAuthor, otherPub.getAuthor)) } } crossrefPub From f64f5d9e231a48182edea10d99c956f4df410935 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 5 Jul 2021 16:24:47 +0200 Subject: [PATCH 18/37] first implementation and test class for the specific Author Merger for doiboost. First change: crossref as base to be enriched. 
Modified the normalization function to remove accents from words --- .../doiboost/DoiBoostAuthorMerger.java | 175 ++++++++++++++++++ .../doiboost/DoiBoostAuthorMergerTest.java | 120 ++++++++++++ 2 files changed, 295 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java create mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java new file mode 100644 index 000000000..741df13ff --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java @@ -0,0 +1,175 @@ + +package eu.dnetlib.doiboost; + +import java.text.Normalizer; +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.StringUtils; + +import com.wcohen.ss.JaroWinkler; + +import eu.dnetlib.dhp.oa.merge.AuthorMerger; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.pace.model.Person; +import scala.Tuple2; + +public class DoiBoostAuthorMerger { + + private static final Double THRESHOLD = 0.95; + + public static List merge(List> authors) { + + Iterator> it = authors.iterator(); + final List author = it.next(); + + it.forEachRemaining(autList -> enrichPidFromList(author, autList, THRESHOLD)); + + return author; + + } + + public static List mergeAuthor(final List crossrefAuthor, final List otherAuthor, + Double threshold) { + + enrichPidFromList(crossrefAuthor, otherAuthor, threshold); + return crossrefAuthor; + } + + public static List mergeAuthor(final List crossrefAuthor, final List otherAuthor) { + return mergeAuthor(crossrefAuthor, otherAuthor, THRESHOLD); + } + + private static void enrichPidFromList(List base, List enrich, Double threshold) { + if (base == null || 
enrich == null) + return; + + // (if an Author has more than 1 pid, it appears 2 times in the list) + final Map basePidAuthorMap = base + .stream() + .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .flatMap( + a -> a + .getPid() + .stream() + .map(p -> new Tuple2<>(pidToComparableString(p), a))) + .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); + + // (list of pid that are missing in the other list) + final List> pidToEnrich = enrich + .stream() + .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .flatMap( + a -> a + .getPid() + .stream() + .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p))) + .map(p -> new Tuple2<>(p, a))) + .collect(Collectors.toList()); + + pidToEnrich + .forEach( + a -> { + Optional> simAuthor = base + .stream() + .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) + .max(Comparator.comparing(Tuple2::_1)); + + if (simAuthor.isPresent()) { + double th = threshold; + // increase the threshold if the surname is too short + if (simAuthor.get()._2().getSurname() != null + && simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0) + th = 0.99; + + if (simAuthor.get()._1() > th) { + Author r = simAuthor.get()._2(); + if (r.getPid() == null) { + r.setPid(new ArrayList<>()); + } + + // TERRIBLE HACK but for some reason when we create and Array with Arrays.asList, + // it creates of fixed size, and the add method raise UnsupportedOperationException at + // java.util.AbstractList.add + final List tmp = new ArrayList<>(r.getPid()); + tmp.add(a._1()); + r.setPid(tmp); + } + } + }); + } + + public static String pidToComparableString(StructuredProperty pid) { + return (pid.getQualifier() != null + ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" + : "") + + (pid.getValue() != null ? 
pid.getValue().toLowerCase() : ""); + } + + public static int countAuthorsPids(List authors) { + if (authors == null) + return 0; + + return (int) authors.stream().filter(DoiBoostAuthorMerger::hasPid).count(); + } + + private static int authorsSize(List authors) { + if (authors == null) + return 0; + return authors.size(); + } + + private static Double sim(Author a, Author b) { + + final Person pa = parse(a); + final Person pb = parse(b); + + // if both are accurate (e.g. they have name and surname) + if (pa.isAccurate() & pb.isAccurate()) { + return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5 + + new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5; + } else { + return new JaroWinkler() + .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); + } + } + + private static boolean hasPid(Author a) { + if (a == null || a.getPid() == null || a.getPid().size() == 0) + return false; + return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); + } + + private static Person parse(Author author) { + if (StringUtils.isNotBlank(author.getSurname())) { + return new Person(author.getSurname() + ", " + author.getName(), false); + } else { + return new Person(author.getFullname(), false); + } + } + + private static String normalize(final String s) { + String[] normalized = nfd(s) + .replaceAll("[^\\p{ASCII}]", "") + .toLowerCase() + // do not compact the regexes in a single expression, would cause StackOverflowError + // in case + // of large input strings + .replaceAll("(\\W)+", " ") + .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") + .replaceAll("(\\p{Punct})+", " ") + .replaceAll("(\\d)+", " ") + .replaceAll("(\\n)+", " ") + .trim() + .split(" "); + + Arrays.sort(normalized); + + return String.join(" ", normalized); + } + + private static String nfd(final String s) { + return Normalizer.normalize(s, 
Normalizer.Form.NFD); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java new file mode 100644 index 000000000..4779a3b3a --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java @@ -0,0 +1,120 @@ + +package eu.dnetlib.dhp.doiboost; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.doiboost.DoiBoostAuthorMerger; +import eu.dnetlib.pace.util.MapDocumentUtil; +import scala.Tuple2; + +public class DoiBoostAuthorMergerTest { + + private String publicationsBasePath; + + private List> authors; + + @BeforeEach + public void setUp() throws Exception { + + publicationsBasePath = Paths + .get(DoiBoostAuthorMergerTest.class.getResource("/eu/dnetlib/dhp/doiboost").toURI()) + .toFile() + .getAbsolutePath(); + + authors = readSample(publicationsBasePath + "/matching_authors_first.json", Publication.class) + .stream() + .map(p -> p._2().getAuthor()) + .collect(Collectors.toList()); + + } + + @Test + public void mergeTest() { // used in the dedup: threshold set to 0.95 + + for (List authors1 : authors) { + System.out.println("List " + (authors.indexOf(authors1) + 1)); + for (Author author : authors1) { + System.out.println(authorToString(author)); + } + } + + List 
merge = DoiBoostAuthorMerger.merge(authors); + + System.out.println("Merge "); + for (Author author : merge) { + System.out.println(authorToString(author)); + } + + Assertions.assertEquals(10, merge.size()); + + Assertions.assertEquals(3, merge.stream().filter(a -> a.getPid() != null).count()); + + merge + .stream() + .filter(a -> a.getPid() != null) + .forEach( + a -> Assertions + .assertTrue( + a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID)))); + merge.stream().filter(a -> a.getPid() != null).forEach(a -> { + try { + System.out.println(new ObjectMapper().writeValueAsString(a)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); + + } + + public List> readSample(String path, Class clazz) { + List> res = new ArrayList<>(); + BufferedReader reader; + try { + reader = new BufferedReader(new FileReader(path)); + String line = reader.readLine(); + while (line != null) { + res + .add( + new Tuple2<>( + MapDocumentUtil.getJPathString("$.id", line), + new ObjectMapper().readValue(line, clazz))); + // read next line + line = reader.readLine(); + } + reader.close(); + } catch (IOException e) { + e.printStackTrace(); + } + + return res; + } + + public String authorToString(Author a) { + + String print = "Fullname = "; + print += a.getFullname() + " pid = ["; + if (a.getPid() != null) + for (StructuredProperty sp : a.getPid()) { + print += sp.toComparableString() + " "; + } + print += "]"; + return print; + } +} From 22ce94733549faf56ea7eab82f6c0e4bcd27e983 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 5 Jul 2021 16:26:17 +0200 Subject: [PATCH 19/37] added resource files for testing of DoiBoostAuthorMerger --- .../resources/eu/dnetlib/dhp/doiboost/matching_authors_second | 1 + 1 file changed, 1 insertion(+) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_second diff --git 
a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_second b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_second new file mode 100644 index 000000000..b533f4d8a --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_second @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585329822470,"id":"50|doi_________::f248499c5e0b967ce27df8ed45bffe53","originalId":["10.1042/bcj20160876","50|doiboost____::f248499c5e0b967ce27df8ed45bffe53"],"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-03-27T17:23:42Z","dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"Pablo N. Romasanta","name":"Pablo N.","surname":"Romasanta","rank":1,"pid":null,"affiliation":null},{"fullname":"Lucrecia M. Curto","name":"Lucrecia M.","surname":"Curto","rank":2,"pid":[{"value":"0000-0002-4335-5309","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"María B. 
Sarratea","name":"María B.","surname":"Sarratea","rank":3,"pid":null,"affiliation":null},{"fullname":"Sofía Noli Truant","name":"Sofía","surname":"Noli Truant","rank":4,"pid":[{"value":"0000-0002-5490-9186","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"María B. Antonoglou","name":"María B.","surname":"Antonoglou","rank":5,"pid":null,"affiliation":null},{"fullname":"María J. Fernández Lynch","name":"María J.","surname":"Fernández Lynch","rank":6,"pid":null,"affiliation":null},{"fullname":"José M. Delfino","name":"José M.","surname":"Delfino","rank":7,"pid":null,"affiliation":null},{"fullname":"Roy A. Mariuzza","name":"Roy A.","surname":"Mariuzza","rank":8,"pid":null,"affiliation":null},{"fullname":"Marisa M. Fernández","name":"Marisa M.","surname":"Fernández","rank":9,"pid":null,"affiliation":null},{"fullname":"Emilio L. 
Malchiodi","name":"Emilio L.","surname":"Malchiodi","rank":10,"pid":[{"value":"0000-0001-7501-3330","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":[{"value":"Kinetic and thermodynamic studies of the interaction between activating and inhibitory Ly49 natural killer receptors and MHC class I molecules","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2016-11-10T13:04:33Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2016-12-22","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-01-01","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"Natural killer (NK) cells are lymphocytes of the innate immune system that eliminate virally infected or malignantly transformed cells. NK cell function is regulated by diverse surface receptors that are both activating and inhibitory. Among them, the homodimeric Ly49 receptors control NK cell cytotoxicity by sensing major histocompatibility complex class I molecules (MHC-I) on target cells. 
Although crystal structures have been reported for Ly49/MHC-I complexes, the underlying binding mechanism has not been elucidated. Accordingly, we carried out thermodynamic and kinetic experiments on the interaction of four NK Ly49 receptors (Ly49G, Ly49H, Ly49I and Ly49P) with two MHC-I ligands (H-2Dd and H-2Dk). These Ly49s embrace the structural and functional diversity of the highly polymorphic Ly49 family. Combining surface plasmon resonance, fluorescence anisotropy and far-UV circular dichroism (CD), we determined that the best model to describe both inhibitory and activating Ly49/MHC-I interactions is one in which the two MHC-I binding sites of the Ly49 homodimer present similar binding constants for the two sites (∼106 M–1) with a slightly positive co-operativity in some cases, and without far-UV CD observable conformational changes. Furthermore, Ly49/MHC-I interactions are diffusion-controlled and enthalpy-driven. These features stand in marked contrast with the activation-controlled and entropy-driven interaction of Ly49s with the viral immunoevasin m157, which is characterized by strong positive co-operativity and conformational selection. These differences are explained by the distinct structures of Ly49/MHC-I and Ly49/m157 complexes. 
Moreover, they reflect the opposing roles of NK cells to rapidly scan for virally infected cells and of viruses to escape detection using immunoevasins such as m157.","dataInfo":null}],"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"publisher":{"value":"Portland Press Ltd.","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://portlandpress.com/biochemj/article-pdf/474/1/179/688119/bcj-2016-0876.pdf","http://dx.doi.org/10.1042/bcj20160876"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Biochemical Journal","issnPrinted":"0264-6021","issnOnline":"1470-8728","issnLinking":null,"ep":"194","iss":null,"sp":"179","vol":"474","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} From 7498e6317450944fc72ef16b1c744f41e6e15209 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 5 Jul 2021 16:26:46 +0200 Subject: [PATCH 20/37] added resource files for testing of DoiBoostAuthorMerger --- .../eu/dnetlib/dhp/doiboost/matching_authors_first.json | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 
dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_first.json diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_first.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_first.json new file mode 100644 index 000000000..e617a2cc6 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_first.json @@ -0,0 +1,2 @@ +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585329822470,"id":"50|doi_________::f248499c5e0b967ce27df8ed45bffe53","originalId":["10.1042/bcj20160876","50|doiboost____::f248499c5e0b967ce27df8ed45bffe53"],"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-03-27T17:23:42Z","dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"Pablo N. Romasanta","name":"Pablo N.","surname":"Romasanta","rank":1,"pid":null,"affiliation":null},{"fullname":"Lucrecia M. Curto","name":"Lucrecia M.","surname":"Curto","rank":2,"pid":null,"affiliation":null},{"fullname":"María B. Sarratea","name":"María B.","surname":"Sarratea","rank":3,"pid":null,"affiliation":null},{"fullname":"Sofía Noli Truant","name":"Sofía","surname":"Noli Truant","rank":4,"pid":null,"affiliation":null},{"fullname":"María B. Antonoglou","name":"María B.","surname":"Antonoglou","rank":5,"pid":null,"affiliation":null},{"fullname":"María J. 
Fernández Lynch","name":"María J.","surname":"Fernández Lynch","rank":6,"pid":null,"affiliation":null},{"fullname":"José M. Delfino","name":"José M.","surname":"Delfino","rank":7,"pid":null,"affiliation":null},{"fullname":"Roy A. Mariuzza","name":"Roy A.","surname":"Mariuzza","rank":8,"pid":null,"affiliation":null},{"fullname":"Marisa M. Fernández","name":"Marisa M.","surname":"Fernández","rank":9,"pid":null,"affiliation":null},{"fullname":"Emilio L. Malchiodi","name":"Emilio L.","surname":"Malchiodi","rank":10,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":[{"value":"Kinetic and thermodynamic studies of the interaction between activating and inhibitory Ly49 natural killer receptors and MHC class I molecules","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2016-11-10T13:04:33Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2016-12-22","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-01-01","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"Natural killer (NK) cells are lymphocytes of the innate immune system that eliminate virally infected or malignantly transformed cells. NK cell function is regulated by diverse surface receptors that are both activating and inhibitory. 
Among them, the homodimeric Ly49 receptors control NK cell cytotoxicity by sensing major histocompatibility complex class I molecules (MHC-I) on target cells. Although crystal structures have been reported for Ly49/MHC-I complexes, the underlying binding mechanism has not been elucidated. Accordingly, we carried out thermodynamic and kinetic experiments on the interaction of four NK Ly49 receptors (Ly49G, Ly49H, Ly49I and Ly49P) with two MHC-I ligands (H-2Dd and H-2Dk). These Ly49s embrace the structural and functional diversity of the highly polymorphic Ly49 family. Combining surface plasmon resonance, fluorescence anisotropy and far-UV circular dichroism (CD), we determined that the best model to describe both inhibitory and activating Ly49/MHC-I interactions is one in which the two MHC-I binding sites of the Ly49 homodimer present similar binding constants for the two sites (∼106 M–1) with a slightly positive co-operativity in some cases, and without far-UV CD observable conformational changes. Furthermore, Ly49/MHC-I interactions are diffusion-controlled and enthalpy-driven. These features stand in marked contrast with the activation-controlled and entropy-driven interaction of Ly49s with the viral immunoevasin m157, which is characterized by strong positive co-operativity and conformational selection. These differences are explained by the distinct structures of Ly49/MHC-I and Ly49/m157 complexes. 
Moreover, they reflect the opposing roles of NK cells to rapidly scan for virally infected cells and of viruses to escape detection using immunoevasins such as m157.","dataInfo":null}],"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"publisher":{"value":"Portland Press Ltd.","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://portlandpress.com/biochemj/article-pdf/474/1/179/688119/bcj-2016-0876.pdf","http://dx.doi.org/10.1042/bcj20160876"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Biochemical Journal","issnPrinted":"0264-6021","issnOnline":"1470-8728","issnLinking":null,"ep":"194","iss":null,"sp":"179","vol":"474","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} 
+{"collectedfrom":[{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doiboost____::f248499c5e0b967ce27df8ed45bffe53","originalId":null,"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"Lucrecia Curto","name":"Lucrecia","surname":"Curto","rank":null,"pid":[{"value":"0000-0002-4335-5309","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Emilio Malchiodi","name":"Emilio","surname":"Malchiodi","rank":null,"pid":[{"value":"0000-0001-7501-3330","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sofia Noli Truant","name":"Sofia","surname":"Noli 
Truant","rank":null,"pid":[{"value":"0000-0002-5490-9186","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":null,"relevantdate":null,"description":null,"dateofacceptance":null,"publisher":null,"embargoenddate":null,"source":null,"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":null,"journal":null} \ No newline at end of file From 70ded407bbefbc3c777fc82697c08a7254a64df8 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Jul 2021 18:04:30 +0200 Subject: [PATCH 21/37] HttpClient used in metadata collection retries also on 404 --- .../eu/dnetlib/dhp/collection/HttpConnector2.java | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java index a84b26955..a61e2032c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java @@ -131,18 +131,9 @@ public class HttpConnector2 { } return attemptDownload(newUrl, retryNumber + 1, report); } - if (is4xx(urlConn.getResponseCode())) { - // CLIENT ERROR, DO NOT RETRY - report - .put( - REPORT_PREFIX + urlConn.getResponseCode(), - 
String - .format( - "%s error: %s", requestUrl, urlConn.getResponseMessage())); - throw new CollectorException("4xx error: request will not be repeated. " + report); - } - if (is5xx(urlConn.getResponseCode())) { + if (is4xx(urlConn.getResponseCode()) || is5xx(urlConn.getResponseCode())) { switch (urlConn.getResponseCode()) { + case HttpURLConnection.HTTP_NOT_FOUND: case HttpURLConnection.HTTP_BAD_GATEWAY: case HttpURLConnection.HTTP_UNAVAILABLE: case HttpURLConnection.HTTP_GATEWAY_TIMEOUT: From f580cb77e101489ca3ba33ab40ea1fa6f3a84b0d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 6 Jul 2021 21:11:11 +0200 Subject: [PATCH 22/37] added mapping for claim relation 'resultResult_publicationDataset_isRelatedTo' (present on BETA) --- .../dhp/oa/graph/raw/MigrateDbEntitiesApplication.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 1a55499c1..8f1a97984 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -529,6 +529,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i r2.setSubRelType(OUTCOME); r2.setRelClass(IS_PRODUCED_BY); break; + case "resultResult_publicationDataset_isRelatedTo": + r1.setRelClass(RESULT_RESULT); + r1.setSubRelType(PUBLICATION_DATASET); + r1.setRelClass(IS_RELATED_TO); + + r2.setRelClass(RESULT_RESULT); + r2.setSubRelType(PUBLICATION_DATASET); + r2.setRelClass(IS_RELATED_TO); + break; default: throw new IllegalArgumentException("claim semantics not managed: " + semantics); } From 32bdfdccbc76fd1c166f85fa426042d191f624d7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 7 Jul 2021 11:08:27 
+0200 Subject: [PATCH 23/37] [raw_all] Aggregator graph creation merges claims (updates) with the corresponding entity --- .../raw/MigrateDbEntitiesApplication.java | 70 ++++++++----------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 8f1a97984..c1c8e602c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -480,38 +480,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false); final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false); - final Relation r1 = new Relation(); - final Relation r2 = new Relation(); - - if (StringUtils.isNotBlank(validationDate)) { - r1.setValidated(true); - r1.setValidationDate(validationDate); - r2.setValidated(true); - r2.setValidationDate(validationDate); - } - r1.setCollectedfrom(COLLECTED_FROM_CLAIM); - r1.setSource(sourceId); - r1.setTarget(targetId); - r1.setDataInfo(DATA_INFO_CLAIM); - r1.setLastupdatetimestamp(lastUpdateTimestamp); - - r2.setCollectedfrom(COLLECTED_FROM_CLAIM); - r2.setSource(targetId); - r2.setTarget(sourceId); - r2.setDataInfo(DATA_INFO_CLAIM); - r2.setLastupdatetimestamp(lastUpdateTimestamp); + Relation r1 = prepareRelation(sourceId, targetId, validationDate); + Relation r2 = prepareRelation(targetId, sourceId, validationDate); final String semantics = rs.getString("semantics"); switch (semantics) { case "resultResult_relationship_isRelatedTo": - r1.setRelType(RESULT_RESULT); - r1.setSubRelType(RELATIONSHIP); - r1.setRelClass(IS_RELATED_TO); - - 
r2.setRelType(RESULT_RESULT); - r2.setSubRelType(RELATIONSHIP); - r2.setRelClass(IS_RELATED_TO); + r1 = setRelationSemantic(r1, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO); + r2 = setRelationSemantic(r2, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO); break; case "resultProject_outcome_produces": if (!"project".equals(sourceType)) { @@ -521,22 +498,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i "invalid claim, sourceId: %s, targetId: %s, semantics: %s", sourceId, targetId, semantics)); } - r1.setRelType(RESULT_PROJECT); - r1.setSubRelType(OUTCOME); - r1.setRelClass(PRODUCES); - - r2.setRelType(RESULT_PROJECT); - r2.setSubRelType(OUTCOME); - r2.setRelClass(IS_PRODUCED_BY); + r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES); + r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY); break; case "resultResult_publicationDataset_isRelatedTo": - r1.setRelClass(RESULT_RESULT); - r1.setSubRelType(PUBLICATION_DATASET); - r1.setRelClass(IS_RELATED_TO); - - r2.setRelClass(RESULT_RESULT); - r2.setSubRelType(PUBLICATION_DATASET); - r2.setRelClass(IS_RELATED_TO); + r1 = setRelationSemantic(r1, RESULT_PROJECT, PUBLICATION_DATASET, IS_RELATED_TO); + r2 = setRelationSemantic(r2, RESULT_PROJECT, PUBLICATION_DATASET, IS_RELATED_TO); break; default: throw new IllegalArgumentException("claim semantics not managed: " + semantics); @@ -549,6 +516,27 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i } } + private Relation prepareRelation(String sourceId, String targetId, String validationDate) { + Relation r = new Relation(); + if (StringUtils.isNotBlank(validationDate)) { + r.setValidated(true); + r.setValidationDate(validationDate); + } + r.setCollectedfrom(COLLECTED_FROM_CLAIM); + r.setSource(sourceId); + r.setTarget(targetId); + r.setDataInfo(DATA_INFO_CLAIM); + r.setLastupdatetimestamp(lastUpdateTimestamp); + return r; + } + + private Relation setRelationSemantic(Relation r, String 
relType, String subRelType, String relClass) { + r.setRelType(relType); + r.setSubRelType(subRelType); + r.setRelClass(relClass); + return r; + } + private List prepareContext(final String id, final DataInfo dataInfo) { final Context context = new Context(); context.setId(id); From 777536ce9180dc7fe6e4c5351d312fac5bd45aef Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 7 Jul 2021 11:23:40 +0200 Subject: [PATCH 24/37] [aggregation] string values used as regular expressions in the OAI collection classes are defined in a single point as constants, to be reused across the code (PR#122) --- .../dhp/collection/plugin/oai/OaiCollectorPlugin.java | 7 +++++-- .../eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index 67fd352a3..9918e4abe 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -21,6 +21,9 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; public class OaiCollectorPlugin implements CollectorPlugin { + public static final String DATE_REGEX = "\\d{4}-\\d{2}-\\d{2}"; + public static final String UTC_DATETIME_REGEX = "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"; + private static final String FORMAT_PARAM = "format"; private static final String OAI_SET_PARAM = "set"; private static final Object OAI_FROM_DATE_PARAM = "fromDate"; @@ -62,11 +65,11 @@ public class OaiCollectorPlugin implements CollectorPlugin { throw new CollectorException("Param 'mdFormat' is null or empty"); } - if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}") && 
!fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) { + if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) { throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); } - if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}") && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) { + if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) { throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index c044e02db..75dd746ea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -107,10 +107,12 @@ public class OaiIterator implements Iterator { if (set != null && !set.isEmpty()) { url += "&set=" + URLEncoder.encode(set, "UTF-8"); } - if (fromDate != null && (fromDate.matches("\\d{4}-\\d{2}-\\d{2}") || fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) { + if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX) + || fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); } - if (untilDate != null && (untilDate.matches("\\d{4}-\\d{2}-\\d{2}") || untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) { + if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX) + || untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); } log.info("Start harvesting using url: " + url); From fdcff42e46028a5e7201183ec5dc2433bdf10052 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: 
Wed, 7 Jul 2021 19:01:59 +0200 Subject: [PATCH 25/37] [raw_all] Aggregator graph creation merges claims (updates) with the corresponding entity --- .../dhp/oa/graph/raw/MigrateDbEntitiesApplication.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index c1c8e602c..a9d3e05fe 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -502,8 +502,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY); break; case "resultResult_publicationDataset_isRelatedTo": - r1 = setRelationSemantic(r1, RESULT_PROJECT, PUBLICATION_DATASET, IS_RELATED_TO); - r2 = setRelationSemantic(r2, RESULT_PROJECT, PUBLICATION_DATASET, IS_RELATED_TO); + r1 = setRelationSemantic(r1, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO); + r2 = setRelationSemantic(r2, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO); break; default: throw new IllegalArgumentException("claim semantics not managed: " + semantics); From b7b8e0986ec81574f23c7b8073b96aff719ed7d6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 8 Jul 2021 10:42:31 +0200 Subject: [PATCH 26/37] [raw_all] The claim merge procedure includes the claimed contexts in the merged result --- .../oa/graph/raw/MergeClaimsApplication.java | 87 +++++++++++++++++-- 1 file changed, 79 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java index 
9b99097ce..d5c310c1b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java @@ -3,8 +3,12 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -98,14 +102,9 @@ public class MergeClaimsApplication { raw .joinWith(claim, raw.col("_1").equalTo(claim.col("_1")), "full_outer") .map( - (MapFunction, Tuple2>, T>) value -> { - Optional> opRaw = Optional.ofNullable(value._1()); - Optional> opClaim = Optional.ofNullable(value._2()); - - return opRaw.isPresent() - ? opRaw.get()._2() - : opClaim.isPresent() ? opClaim.get()._2() : null; - }, + (MapFunction, Tuple2>, T>) value -> processClaims( + Optional.ofNullable(value._1()), + Optional.ofNullable(value._2())), Encoders.bean(clazz)) .filter(Objects::nonNull) .map( @@ -117,6 +116,78 @@ public class MergeClaimsApplication { .text(outPath); } + private static T processClaims(Optional> opRaw, + Optional> opClaim) { + + // when both are present + if (opClaim.isPresent() && opRaw.isPresent()) { + T oafClaim = opClaim.get()._2(); + if (oafClaim instanceof Result) { + T oafRaw = opRaw.get()._2(); + + // merge the context lists from both oaf objects ... + final List context = mergeContexts((Result) oafClaim, (Result) oafRaw); + + // ... and set it on the result from the aggregator + ((Result) oafRaw).setContext(context); + return oafRaw; + } + } + + // otherwise prefer the result from the aggregator + return opRaw.isPresent() + ? 
opRaw.get()._2() + : opClaim.map(Tuple2::_2).orElse(null); + } + + private static List mergeContexts(Result oafClaim, Result oafRaw) { + return new ArrayList<>( + Stream + .concat( + Optional + .ofNullable(oafClaim.getContext()) + .map(List::stream) + .orElse(Stream.empty()), + Optional + .ofNullable(oafRaw.getContext()) + .map(List::stream) + .orElse(Stream.empty())) + .collect( + Collectors + .toMap( + Context::getId, + c -> c, + (c1, c2) -> { + Context c = new Context(); + c.setId(c1.getId()); + c + .setDataInfo( + new ArrayList<>( + Stream + .concat( + Optional + .ofNullable(c1.getDataInfo()) + .map(List::stream) + .orElse(Stream.empty()), + Optional + .ofNullable(c2.getDataInfo()) + .map(List::stream) + .orElse(Stream.empty())) + .collect( + Collectors + .toMap( + d -> Optional + .ofNullable(d.getProvenanceaction()) + .map(Qualifier::getClassid) + .orElse(""), + d -> d, + (d1, d2) -> d1)) + .values())); + return c; + })) + .values()); + } + private static Dataset readFromPath( SparkSession spark, String path, Class clazz) { return spark From 97e0c27db954d3adde7f0d4c1b271c28d99cf996 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 8 Jul 2021 15:27:05 +0200 Subject: [PATCH 27/37] Added check for empty author list. If crossref is empty, the longest from all the merging providers is taken. 
If crossref is not empty, crossref is chosen as base for the enrichment --- .../doiboost/DoiBoostAuthorMerger.java | 226 ++++++++++-------- 1 file changed, 129 insertions(+), 97 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java index 741df13ff..537dc11a3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java @@ -5,101 +5,159 @@ import java.text.Normalizer; import java.util.*; import java.util.stream.Collectors; -import org.apache.commons.lang3.StringUtils; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.utils.DHPUtils; import com.wcohen.ss.JaroWinkler; -import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.pace.model.Person; + import scala.Tuple2; public class DoiBoostAuthorMerger { - private static final Double THRESHOLD = 0.95; - public static List merge(List> authors) { + public static List merge(List> authors, Boolean crossref) { Iterator> it = authors.iterator(); - final List author = it.next(); + List author = it.next(); - it.forEachRemaining(autList -> enrichPidFromList(author, autList, THRESHOLD)); + while (it.hasNext()){ + List autList = it.next(); + Tuple2, Boolean> tmp = mergeAuthor(author, autList, crossref); + author = tmp._1(); + crossref = tmp._2(); + } return author; } - public static List mergeAuthor(final List crossrefAuthor, final List otherAuthor, - Double threshold) { + public static Tuple2, Boolean> mergeAuthor(final List baseAuthor, final List otherAuthor, + final Boolean crossref) { + + if(baseAuthor == null || baseAuthor.size() == 0) + return new Tuple2<>(otherAuthor, false); + if(otherAuthor == null || otherAuthor.size() == 0) + return 
new Tuple2<>(baseAuthor, crossref); + + if(crossref) { + enrichPidFromList(baseAuthor, otherAuthor); + return new Tuple2<>(baseAuthor, true); + } + else + if (baseAuthor.size() > otherAuthor.size()){ + enrichPidFromList(baseAuthor, otherAuthor); + return new Tuple2<>(baseAuthor, false); + }else{ + enrichPidFromList(otherAuthor, baseAuthor); + return new Tuple2<>(otherAuthor, false); + } - enrichPidFromList(crossrefAuthor, otherAuthor, threshold); - return crossrefAuthor; } - public static List mergeAuthor(final List crossrefAuthor, final List otherAuthor) { - return mergeAuthor(crossrefAuthor, otherAuthor, THRESHOLD); - } - private static void enrichPidFromList(List base, List enrich, Double threshold) { - if (base == null || enrich == null) - return; + private static void enrichPidFromList(List base, List enrich) { + if(base == null || enrich == null) + return ; - // (if an Author has more than 1 pid, it appears 2 times in the list) - final Map basePidAuthorMap = base - .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) - .flatMap( - a -> a - .getPid() - .stream() - .map(p -> new Tuple2<>(pidToComparableString(p), a))) - .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); + //search authors having identifiers in the enrich list + final List authorsWithPids = enrich + .stream() + .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .collect(Collectors.toList()); - // (list of pid that are missing in the other list) - final List> pidToEnrich = enrich - .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) - .flatMap( - a -> a - .getPid() - .stream() - .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p))) - .map(p -> new Tuple2<>(p, a))) - .collect(Collectors.toList()); + Map assocMap = authorsWithPids + .stream() + .map( + a -> new Tuple2<>(DHPUtils.md5(a.getFullname()), AuthorAssoc.newInstance(a))) + .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); - pidToEnrich - .forEach( 
- a -> { - Optional> simAuthor = base - .stream() - .map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) - .max(Comparator.comparing(Tuple2::_1)); - if (simAuthor.isPresent()) { - double th = threshold; - // increase the threshold if the surname is too short - if (simAuthor.get()._2().getSurname() != null - && simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0) - th = 0.99; + //for each author in the base list, we search the best enriched match + base.stream() + .map(a -> new Tuple2<>(a, authorsWithPids.stream() + .map(e -> new Tuple2<>(e, sim(a, e))).collect(Collectors.toList()))) + .forEach(t2 -> { - if (simAuthor.get()._1() > th) { - Author r = simAuthor.get()._2(); - if (r.getPid() == null) { - r.setPid(new ArrayList<>()); - } - - // TERRIBLE HACK but for some reason when we create and Array with Arrays.asList, - // it creates of fixed size, and the add method raise UnsupportedOperationException at - // java.util.AbstractList.add - final List tmp = new ArrayList<>(r.getPid()); - tmp.add(a._1()); - r.setPid(tmp); + for (Tuple2 t : t2._2()) { + String mapEntry = DHPUtils.md5(t._1().getFullname()); + AuthorAssoc aa = assocMap.get(mapEntry); + if(aa.getScore() < t._2()){ + aa.setScore(t._2()); + aa.setTo_be_enriched(new ArrayList<>()); + aa.getTo_be_enriched().add(t2._1()); + }else if(aa.getScore() == t._2()){ + aa.getTo_be_enriched().add(t2._1()); } + } + + }); + + assocMap.keySet().forEach(k -> enrichAuthor(assocMap.get(k))); + + + } + + private static long getCommonWords(List fullEnrich, List fullEnriching){ + return fullEnrich.stream().filter( w -> fullEnriching.contains(w)).count(); + } + + + private static void enrichAuthor(Author enrich, Author enriching){ + //verify if some of the words in the fullname are contained in the other + //get normalized fullname + + long commonWords = getCommonWords(normalize(enrich.getFullname()), + normalize(enriching.getFullname())); + if(commonWords > 0 ){ + if(enrich.getPid() == null){ + enrich.setPid(new 
ArrayList<>()); + } + Set aPids = enrich.getPid().stream().map(p -> pidToComparableString(p)).collect(Collectors.toSet()); + enriching.getPid().forEach(p -> { + if (!aPids.contains(pidToComparableString(p))){ + enrich.getPid().add(p); } }); + if (enrich.getAffiliation() == null){ + if (enriching.getAffiliation() != null){ + enrich.setAffiliation(enriching.getAffiliation()); + } + } + } + + } + //Verify the number of words in common. The one that has more, wins. If the number of words in common are the same we + //enrich no author + private static void enrichAuthor(AuthorAssoc authorAssoc) { + if (authorAssoc.getTo_be_enriched().size() == 1){ + enrichAuthor(authorAssoc.getTo_be_enriched().get(0), authorAssoc.getWith_enricheing_content()); + }else{ + long common = 0; + List selected = new ArrayList<>() ; + for(Author a : authorAssoc.getTo_be_enriched()){ + long current_common = getCommonWords(normalize(a.getFullname()), + normalize(authorAssoc.getWith_enricheing_content().getFullname())); + if (current_common > common){ + common = current_common; + selected = new ArrayList<>(); + selected.add(a); + }else if(current_common == common){ + selected.add(a); + } + } + if (selected.size() == 1){ + enrichAuthor(selected.get(0), authorAssoc.getWith_enricheing_content()); + } + } + + } + + public static String pidToComparableString(StructuredProperty pid) { return (pid.getQualifier() != null ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" @@ -107,49 +165,21 @@ public class DoiBoostAuthorMerger { + (pid.getValue() != null ? 
pid.getValue().toLowerCase() : ""); } - public static int countAuthorsPids(List authors) { - if (authors == null) - return 0; - return (int) authors.stream().filter(DoiBoostAuthorMerger::hasPid).count(); - } - private static int authorsSize(List authors) { - if (authors == null) - return 0; - return authors.size(); - } private static Double sim(Author a, Author b) { - - final Person pa = parse(a); - final Person pb = parse(b); - - // if both are accurate (e.g. they have name and surname) - if (pa.isAccurate() & pb.isAccurate()) { - return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5 - + new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5; - } else { return new JaroWinkler() - .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); - } + .score(normalizeString(a.getFullname()), normalizeString(b.getFullname())); + } - private static boolean hasPid(Author a) { - if (a == null || a.getPid() == null || a.getPid().size() == 0) - return false; - return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); + private static String normalizeString(String fullname) { + return String.join(" ", normalize(fullname)); } - private static Person parse(Author author) { - if (StringUtils.isNotBlank(author.getSurname())) { - return new Person(author.getSurname() + ", " + author.getName(), false); - } else { - return new Person(author.getFullname(), false); - } - } - private static String normalize(final String s) { + private static List normalize(final String s) { String[] normalized = nfd(s) .replaceAll("[^\\p{ASCII}]", "") .toLowerCase() @@ -166,7 +196,9 @@ public class DoiBoostAuthorMerger { Arrays.sort(normalized); - return String.join(" ", normalized); + return Arrays.asList(normalized); + + } private static String nfd(final String s) { From e0e80cde22d850a2588dfca076b773dd2c0e60dc Mon Sep 17 00:00:00 2001 From: 
"miriam.baglioni" Date: Thu, 8 Jul 2021 18:52:25 +0200 Subject: [PATCH 28/37] Added class to store the most similar author list to be enriched w.r.t. one enriching author (related to DoiBoostAuthorMerger) --- .../java/eu/dnetlib/doiboost/AuthorAssoc.java | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/AuthorAssoc.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/AuthorAssoc.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/AuthorAssoc.java new file mode 100644 index 000000000..807567fba --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/AuthorAssoc.java @@ -0,0 +1,47 @@ +package eu.dnetlib.doiboost; + +import eu.dnetlib.dhp.schema.oaf.Author; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +public class AuthorAssoc implements Serializable { + private Double score ; + private List to_be_enriched; + private Author with_enricheing_content; + + public Double getScore() { + return score; + } + + public void setScore(Double score) { + this.score = score; + } + + public List getTo_be_enriched() { + return to_be_enriched; + } + + public void setTo_be_enriched(List to_be_enriched) { + this.to_be_enriched = to_be_enriched; + } + + public Author getWith_enricheing_content() { + return with_enricheing_content; + } + + public void setWith_enricheing_content(Author with_enricheing_content) { + this.with_enricheing_content = with_enricheing_content; + } + + public static AuthorAssoc newInstance(Author a){ + AuthorAssoc ret = new AuthorAssoc(); + ret.score = 0.0; + ret.to_be_enriched = new ArrayList<>(); + ret.with_enricheing_content = a; + + return ret; + + } +} From 434aa6380b2ba5b0843e6ab2338dbda3bd60df94 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 8 Jul 2021 18:53:15 +0200 Subject: [PATCH 29/37] Adding description of the merging process for DoiBoost (related 
to DoiBoostAuthorMerger) - to be refined --- .../doiboost/DoiBoostAuthorMerger.java | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java index 537dc11a3..578adb06b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java @@ -15,6 +15,37 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import scala.Tuple2; +/** + * This is a version of the AuthorMerger specific for DoiBoost. + * Here we suppose a match must exist for the authors. We compare via JaroWrinkler similarity measure each author in the list + * that should be enriched with each author in the enriching list. For each enriching author we select the best match that is + * the author with the highest similarity score. + * The association is done from the enriching author to the enriched because in this way only one match per enriching author can be found + * One enriching author can have the same maximum similarity score with more than one + * + * + * + * + * The idea is to enrich the most similar authors having at least one + * word of the name in + * common + * Quello che faccio e’ abbastanza semplice: ho una struttura dati che mantine l’informazione di associazione fra il record che puo’ possibilmente arricchire e quello che deve essere arricchito. + * 6:22 + * Questa struttura ha la lista di autori che possono essere arricchiti, l’autore che arricchisce e lo score di similarita fra l’autore che arricchisce e gli autori arricchiti. 
E’ il valore di una mappa che per chiave la il fullname dell’autore che arricchisce + * 6:23 + * per ogni autore che puo’ essere arricchito verifico se la entri nella mappa di quello che arricchisce e’ associata ad un autore con score di similarita’ piu’ basso. Se cosi’ e’ modifico l’associazione nella mappa per l’autore che arricchisce, sostituendo l’autore arricchito a cui era associato prima con quello nuovo che ha score piu’ alto. Se lo score e’ lo stesso, aggiungo il nuovo autore da arricchire alla lista degli autori associata all’autore che arricchisce + * 6:25 + * Alla fine caso facile: ogni entry e’ associata ad un unico autore da arricchire => verifico che almeno una delle parole che sono nei due nomi sia in comune fra i due insiemi Se e’ cosi’, aggiungo i pid mancanti all’autore da arricchire dell’autore che arricchisce + * 6:26 + * caso brutto: ci sono piu’ autori da arricchire con la stessa similarita: arricchisco quello che ha il maggior numero di parole del fullname uguali a quelle dell’autore che arricchisce. In caso di parita’ non si arricchisce + * 6:28 + * ricordiamoci che si parte dal presupposto che un match debba esistere visto che abbiamo lo stesso doi + * 6:29 + * di conseguenza l’autore che ha lo score di similarita’ piu’ alto fra quelli presenti ed anche una parola in comune del nome dovrebbe essere sufficiente per poterlo arricchire. 
+ * 6:30 + * I casi di omonimia che potrebbero portare problemi con i rank degli autori non si mappano + */ + public class DoiBoostAuthorMerger { From 0e47e9409973375f3ee83d2d6f39bb0a36ec5bcf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 8 Jul 2021 18:54:07 +0200 Subject: [PATCH 30/37] Added variable to verify if crossref is base for the merging of authors (related to DoiBoostAuthorMerger) --- .../java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index ec9ae455e..deda8c987 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -3,7 +3,7 @@ package eu.dnetlib.doiboost import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.merge.AuthorMerger import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.{Organization, Publication, Relation, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{Author, Organization, Publication, Relation, Dataset => OafDataset} import eu.dnetlib.doiboost.mag.ConversionUtil import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf @@ -25,6 +25,7 @@ object SparkGenerateDoiBoost { val conf: SparkConf = new SparkConf() val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json"))) parser.parseArgument(args) + var crossref : Boolean = true val spark: SparkSession = SparkSession .builder() @@ -104,7 +105,10 @@ object SparkGenerateDoiBoost { val otherPub = item._2._2 if (otherPub != null) { crossrefPub.mergeFrom(otherPub) - 
crossrefPub.setAuthor(DoiBoostAuthorMerger.mergeAuthor(crossrefPub.getAuthor, otherPub.getAuthor)) + val mergeRes : (java.util.List[Author], java.lang.Boolean) = DoiBoostAuthorMerger.mergeAuthor(crossrefPub.getAuthor, otherPub.getAuthor, crossref) + crossrefPub.setAuthor(mergeRes._1) + crossref = mergeRes._2 + } } crossrefPub From 96255fa647ac7badc43bae0cf2c01a2543c67234 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 8 Jul 2021 18:54:27 +0200 Subject: [PATCH 31/37] - --- .../resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml index fa47e142d..1ac17dff9 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml @@ -111,7 +111,7 @@ ${wf:conf('resumeFrom') eq 'CreateDOIBoost'} ${wf:conf('resumeFrom') eq 'GenerateActionSet'} ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} - + @@ -319,7 +319,7 @@ --executor-memory=${sparkExecutorIntersectionMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=7680 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From bf24f588e2ad4a1e4bd1a774f00683dbedf1aa99 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 8 Jul 2021 18:55:13 +0200 Subject: [PATCH 32/37] Added test for empty author list for crossref and other merging providers (related to DoiBoostAuthorMerger) --- .../doiboost/DoiBoostAuthorMergerTest.java | 204 +++++++++++++++++- 1 file changed, 
197 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java index 4779a3b3a..7be63e93e 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java @@ -9,6 +9,7 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; +import org.apache.neethi.Assertion; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -30,6 +31,7 @@ public class DoiBoostAuthorMergerTest { private List> authors; + @BeforeEach public void setUp() throws Exception { @@ -38,15 +40,15 @@ public class DoiBoostAuthorMergerTest { .toFile() .getAbsolutePath(); - authors = readSample(publicationsBasePath + "/matching_authors_first.json", Publication.class) - .stream() - .map(p -> p._2().getAuthor()) - .collect(Collectors.toList()); - } @Test - public void mergeTest() { // used in the dedup: threshold set to 0.95 + public void mergeTestOrcid() { + + authors = readSample(publicationsBasePath + "/matching_authors_first.json", Publication.class) + .stream() + .map(p -> p._2().getAuthor()) + .collect(Collectors.toList()); for (List authors1 : authors) { System.out.println("List " + (authors.indexOf(authors1) + 1)); @@ -55,7 +57,7 @@ public class DoiBoostAuthorMergerTest { } } - List merge = DoiBoostAuthorMerger.merge(authors); + List merge = DoiBoostAuthorMerger.merge(authors, true); System.out.println("Merge "); for (Author author : merge) { @@ -117,4 +119,192 @@ public class DoiBoostAuthorMergerTest { print += "]"; return print; } + + @Test + public void mergeTestMAG() { + + authors = readSample(publicationsBasePath + "/matching_authors_second", Publication.class) + .stream() + .map(p -> 
p._2().getAuthor()) + .collect(Collectors.toList()); + + for (List authors1 : authors) { + System.out.println("List " + (authors.indexOf(authors1) + 1)); + for (Author author : authors1) { + System.out.println(authorToString(author)); + } + } + + List merge = DoiBoostAuthorMerger.merge(authors, true); + + System.out.println("Merge "); + for (Author author : merge) { + System.out.println(authorToString(author)); + } + + Assertions.assertEquals(10, merge.size()); + + Assertions.assertEquals(10, merge.stream().filter(a -> a.getPid() != null).count()); + + merge + .stream() + .filter(a -> a.getPid() != null) + .forEach( + a -> Assertions + .assertTrue( + a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals("URL")))); + merge.stream().filter(a -> a.getPid() != null).forEach(a -> { + try { + System.out.println(new ObjectMapper().writeValueAsString(a)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); + + } + + + @Test + public void mergeTestCrossrefEmpty1() throws JsonProcessingException { + + authors = readSample(publicationsBasePath + "/empty_crossref_authors_first.json", Publication.class) + .stream() + .map(p -> p._2().getAuthor()) + .collect(Collectors.toList()); + + + List merge = DoiBoostAuthorMerger.merge(authors, true); + + System.out.println("Merge "); + for (Author author : merge) { + System.out.println(authorToString(author)); + } + + Assertions.assertEquals(3, merge.size()); + + Assertions.assertEquals(3, merge.stream().filter(a -> a.getPid() != null).count()); + + merge + .stream() + .filter(a -> a.getPid() != null) + .forEach( + a -> Assertions + .assertTrue( + a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID)))); + merge.stream().filter(a -> a.getPid() != null).forEach(a -> { + try { + System.out.println(new ObjectMapper().writeValueAsString(a)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); + + System.out.println(new 
ObjectMapper().writeValueAsString(merge)); + + } + + + @Test + public void mergeTestCrossrefEmpty2() throws JsonProcessingException { + + authors = readSample(publicationsBasePath + "/empty_crossref_authors_second.json", Publication.class) + .stream() + .map(p -> p._2().getAuthor()) + .collect(Collectors.toList()); + + + + List merge = DoiBoostAuthorMerger.merge(authors, false); + + System.out.println("Merge "); + for (Author author : merge) { + System.out.println(authorToString(author)); + } + + Assertions.assertEquals(10, merge.size()); + + Assertions.assertEquals(10, merge.stream().filter(a -> a.getPid() != null).count()); + + merge + .stream() + .filter(a -> a.getPid() != null) + .forEach( + a -> Assertions + .assertTrue( + a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals("URL")))); + merge.stream().filter(a -> a.getPid() != null).forEach(a -> { + try { + System.out.println(new ObjectMapper().writeValueAsString(a)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); + + Assertions.assertTrue(3 == merge.stream().filter(a -> a.getPid() !=null) + .filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count()); + + } + + @Test + public void mergeTestCrossrefEmpty3() throws JsonProcessingException { + + authors = readSample(publicationsBasePath + "/empty_crossref_author_third.json", Publication.class) + .stream() + .map(p -> p._2().getAuthor()) + .collect(Collectors.toList()); + + + List merge = DoiBoostAuthorMerger.merge(authors, true); + + System.out.println("Merge "); + for (Author author : merge) { + System.out.println(authorToString(author)); + } + + Assertions.assertEquals(10, merge.size()); + + Assertions.assertEquals(10, merge.stream().filter(a -> a.getPid() != null).count()); + + merge + .stream() + .filter(a -> a.getPid() != null) + .forEach( + a -> Assertions + .assertTrue( + a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals("URL")))); + + 
Assertions.assertTrue(3 == merge.stream().filter(a -> a.getPid() !=null) + .filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count()); + + + } + + + @Test + public void mergeTestCrossrefEmpty4() throws JsonProcessingException { + + authors = readSample(publicationsBasePath + "/empty_crossref_author_fourth.json", Publication.class) + .stream() + .map(p -> p._2().getAuthor()) + .collect(Collectors.toList()); + + + List merge = DoiBoostAuthorMerger.merge(authors, true); + + System.out.println("Merge "); + for (Author author : merge) { + System.out.println(authorToString(author)); + } + + Assertions.assertEquals(3, merge.size()); + + Assertions.assertEquals(3, merge.stream().filter(a -> a.getPid() != null).count()); + + + Assertions.assertTrue(3 == merge.stream().filter(a -> a.getPid() !=null) + .filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count()); + + + } + } From abe546e5bad175926ab9a519d49521e4f1fbc64b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 8 Jul 2021 18:55:55 +0200 Subject: [PATCH 33/37] added resource files for test author merger for empy crossref and other merging providers (related to DoiBoostAuthorMerger) --- .../eu/dnetlib/dhp/doiboost/empty_crossref_author_fourth.json | 3 +++ .../eu/dnetlib/dhp/doiboost/empty_crossref_author_third.json | 3 +++ .../eu/dnetlib/dhp/doiboost/empty_crossref_authors_first.json | 2 ++ .../eu/dnetlib/dhp/doiboost/empty_crossref_authors_second.json | 2 ++ 4 files changed, 10 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_author_fourth.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_author_third.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_authors_first.json create mode 100644 
dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_authors_second.json diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_author_fourth.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_author_fourth.json new file mode 100644 index 000000000..0cabcaf21 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_author_fourth.json @@ -0,0 +1,3 @@ +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585329822470,"id":"50|doi_________::f248499c5e0b967ce27df8ed45bffe53","originalId":["10.1042/bcj20160876","50|doiboost____::f248499c5e0b967ce27df8ed45bffe53"],"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-03-27T17:23:42Z","dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":null,"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":[{"value":"Kinetic and thermodynamic studies of the interaction between activating and inhibitory Ly49 natural killer receptors and MHC class I molecules","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2016-11-10T13:04:33Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2016-12-22","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-01-01","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"Natural killer (NK) cells are lymphocytes of the innate immune system that eliminate virally infected or malignantly transformed cells. NK cell function is regulated by diverse surface receptors that are both activating and inhibitory. Among them, the homodimeric Ly49 receptors control NK cell cytotoxicity by sensing major histocompatibility complex class I molecules (MHC-I) on target cells. Although crystal structures have been reported for Ly49/MHC-I complexes, the underlying binding mechanism has not been elucidated. Accordingly, we carried out thermodynamic and kinetic experiments on the interaction of four NK Ly49 receptors (Ly49G, Ly49H, Ly49I and Ly49P) with two MHC-I ligands (H-2Dd and H-2Dk). These Ly49s embrace the structural and functional diversity of the highly polymorphic Ly49 family. Combining surface plasmon resonance, fluorescence anisotropy and far-UV circular dichroism (CD), we determined that the best model to describe both inhibitory and activating Ly49/MHC-I interactions is one in which the two MHC-I binding sites of the Ly49 homodimer present similar binding constants for the two sites (∼106 M–1) with a slightly positive co-operativity in some cases, and without far-UV CD observable conformational changes. Furthermore, Ly49/MHC-I interactions are diffusion-controlled and enthalpy-driven. 
These features stand in marked contrast with the activation-controlled and entropy-driven interaction of Ly49s with the viral immunoevasin m157, which is characterized by strong positive co-operativity and conformational selection. These differences are explained by the distinct structures of Ly49/MHC-I and Ly49/m157 complexes. Moreover, they reflect the opposing roles of NK cells to rapidly scan for virally infected cells and of viruses to escape detection using immunoevasins such as m157.","dataInfo":null}],"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"publisher":{"value":"Portland Press Ltd.","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://portlandpress.com/biochemj/article-pdf/474/1/179/688119/bcj-2016-0876.pdf","http://dx.doi.org/10.1042/bcj20160876"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Biochemical Journal","issnPrinted":"0264-6021","issnOnline":"1470-8728","issnLinking":null,"ep":"194","iss":null,"sp":"179","vol":"474","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} 
+{"collectedfrom":[{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doiboost____::f248499c5e0b967ce27df8ed45bffe53","originalId":null,"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"Lucrecia Curto","name":"Lucrecia","surname":"Curto","rank":null,"pid":[{"value":"0000-0002-4335-5309","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Emilio Malchiodi","name":"Emilio","surname":"Malchiodi","rank":null,"pid":[{"value":"0000-0001-7501-3330","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sofia Noli Truant","name":"Sofia","surname":"Noli 
Truant","rank":null,"pid":[{"value":"0000-0002-5490-9186","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":null,"relevantdate":null,"description":null,"dateofacceptance":null,"publisher":null,"embargoenddate":null,"source":null,"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":null,"journal":null} +{"collectedfrom":[{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doi_________::f248499c5e0b967ce27df8ed45bffe53","originalId":["2556626411","10.1042/bcj20160876"],"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":null,"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":[{"value":"Innate immune 
system","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Circular dichroism","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Ligand","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Biophysics","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Surface plasmon resonance","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Binding site","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Receptor","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Major histocompatibility complex","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph 
classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"biology.protein","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.54379076","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.54379076","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"MHC class I","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"biology.protein","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.55428535","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph 
classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.55428535","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Stereochemistry","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"kinetic and thermodynamic studies of the interaction between activating and inhibitory ly49 natural killer receptors and mhc class i molecules","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null},{"value":"Kinetic and thermodynamic studies of the interaction between activating and inhibitory Ly49 natural killer receptors and MHC class I molecules","qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":null,"description":[{"value":"Natural killer (NK) cells are lymphocytes of the innate immune system that eliminate virally infected or malignantly transformed cells. NK cell function is regulated by diverse surface receptors that are both activating and inhibitory. Among them, the homodimeric Ly49 receptors control NK cell cytotoxicity by sensing major histocompatibility complex class I molecules (MHC-I) on target cells. Although crystal structures have been reported for Ly49/MHC-I complexes, the underlying binding mechanism has not been elucidated. 
Accordingly, we carried out thermodynamic and kinetic experiments on the interaction of four NK Ly49 receptors (Ly49G, Ly49H, Ly49I and Ly49P) with two MHC-I ligands (H-2D d and H-2D k ). These Ly49s embrace the structural and functional diversity of the highly polymorphic Ly49 family. Combining surface plasmon resonance, fluorescence anisotropy and far-UV circular dichroism (CD), we determined that the best model to describe both inhibitory and activating Ly49/MHC-I interactions is one in which the two MHC-I binding sites of the Ly49 homodimer present similar binding constants for the two sites (∼10 6  M –1 ) with a slightly positive co-operativity in some cases, and without far-UV CD observable conformational changes. Furthermore, Ly49/MHC-I interactions are diffusion-controlled and enthalpy-driven. These features stand in marked contrast with the activation-controlled and entropy-driven interaction of Ly49s with the viral immunoevasin m157, which is characterized by strong positive co-operativity and conformational selection. These differences are explained by the distinct structures of Ly49/MHC-I and Ly49/m157 complexes. 
Moreover, they reflect the opposing roles of NK cells to rapidly scan for virally infected cells and of viruses to escape detection using immunoevasins such as m157.","dataInfo":null}],"dateofacceptance":{"value":"2017-01-01","dataInfo":null},"publisher":{"value":"Portland Press Limited","dataInfo":null},"embargoenddate":null,"source":[{"value":null,"dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":null,"accessright":null,"instancetype":null,"hostedby":null,"url":["https://pubmed.ncbi.nlm.nih.gov/27831490/","https://www.ncbi.nlm.nih.gov/pubmed/27831490","https://ri.conicet.gov.ar/handle/11336/47496","https://portlandpress.com/biochemj/article/474/1/179/49423/Kinetic-and-thermodynamic-studies-of-the","https://academic.microsoft.com/#/detail/2556626411"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph","dataInfo":null},"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Biochemical Journal","issnPrinted":"0264-6021","issnOnline":null,"issnLinking":null,"ep":"194","iss":"1","sp":"179","vol":"474","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_author_third.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_author_third.json new file mode 100644 index 000000000..6326adbd6 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_author_third.json @@ 
-0,0 +1,3 @@ +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585329822470,"id":"50|doi_________::f248499c5e0b967ce27df8ed45bffe53","originalId":["10.1042/bcj20160876","50|doiboost____::f248499c5e0b967ce27df8ed45bffe53"],"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-03-27T17:23:42Z","dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":null,"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":[{"value":"Kinetic and thermodynamic studies of the interaction between activating and inhibitory Ly49 natural killer receptors and MHC class I molecules","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2016-11-10T13:04:33Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2016-12-22","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-01-01","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"Natural killer (NK) cells are lymphocytes of 
the innate immune system that eliminate virally infected or malignantly transformed cells. NK cell function is regulated by diverse surface receptors that are both activating and inhibitory. Among them, the homodimeric Ly49 receptors control NK cell cytotoxicity by sensing major histocompatibility complex class I molecules (MHC-I) on target cells. Although crystal structures have been reported for Ly49/MHC-I complexes, the underlying binding mechanism has not been elucidated. Accordingly, we carried out thermodynamic and kinetic experiments on the interaction of four NK Ly49 receptors (Ly49G, Ly49H, Ly49I and Ly49P) with two MHC-I ligands (H-2Dd and H-2Dk). These Ly49s embrace the structural and functional diversity of the highly polymorphic Ly49 family. Combining surface plasmon resonance, fluorescence anisotropy and far-UV circular dichroism (CD), we determined that the best model to describe both inhibitory and activating Ly49/MHC-I interactions is one in which the two MHC-I binding sites of the Ly49 homodimer present similar binding constants for the two sites (∼106 M–1) with a slightly positive co-operativity in some cases, and without far-UV CD observable conformational changes. Furthermore, Ly49/MHC-I interactions are diffusion-controlled and enthalpy-driven. These features stand in marked contrast with the activation-controlled and entropy-driven interaction of Ly49s with the viral immunoevasin m157, which is characterized by strong positive co-operativity and conformational selection. These differences are explained by the distinct structures of Ly49/MHC-I and Ly49/m157 complexes. 
Moreover, they reflect the opposing roles of NK cells to rapidly scan for virally infected cells and of viruses to escape detection using immunoevasins such as m157.","dataInfo":null}],"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"publisher":{"value":"Portland Press Ltd.","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://portlandpress.com/biochemj/article-pdf/474/1/179/688119/bcj-2016-0876.pdf","http://dx.doi.org/10.1042/bcj20160876"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Biochemical Journal","issnPrinted":"0264-6021","issnOnline":"1470-8728","issnLinking":null,"ep":"194","iss":null,"sp":"179","vol":"474","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} 
+{"collectedfrom":[{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doiboost____::f248499c5e0b967ce27df8ed45bffe53","originalId":null,"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"Lucrecia Curto","name":"Lucrecia","surname":"Curto","rank":null,"pid":[{"value":"0000-0002-4335-5309","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Emilio Malchiodi","name":"Emilio","surname":"Malchiodi","rank":null,"pid":[{"value":"0000-0001-7501-3330","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sofia Noli Truant","name":"Sofia","surname":"Noli 
Truant","rank":null,"pid":[{"value":"0000-0002-5490-9186","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":null,"relevantdate":null,"description":null,"dateofacceptance":null,"publisher":null,"embargoenddate":null,"source":null,"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":null,"journal":null} +{"collectedfrom":[{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doi_________::f248499c5e0b967ce27df8ed45bffe53","originalId":["2556626411","10.1042/bcj20160876"],"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"María Belén 
Antonoglou","name":null,"surname":null,"rank":5,"pid":[{"value":"https://academic.microsoft.com/#/detail/2563193871","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Sofia Noli Truant","name":null,"surname":null,"rank":4,"pid":[{"value":"https://academic.microsoft.com/#/detail/2562625705","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"José M. Delfino","name":null,"surname":null,"rank":7,"pid":[{"value":"https://academic.microsoft.com/#/detail/2290824706","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Maria Belén Sarratea","name":null,"surname":null,"rank":3,"pid":[{"value":"https://academic.microsoft.com/#/detail/2253085369","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"María Julieta Fernández Lynch","name":null,"surname":null,"rank":6,"pid":[{"value":"https://academic.microsoft.com/#/detail/2566594710","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Roy A. 
Mariuzza","name":null,"surname":null,"rank":8,"pid":[{"value":"https://academic.microsoft.com/#/detail/298169305","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Maryland, College Park","dataInfo":null}]},{"fullname":"Lucrecia María Curto","name":null,"surname":null,"rank":2,"pid":[{"value":"https://academic.microsoft.com/#/detail/2559673648","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Emilio L. Malchiodi","name":null,"surname":null,"rank":10,"pid":[{"value":"https://academic.microsoft.com/#/detail/1850809118","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Marisa M. 
Fernández","name":null,"surname":null,"rank":9,"pid":[{"value":"https://academic.microsoft.com/#/detail/2601254252","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Pablo Nicolas Romasanta","name":null,"surname":null,"rank":1,"pid":[{"value":"https://academic.microsoft.com/#/detail/2747275827","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":[{"value":"Innate immune system","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Circular dichroism","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Ligand","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Biophysics","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph 
classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Surface plasmon resonance","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Binding site","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Receptor","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Major histocompatibility complex","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"biology.protein","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.54379076","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph 
classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.54379076","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"MHC class I","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"biology.protein","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.55428535","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.55428535","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Stereochemistry","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"kinetic and thermodynamic studies of the interaction between activating and inhibitory ly49 natural killer 
receptors and mhc class i molecules","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null},{"value":"Kinetic and thermodynamic studies of the interaction between activating and inhibitory Ly49 natural killer receptors and MHC class I molecules","qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":null,"description":[{"value":"Natural killer (NK) cells are lymphocytes of the innate immune system that eliminate virally infected or malignantly transformed cells. NK cell function is regulated by diverse surface receptors that are both activating and inhibitory. Among them, the homodimeric Ly49 receptors control NK cell cytotoxicity by sensing major histocompatibility complex class I molecules (MHC-I) on target cells. Although crystal structures have been reported for Ly49/MHC-I complexes, the underlying binding mechanism has not been elucidated. Accordingly, we carried out thermodynamic and kinetic experiments on the interaction of four NK Ly49 receptors (Ly49G, Ly49H, Ly49I and Ly49P) with two MHC-I ligands (H-2D d and H-2D k ). These Ly49s embrace the structural and functional diversity of the highly polymorphic Ly49 family. Combining surface plasmon resonance, fluorescence anisotropy and far-UV circular dichroism (CD), we determined that the best model to describe both inhibitory and activating Ly49/MHC-I interactions is one in which the two MHC-I binding sites of the Ly49 homodimer present similar binding constants for the two sites (∼10 6  M –1 ) with a slightly positive co-operativity in some cases, and without far-UV CD observable conformational changes. Furthermore, Ly49/MHC-I interactions are diffusion-controlled and enthalpy-driven. 
These features stand in marked contrast with the activation-controlled and entropy-driven interaction of Ly49s with the viral immunoevasin m157, which is characterized by strong positive co-operativity and conformational selection. These differences are explained by the distinct structures of Ly49/MHC-I and Ly49/m157 complexes. Moreover, they reflect the opposing roles of NK cells to rapidly scan for virally infected cells and of viruses to escape detection using immunoevasins such as m157.","dataInfo":null}],"dateofacceptance":{"value":"2017-01-01","dataInfo":null},"publisher":{"value":"Portland Press Limited","dataInfo":null},"embargoenddate":null,"source":[{"value":null,"dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":null,"accessright":null,"instancetype":null,"hostedby":null,"url":["https://pubmed.ncbi.nlm.nih.gov/27831490/","https://www.ncbi.nlm.nih.gov/pubmed/27831490","https://ri.conicet.gov.ar/handle/11336/47496","https://portlandpress.com/biochemj/article/474/1/179/49423/Kinetic-and-thermodynamic-studies-of-the","https://academic.microsoft.com/#/detail/2556626411"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph","dataInfo":null},"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Biochemical Journal","issnPrinted":"0264-6021","issnOnline":null,"issnLinking":null,"ep":"194","iss":"1","sp":"179","vol":"474","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_authors_first.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_authors_first.json new file mode 100644 index 000000000..9c9740f92 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_authors_first.json @@ -0,0 +1,2 @@ +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585329822470,"id":"50|doi_________::f248499c5e0b967ce27df8ed45bffe53","originalId":["10.1042/bcj20160876","50|doiboost____::f248499c5e0b967ce27df8ed45bffe53"],"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-03-27T17:23:42Z","dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":null,"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":[{"value":"Kinetic and thermodynamic studies of the interaction between activating and inhibitory Ly49 natural killer receptors and MHC class I molecules","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2016-11-10T13:04:33Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2016-12-22","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-01-01","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"Natural killer (NK) cells are lymphocytes of the innate immune system that eliminate virally infected or malignantly transformed cells. NK cell function is regulated by diverse surface receptors that are both activating and inhibitory. Among them, the homodimeric Ly49 receptors control NK cell cytotoxicity by sensing major histocompatibility complex class I molecules (MHC-I) on target cells. Although crystal structures have been reported for Ly49/MHC-I complexes, the underlying binding mechanism has not been elucidated. Accordingly, we carried out thermodynamic and kinetic experiments on the interaction of four NK Ly49 receptors (Ly49G, Ly49H, Ly49I and Ly49P) with two MHC-I ligands (H-2Dd and H-2Dk). These Ly49s embrace the structural and functional diversity of the highly polymorphic Ly49 family. Combining surface plasmon resonance, fluorescence anisotropy and far-UV circular dichroism (CD), we determined that the best model to describe both inhibitory and activating Ly49/MHC-I interactions is one in which the two MHC-I binding sites of the Ly49 homodimer present similar binding constants for the two sites (∼106 M–1) with a slightly positive co-operativity in some cases, and without far-UV CD observable conformational changes. Furthermore, Ly49/MHC-I interactions are diffusion-controlled and enthalpy-driven. 
These features stand in marked contrast with the activation-controlled and entropy-driven interaction of Ly49s with the viral immunoevasin m157, which is characterized by strong positive co-operativity and conformational selection. These differences are explained by the distinct structures of Ly49/MHC-I and Ly49/m157 complexes. Moreover, they reflect the opposing roles of NK cells to rapidly scan for virally infected cells and of viruses to escape detection using immunoevasins such as m157.","dataInfo":null}],"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"publisher":{"value":"Portland Press Ltd.","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://portlandpress.com/biochemj/article-pdf/474/1/179/688119/bcj-2016-0876.pdf","http://dx.doi.org/10.1042/bcj20160876"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Biochemical Journal","issnPrinted":"0264-6021","issnOnline":"1470-8728","issnLinking":null,"ep":"194","iss":null,"sp":"179","vol":"474","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} 
+{"collectedfrom":[{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doiboost____::f248499c5e0b967ce27df8ed45bffe53","originalId":null,"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"Lucrecia Curto","name":"Lucrecia","surname":"Curto","rank":null,"pid":[{"value":"0000-0002-4335-5309","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Emilio Malchiodi","name":"Emilio","surname":"Malchiodi","rank":null,"pid":[{"value":"0000-0001-7501-3330","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sofia Noli Truant","name":"Sofia","surname":"Noli 
Truant","rank":null,"pid":[{"value":"0000-0002-5490-9186","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":null,"relevantdate":null,"description":null,"dateofacceptance":null,"publisher":null,"embargoenddate":null,"source":null,"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":null,"journal":null} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_authors_second.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_authors_second.json new file mode 100644 index 000000000..701809743 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/empty_crossref_authors_second.json @@ -0,0 +1,2 @@ 
+{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585329822470,"id":"50|doi_________::f248499c5e0b967ce27df8ed45bffe53","originalId":["10.1042/bcj20160876","50|doiboost____::f248499c5e0b967ce27df8ed45bffe53"],"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-03-27T17:23:42Z","dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"Lucrecia Curto","name":"Lucrecia","surname":"Curto","rank":null,"pid":[{"value":"0000-0002-4335-5309","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Emilio Malchiodi","name":"Emilio","surname":"Malchiodi","rank":null,"pid":[{"value":"0000-0001-7501-3330","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sofia Noli Truant","name":"Sofia","surname":"Noli 
Truant","rank":null,"pid":[{"value":"0000-0002-5490-9186","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":[{"value":"Kinetic and thermodynamic studies of the interaction between activating and inhibitory Ly49 natural killer receptors and MHC class I molecules","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2016-11-10T13:04:33Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2016-12-22","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-01-01","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"Natural killer (NK) cells are lymphocytes of the innate immune system that eliminate virally infected or malignantly transformed cells. NK cell function is regulated by diverse surface receptors that are both activating and inhibitory. Among them, the homodimeric Ly49 receptors control NK cell cytotoxicity by sensing major histocompatibility complex class I molecules (MHC-I) on target cells. 
Although crystal structures have been reported for Ly49/MHC-I complexes, the underlying binding mechanism has not been elucidated. Accordingly, we carried out thermodynamic and kinetic experiments on the interaction of four NK Ly49 receptors (Ly49G, Ly49H, Ly49I and Ly49P) with two MHC-I ligands (H-2Dd and H-2Dk). These Ly49s embrace the structural and functional diversity of the highly polymorphic Ly49 family. Combining surface plasmon resonance, fluorescence anisotropy and far-UV circular dichroism (CD), we determined that the best model to describe both inhibitory and activating Ly49/MHC-I interactions is one in which the two MHC-I binding sites of the Ly49 homodimer present similar binding constants for the two sites (∼106 M–1) with a slightly positive co-operativity in some cases, and without far-UV CD observable conformational changes. Furthermore, Ly49/MHC-I interactions are diffusion-controlled and enthalpy-driven. These features stand in marked contrast with the activation-controlled and entropy-driven interaction of Ly49s with the viral immunoevasin m157, which is characterized by strong positive co-operativity and conformational selection. These differences are explained by the distinct structures of Ly49/MHC-I and Ly49/m157 complexes. 
Moreover, they reflect the opposing roles of NK cells to rapidly scan for virally infected cells and of viruses to escape detection using immunoevasins such as m157.","dataInfo":null}],"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"publisher":{"value":"Portland Press Ltd.","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://portlandpress.com/biochemj/article-pdf/474/1/179/688119/bcj-2016-0876.pdf","http://dx.doi.org/10.1042/bcj20160876"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Biochemical Journal","issnPrinted":"0264-6021","issnOnline":"1470-8728","issnLinking":null,"ep":"194","iss":null,"sp":"179","vol":"474","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic 
Graph","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doi_________::f248499c5e0b967ce27df8ed45bffe53","originalId":["2556626411","10.1042/bcj20160876"],"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"María Belén Antonoglou","name":null,"surname":null,"rank":5,"pid":[{"value":"https://academic.microsoft.com/#/detail/2563193871","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Sofia Noli Truant","name":null,"surname":null,"rank":4,"pid":[{"value":"https://academic.microsoft.com/#/detail/2562625705","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"José M. 
Delfino","name":null,"surname":null,"rank":7,"pid":[{"value":"https://academic.microsoft.com/#/detail/2290824706","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Maria Belén Sarratea","name":null,"surname":null,"rank":3,"pid":[{"value":"https://academic.microsoft.com/#/detail/2253085369","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"María Julieta Fernández Lynch","name":null,"surname":null,"rank":6,"pid":[{"value":"https://academic.microsoft.com/#/detail/2566594710","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Roy A. Mariuzza","name":null,"surname":null,"rank":8,"pid":[{"value":"https://academic.microsoft.com/#/detail/298169305","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Maryland, College Park","dataInfo":null}]},{"fullname":"Lucrecia María Curto","name":null,"surname":null,"rank":2,"pid":[{"value":"https://academic.microsoft.com/#/detail/2559673648","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Emilio L. 
Malchiodi","name":null,"surname":null,"rank":10,"pid":[{"value":"https://academic.microsoft.com/#/detail/1850809118","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Marisa M. Fernández","name":null,"surname":null,"rank":9,"pid":[{"value":"https://academic.microsoft.com/#/detail/2601254252","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Pablo Nicolas Romasanta","name":null,"surname":null,"rank":1,"pid":[{"value":"https://academic.microsoft.com/#/detail/2747275827","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":[{"value":"Innate immune system","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Circular dichroism","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Ligand","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph 
classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Biophysics","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Surface plasmon resonance","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Binding site","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Receptor","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Major histocompatibility complex","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"biology.protein","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.54379076","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph 
classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.54379076","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"MHC class I","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"biology.protein","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.55428535","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.55428535","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Stereochemistry","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"kinetic and thermodynamic studies of the interaction between activating and inhibitory ly49 natural killer 
receptors and mhc class i molecules","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null},{"value":"Kinetic and thermodynamic studies of the interaction between activating and inhibitory Ly49 natural killer receptors and MHC class I molecules","qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":null,"description":[{"value":"Natural killer (NK) cells are lymphocytes of the innate immune system that eliminate virally infected or malignantly transformed cells. NK cell function is regulated by diverse surface receptors that are both activating and inhibitory. Among them, the homodimeric Ly49 receptors control NK cell cytotoxicity by sensing major histocompatibility complex class I molecules (MHC-I) on target cells. Although crystal structures have been reported for Ly49/MHC-I complexes, the underlying binding mechanism has not been elucidated. Accordingly, we carried out thermodynamic and kinetic experiments on the interaction of four NK Ly49 receptors (Ly49G, Ly49H, Ly49I and Ly49P) with two MHC-I ligands (H-2D d and H-2D k ). These Ly49s embrace the structural and functional diversity of the highly polymorphic Ly49 family. Combining surface plasmon resonance, fluorescence anisotropy and far-UV circular dichroism (CD), we determined that the best model to describe both inhibitory and activating Ly49/MHC-I interactions is one in which the two MHC-I binding sites of the Ly49 homodimer present similar binding constants for the two sites (∼10 6  M –1 ) with a slightly positive co-operativity in some cases, and without far-UV CD observable conformational changes. Furthermore, Ly49/MHC-I interactions are diffusion-controlled and enthalpy-driven. 
These features stand in marked contrast with the activation-controlled and entropy-driven interaction of Ly49s with the viral immunoevasin m157, which is characterized by strong positive co-operativity and conformational selection. These differences are explained by the distinct structures of Ly49/MHC-I and Ly49/m157 complexes. Moreover, they reflect the opposing roles of NK cells to rapidly scan for virally infected cells and of viruses to escape detection using immunoevasins such as m157.","dataInfo":null}],"dateofacceptance":{"value":"2017-01-01","dataInfo":null},"publisher":{"value":"Portland Press Limited","dataInfo":null},"embargoenddate":null,"source":[{"value":null,"dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":null,"accessright":null,"instancetype":null,"hostedby":null,"url":["https://pubmed.ncbi.nlm.nih.gov/27831490/","https://www.ncbi.nlm.nih.gov/pubmed/27831490","https://ri.conicet.gov.ar/handle/11336/47496","https://portlandpress.com/biochemj/article/474/1/179/49423/Kinetic-and-thermodynamic-studies-of-the","https://academic.microsoft.com/#/detail/2556626411"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph","dataInfo":null},"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Biochemical Journal","issnPrinted":"0264-6021","issnOnline":null,"issnLinking":null,"ep":"194","iss":"1","sp":"179","vol":"474","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} \ No newline at end of file From b0d86d32b06e8d8b4d99017eaa6d0425e3ff49bf Mon Sep 17 00:00:00 
2001 From: "miriam.baglioni" Date: Thu, 8 Jul 2021 18:56:29 +0200 Subject: [PATCH 34/37] added list of author to be merged --- .../resources/eu/dnetlib/dhp/doiboost/matching_authors_second | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_second b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_second index b533f4d8a..46597fcbd 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_second +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/matching_authors_second @@ -1 +1,2 @@ {"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1585329822470,"id":"50|doi_________::f248499c5e0b967ce27df8ed45bffe53","originalId":["10.1042/bcj20160876","50|doiboost____::f248499c5e0b967ce27df8ed45bffe53"],"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-03-27T17:23:42Z","dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"Pablo N. Romasanta","name":"Pablo N.","surname":"Romasanta","rank":1,"pid":null,"affiliation":null},{"fullname":"Lucrecia M. 
Curto","name":"Lucrecia M.","surname":"Curto","rank":2,"pid":[{"value":"0000-0002-4335-5309","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"María B. Sarratea","name":"María B.","surname":"Sarratea","rank":3,"pid":null,"affiliation":null},{"fullname":"Sofía Noli Truant","name":"Sofía","surname":"Noli Truant","rank":4,"pid":[{"value":"0000-0002-5490-9186","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"María B. Antonoglou","name":"María B.","surname":"Antonoglou","rank":5,"pid":null,"affiliation":null},{"fullname":"María J. Fernández Lynch","name":"María J.","surname":"Fernández Lynch","rank":6,"pid":null,"affiliation":null},{"fullname":"José M. Delfino","name":"José M.","surname":"Delfino","rank":7,"pid":null,"affiliation":null},{"fullname":"Roy A. Mariuzza","name":"Roy A.","surname":"Mariuzza","rank":8,"pid":null,"affiliation":null},{"fullname":"Marisa M. Fernández","name":"Marisa M.","surname":"Fernández","rank":9,"pid":null,"affiliation":null},{"fullname":"Emilio L. 
Malchiodi","name":"Emilio L.","surname":"Malchiodi","rank":10,"pid":[{"value":"0000-0001-7501-3330","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":[{"value":"Kinetic and thermodynamic studies of the interaction between activating and inhibitory Ly49 natural killer receptors and MHC class I molecules","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2016-11-10T13:04:33Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2016-12-22","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-01-01","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"Natural killer (NK) cells are lymphocytes of the innate immune system that eliminate virally infected or malignantly transformed cells. NK cell function is regulated by diverse surface receptors that are both activating and inhibitory. Among them, the homodimeric Ly49 receptors control NK cell cytotoxicity by sensing major histocompatibility complex class I molecules (MHC-I) on target cells. 
Although crystal structures have been reported for Ly49/MHC-I complexes, the underlying binding mechanism has not been elucidated. Accordingly, we carried out thermodynamic and kinetic experiments on the interaction of four NK Ly49 receptors (Ly49G, Ly49H, Ly49I and Ly49P) with two MHC-I ligands (H-2Dd and H-2Dk). These Ly49s embrace the structural and functional diversity of the highly polymorphic Ly49 family. Combining surface plasmon resonance, fluorescence anisotropy and far-UV circular dichroism (CD), we determined that the best model to describe both inhibitory and activating Ly49/MHC-I interactions is one in which the two MHC-I binding sites of the Ly49 homodimer present similar binding constants for the two sites (∼106 M–1) with a slightly positive co-operativity in some cases, and without far-UV CD observable conformational changes. Furthermore, Ly49/MHC-I interactions are diffusion-controlled and enthalpy-driven. These features stand in marked contrast with the activation-controlled and entropy-driven interaction of Ly49s with the viral immunoevasin m157, which is characterized by strong positive co-operativity and conformational selection. These differences are explained by the distinct structures of Ly49/MHC-I and Ly49/m157 complexes. 
Moreover, they reflect the opposing roles of NK cells to rapidly scan for virally infected cells and of viruses to escape detection using immunoevasins such as m157.","dataInfo":null}],"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"publisher":{"value":"Portland Press Ltd.","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://portlandpress.com/biochemj/article-pdf/474/1/179/688119/bcj-2016-0876.pdf","http://dx.doi.org/10.1042/bcj20160876"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"dateofacceptance":{"value":"2016-12-22","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Biochemical Journal","issnPrinted":"0264-6021","issnOnline":"1470-8728","issnLinking":null,"ep":"194","iss":null,"sp":"179","vol":"474","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic 
Graph","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doi_________::f248499c5e0b967ce27df8ed45bffe53","originalId":["2556626411","10.1042/bcj20160876"],"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"María Belén Antonoglou","name":null,"surname":null,"rank":5,"pid":[{"value":"https://academic.microsoft.com/#/detail/2563193871","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Sofia Noli Truant","name":null,"surname":null,"rank":4,"pid":[{"value":"https://academic.microsoft.com/#/detail/2562625705","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"José M. 
Delfino","name":null,"surname":null,"rank":7,"pid":[{"value":"https://academic.microsoft.com/#/detail/2290824706","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Maria Belén Sarratea","name":null,"surname":null,"rank":3,"pid":[{"value":"https://academic.microsoft.com/#/detail/2253085369","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"María Julieta Fernández Lynch","name":null,"surname":null,"rank":6,"pid":[{"value":"https://academic.microsoft.com/#/detail/2566594710","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Roy A. Mariuzza","name":null,"surname":null,"rank":8,"pid":[{"value":"https://academic.microsoft.com/#/detail/298169305","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Maryland, College Park","dataInfo":null}]},{"fullname":"Lucrecia María Curto","name":null,"surname":null,"rank":2,"pid":[{"value":"https://academic.microsoft.com/#/detail/2559673648","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Emilio L. 
Malchiodi","name":null,"surname":null,"rank":10,"pid":[{"value":"https://academic.microsoft.com/#/detail/1850809118","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Marisa M. Fernández","name":null,"surname":null,"rank":9,"pid":[{"value":"https://academic.microsoft.com/#/detail/2601254252","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]},{"fullname":"Pablo Nicolas Romasanta","name":null,"surname":null,"rank":1,"pid":[{"value":"https://academic.microsoft.com/#/detail/2747275827","qualifier":{"classid":"URL","classname":"URL","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"affiliation":[{"value":"University of Buenos Aires","dataInfo":null}]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":[{"value":"Innate immune system","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Circular dichroism","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Ligand","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph 
classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Biophysics","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Surface plasmon resonance","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Binding site","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Receptor","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Major histocompatibility complex","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"biology.protein","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.54379076","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph 
classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.54379076","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"MHC class I","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"biology.protein","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.55428535","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"biology","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.55428535","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Stereochemistry","qualifier":{"classid":"MAG","classname":"Microsoft Academic Graph classification","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"kinetic and thermodynamic studies of the interaction between activating and inhibitory ly49 natural killer 
receptors and mhc class i molecules","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null},{"value":"Kinetic and thermodynamic studies of the interaction between activating and inhibitory Ly49 natural killer receptors and MHC class I molecules","qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":null,"description":[{"value":"Natural killer (NK) cells are lymphocytes of the innate immune system that eliminate virally infected or malignantly transformed cells. NK cell function is regulated by diverse surface receptors that are both activating and inhibitory. Among them, the homodimeric Ly49 receptors control NK cell cytotoxicity by sensing major histocompatibility complex class I molecules (MHC-I) on target cells. Although crystal structures have been reported for Ly49/MHC-I complexes, the underlying binding mechanism has not been elucidated. Accordingly, we carried out thermodynamic and kinetic experiments on the interaction of four NK Ly49 receptors (Ly49G, Ly49H, Ly49I and Ly49P) with two MHC-I ligands (H-2D d and H-2D k ). These Ly49s embrace the structural and functional diversity of the highly polymorphic Ly49 family. Combining surface plasmon resonance, fluorescence anisotropy and far-UV circular dichroism (CD), we determined that the best model to describe both inhibitory and activating Ly49/MHC-I interactions is one in which the two MHC-I binding sites of the Ly49 homodimer present similar binding constants for the two sites (∼10 6  M –1 ) with a slightly positive co-operativity in some cases, and without far-UV CD observable conformational changes. Furthermore, Ly49/MHC-I interactions are diffusion-controlled and enthalpy-driven. 
These features stand in marked contrast with the activation-controlled and entropy-driven interaction of Ly49s with the viral immunoevasin m157, which is characterized by strong positive co-operativity and conformational selection. These differences are explained by the distinct structures of Ly49/MHC-I and Ly49/m157 complexes. Moreover, they reflect the opposing roles of NK cells to rapidly scan for virally infected cells and of viruses to escape detection using immunoevasins such as m157.","dataInfo":null}],"dateofacceptance":{"value":"2017-01-01","dataInfo":null},"publisher":{"value":"Portland Press Limited","dataInfo":null},"embargoenddate":null,"source":[{"value":null,"dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":null,"accessright":null,"instancetype":null,"hostedby":null,"url":["https://pubmed.ncbi.nlm.nih.gov/27831490/","https://www.ncbi.nlm.nih.gov/pubmed/27831490","https://ri.conicet.gov.ar/handle/11336/47496","https://portlandpress.com/biochemj/article/474/1/179/49423/Kinetic-and-thermodynamic-studies-of-the","https://academic.microsoft.com/#/detail/2556626411"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph","dataInfo":null},"pid":[{"value":"10.1042/bcj20160876","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Biochemical Journal","issnPrinted":"0264-6021","issnOnline":null,"issnLinking":null,"ep":"194","iss":"1","sp":"179","vol":"474","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} From ae2b47b29d84bd10839d855f3a52747eb3fe99f4 Mon Sep 17 00:00:00 2001 From: Claudio Atzori 
Date: Fri, 9 Jul 2021 15:47:51 +0200 Subject: [PATCH 35/37] [broker] added coalesce(1) on the stats dataset before storing it on postgres --- .../src/main/java/eu/dnetlib/dhp/broker/oa/GenerateStatsJob.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateStatsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateStatsJob.java index a4fb20b1c..9927d6560 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateStatsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateStatsJob.java @@ -80,6 +80,7 @@ public class GenerateStatsJob { .map( (MapFunction, DatasourceStats>) t -> t._2, Encoders.bean(DatasourceStats.class)) + .coalesce(1) .write() .mode(SaveMode.Overwrite) .jdbc(dbUrl, "oa_datasource_stats_temp", connectionProperties); From 1ea66e89174f34f20c9f726cc28156241649011e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 12 Jul 2021 10:06:29 +0200 Subject: [PATCH 36/37] some more tests for authormerger --- .../doiboost/DoiBoostAuthorMerger.java | 6 +- .../doiboost/DoiBoostAuthorMergerTest.java | 96 +++++++++++++++++++ .../dhp/doiboost/should_appear_author1.json | 2 + .../dhp/doiboost/should_appear_author2.json | 2 + .../dhp/doiboost/should_appear_author3.json | 2 + 5 files changed, 104 insertions(+), 4 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author1.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author2.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author3.json diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java index 578adb06b..70032097e 
100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostAuthorMerger.java @@ -90,8 +90,6 @@ public class DoiBoostAuthorMerger { private static void enrichPidFromList(List base, List enrich) { - if(base == null || enrich == null) - return ; //search authors having identifiers in the enrich list final List authorsWithPids = enrich @@ -115,11 +113,11 @@ public class DoiBoostAuthorMerger { for (Tuple2 t : t2._2()) { String mapEntry = DHPUtils.md5(t._1().getFullname()); AuthorAssoc aa = assocMap.get(mapEntry); - if(aa.getScore() < t._2()){ + if(aa.getScore() < t._2() && aa.getScore() < 0.9){ aa.setScore(t._2()); aa.setTo_be_enriched(new ArrayList<>()); aa.getTo_be_enriched().add(t2._1()); - }else if(aa.getScore() == t._2()){ + }else if(t._2() > 0.9){ aa.getTo_be_enriched().add(t2._1()); } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java index 7be63e93e..0cad167ae 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/DoiBoostAuthorMergerTest.java @@ -307,4 +307,100 @@ public class DoiBoostAuthorMergerTest { } + @Test + public void shouldMergeTest1() throws JsonProcessingException { + + authors = readSample(publicationsBasePath + "/should_appear_author1.json", Publication.class) + .stream() + .map(p -> p._2().getAuthor()) + .collect(Collectors.toList()); + + + List merge = DoiBoostAuthorMerger.merge(authors, true); + + Assertions.assertTrue(6 == merge.stream().filter(a -> a.getPid() !=null) + .filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count()); + + Assertions.assertTrue(34 == merge.stream().filter(a -> a.getPid() 
!=null) + .filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))).count()); + + merge.stream().filter(a -> a.getRank() == 26) + .forEach(a -> + Assertions.assertTrue(a.getPid() + .stream() + .anyMatch(pid -> pid.getValue().equals("0000-0002-2445-5275") + && pid.getQualifier().getClassid().equals(ModelConstants.ORCID) + ) + ) + ); + + + } + + @Test + public void shouldMergeTest2() throws JsonProcessingException { + + authors = readSample(publicationsBasePath + "/should_appear_author2.json", Publication.class) + .stream() + .map(p -> p._2().getAuthor()) + .collect(Collectors.toList()); + + + List merge = DoiBoostAuthorMerger.merge(authors, true); + + + + Assertions.assertTrue(5 == merge.stream().filter(a -> a.getPid() !=null) + .filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count()); + + Assertions.assertTrue(34 == merge.stream().filter(a -> a.getPid() !=null) + .filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))).count()); + + merge.stream().filter(a -> a.getFullname().equals("da luz geraldo eduardo")) + .forEach(a -> + Assertions.assertTrue(a.getPid() + .stream() + .anyMatch(pid -> pid.getValue().equals("http://orcid.org/0000-0003-2434-0387") + && pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING) + ) + ) + ); + + + } + + @Test + public void shouldNotMergeTest1() throws JsonProcessingException { + + authors = readSample(publicationsBasePath + "/should_appear_author3.json", Publication.class) + .stream() + .map(p -> p._2().getAuthor()) + .collect(Collectors.toList()); + + + List merge = DoiBoostAuthorMerger.merge(authors, true); + + System.out.println("Merge "); + for (Author author : merge) { + System.out.println(authorToString(author)); + } + +// Assertions.assertTrue(5 == merge.stream().filter(a -> a.getPid() !=null) +// .filter(a -> a.getPid().stream().anyMatch(p 
-> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count()); +// +// Assertions.assertTrue(34 == merge.stream().filter(a -> a.getPid() !=null) +// .filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))).count()); +// +// merge.stream().filter(a -> a.getFullname().equals("da luz geraldo eduardo")) +// .forEach(a -> +// Assertions.assertTrue(a.getPid() +// .stream() +// .anyMatch(pid -> pid.getValue().equals("http://orcid.org/0000-0003-2434-0387") +// && pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING) +// ) +// ) +// ); + + + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author1.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author1.json new file mode 100644 index 000000000..69ec64a31 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author1.json @@ -0,0 +1,2 @@ +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1621266036755,"id":"50|doi_________::b11b34fa9dfbb5eb12953a525a8ff20e","originalId":["1295","10.1038/s41550-020-01295-8","50|doiboost____::b11b34fa9dfbb5eb12953a525a8ff20e"],"pid":[{"value":"10.1038/s41550-020-01295-8","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-05-17T15:40:36Z","dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"Robert 
Stein","name":"Robert","surname":"Stein","rank":1,"pid":[{"value":"http://orcid.org/0000-0003-2434-0387","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sjoert van Velzen","name":"Sjoert van","surname":"Velzen","rank":2,"pid":[{"value":"http://orcid.org/0000-0002-3859-8074","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Marek Kowalski","name":"Marek","surname":"Kowalski","rank":3,"pid":[{"value":"http://orcid.org/0000-0001-8594-8666","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Anna Franckowiak","name":"Anna","surname":"Franckowiak","rank":4,"pid":null,"affiliation":null},{"fullname":"Suvi 
Gezari","name":"Suvi","surname":"Gezari","rank":5,"pid":[{"value":"http://orcid.org/0000-0003-3703-5154","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"James C. A. Miller-Jones","name":"James C. A.","surname":"Miller-Jones","rank":6,"pid":[{"value":"http://orcid.org/0000-0003-3124-2814","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sara Frederick","name":"Sara","surname":"Frederick","rank":7,"pid":null,"affiliation":null},{"fullname":"Itai Sfaradi","name":"Itai","surname":"Sfaradi","rank":8,"pid":[{"value":"http://orcid.org/0000-0003-0466-3779","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Michael F. 
Bietenholz","name":"Michael F.","surname":"Bietenholz","rank":9,"pid":null,"affiliation":null},{"fullname":"Assaf Horesh","name":"Assaf","surname":"Horesh","rank":10,"pid":[{"value":"http://orcid.org/0000-0002-5936-1156","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Rob Fender","name":"Rob","surname":"Fender","rank":11,"pid":null,"affiliation":null},{"fullname":"Simone Garrappa","name":"Simone","surname":"Garrappa","rank":12,"pid":[{"value":"http://orcid.org/0000-0003-2403-4582","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tomás Ahumada","name":"Tomás","surname":"Ahumada","rank":13,"pid":[{"value":"http://orcid.org/0000-0002-2184-6430","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Igor Andreoni","name":"Igor","surname":"Andreoni","rank":14,"pid":null,"affiliation":null},{"fullname":"Justin 
Belicki","name":"Justin","surname":"Belicki","rank":15,"pid":null,"affiliation":null},{"fullname":"Eric C. Bellm","name":"Eric C.","surname":"Bellm","rank":16,"pid":[{"value":"http://orcid.org/0000-0001-8018-5348","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Markus Böttcher","name":"Markus","surname":"Böttcher","rank":17,"pid":null,"affiliation":null},{"fullname":"Valery Brinnel","name":"Valery","surname":"Brinnel","rank":18,"pid":null,"affiliation":null},{"fullname":"Rick Burruss","name":"Rick","surname":"Burruss","rank":19,"pid":null,"affiliation":null},{"fullname":"S. Bradley Cenko","name":"S. Bradley","surname":"Cenko","rank":20,"pid":[{"value":"http://orcid.org/0000-0003-1673-970X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Michael W. 
Coughlin","name":"Michael W.","surname":"Coughlin","rank":21,"pid":[{"value":"http://orcid.org/0000-0002-8262-2924","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Virginia Cunningham","name":"Virginia","surname":"Cunningham","rank":22,"pid":[{"value":"http://orcid.org/0000-0003-2292-0441","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Andrew Drake","name":"Andrew","surname":"Drake","rank":23,"pid":null,"affiliation":null},{"fullname":"Glennys R. Farrar","name":"Glennys R.","surname":"Farrar","rank":24,"pid":null,"affiliation":null},{"fullname":"Michael Feeney","name":"Michael","surname":"Feeney","rank":25,"pid":null,"affiliation":null},{"fullname":"Ryan J. 
Foley","name":"Ryan J.","surname":"Foley","rank":26,"pid":null,"affiliation":null},{"fullname":"Avishay Gal-Yam","name":"Avishay","surname":"Gal-Yam","rank":27,"pid":[{"value":"http://orcid.org/0000-0002-3653-5598","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"V. Zach Golkhou","name":"V. Zach","surname":"Golkhou","rank":28,"pid":null,"affiliation":null},{"fullname":"Ariel Goobar","name":"Ariel","surname":"Goobar","rank":29,"pid":[{"value":"http://orcid.org/0000-0002-4163-4996","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Matthew J. 
Graham","name":"Matthew J.","surname":"Graham","rank":30,"pid":[{"value":"http://orcid.org/0000-0002-3168-0139","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Erica Hammerstein","name":"Erica","surname":"Hammerstein","rank":31,"pid":null,"affiliation":null},{"fullname":"George Helou","name":"George","surname":"Helou","rank":32,"pid":[{"value":"http://orcid.org/0000-0003-3367-3415","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tiara Hung","name":"Tiara","surname":"Hung","rank":33,"pid":[{"value":"http://orcid.org/0000-0002-9878-7889","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Mansi M. Kasliwal","name":"Mansi M.","surname":"Kasliwal","rank":34,"pid":null,"affiliation":null},{"fullname":"Charles D. 
Kilpatrick","name":"Charles D.","surname":"Kilpatrick","rank":35,"pid":[{"value":"http://orcid.org/0000-0002-5740-7747","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Albert K. H. Kong","name":"Albert K. H.","surname":"Kong","rank":36,"pid":[{"value":"http://orcid.org/0000-0002-5105-344X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Thomas Kupfer","name":"Thomas","surname":"Kupfer","rank":37,"pid":[{"value":"http://orcid.org/0000-0002-6540-1484","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Russ R. 
Laher","name":"Russ R.","surname":"Laher","rank":38,"pid":[{"value":"http://orcid.org/0000-0003-2451-5482","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Ashish A. Mahabal","name":"Ashish A.","surname":"Mahabal","rank":39,"pid":[{"value":"http://orcid.org/0000-0003-2242-0244","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Frank J. 
Masci","name":"Frank J.","surname":"Masci","rank":40,"pid":[{"value":"http://orcid.org/0000-0002-8532-9395","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Jannis Necker","name":"Jannis","surname":"Necker","rank":41,"pid":[{"value":"http://orcid.org/0000-0003-0280-7484","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Jakob Nordin","name":"Jakob","surname":"Nordin","rank":42,"pid":[{"value":"http://orcid.org/0000-0001-8342-6274","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Daniel A. 
Perley","name":"Daniel A.","surname":"Perley","rank":43,"pid":null,"affiliation":null},{"fullname":"Mickael Rigault","name":"Mickael","surname":"Rigault","rank":44,"pid":[{"value":"http://orcid.org/0000-0002-8121-2560","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Simeon Reusch","name":"Simeon","surname":"Reusch","rank":45,"pid":[{"value":"http://orcid.org/0000-0002-7788-628X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Hector Rodriguez","name":"Hector","surname":"Rodriguez","rank":46,"pid":null,"affiliation":null},{"fullname":"César Rojas-Bravo","name":"César","surname":"Rojas-Bravo","rank":47,"pid":[{"value":"http://orcid.org/0000-0002-7559-315X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Ben 
Rusholme","name":"Ben","surname":"Rusholme","rank":48,"pid":[{"value":"http://orcid.org/0000-0001-7648-4142","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"David L. Shupe","name":"David L.","surname":"Shupe","rank":49,"pid":[{"value":"http://orcid.org/0000-0003-4401-0430","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Leo P. 
Singer","name":"Leo P.","surname":"Singer","rank":50,"pid":[{"value":"http://orcid.org/0000-0001-9898-5597","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Jesper Sollerman","name":"Jesper","surname":"Sollerman","rank":51,"pid":[{"value":"http://orcid.org/0000-0003-1546-6615","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Maayane T. 
Soumagnac","name":"Maayane T.","surname":"Soumagnac","rank":52,"pid":null,"affiliation":null},{"fullname":"Daniel Stern","name":"Daniel","surname":"Stern","rank":53,"pid":null,"affiliation":null},{"fullname":"Kirsty Taggart","name":"Kirsty","surname":"Taggart","rank":54,"pid":null,"affiliation":null},{"fullname":"Jakob van Santen","name":"Jakob","surname":"van Santen","rank":55,"pid":null,"affiliation":null},{"fullname":"Charlotte Ward","name":"Charlotte","surname":"Ward","rank":56,"pid":null,"affiliation":null},{"fullname":"Patrick Woudt","name":"Patrick","surname":"Woudt","rank":57,"pid":null,"affiliation":null},{"fullname":"Yuhan Yao","name":"Yuhan","surname":"Yao","rank":58,"pid":[{"value":"http://orcid.org/0000-0001-6747-8509","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":[{"value":"A tidal disruption event coincident with a high-energy neutrino","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2021-02-22T17:03:42Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2021-02-22","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2021-02-22","dataInfo":null},"publisher":{"value":"Springer Science and Business Media LLC","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":{"value":"https://www.springer.com/tdm","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://www.nature.com/articles/s41550-020-01295-8.pdf","http://www.nature.com/articles/s41550-020-01295-8","http://dx.doi.org/10.1038/s41550-020-01295-8"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1038/s41550-020-01295-8","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2021-02-22","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Nature 
Astronomy","issnPrinted":null,"issnOnline":"2397-3366","issnLinking":null,"ep":"518","iss":null,"sp":"510","vol":"5","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doi_________::b11b34fa9dfbb5eb12953a525a8ff20e","originalId":null,"pid":[{"value":"10.1038/s41550-020-01295-8","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"Marek Kowalski","name":"Marek","surname":"Kowalski","rank":null,"pid":[{"value":"0000-0001-8594-8666","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Itai 
Sfaradi","name":"Itai","surname":"Sfaradi","rank":null,"pid":[{"value":"0000-0003-0466-3779","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"James Miller-Jones","name":"James","surname":"Miller-Jones","rank":null,"pid":[{"value":"0000-0003-3124-2814","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Assaf Horesh","name":"Assaf","surname":"Horesh","rank":null,"pid":[{"value":"0000-0002-5936-1156","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Albert 
Kong","name":"Albert","surname":"Kong","rank":null,"pid":[{"value":"0000-0002-5105-344X","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Ryan Foley","name":"Ryan","surname":"Foley","rank":null,"pid":[{"value":"0000-0002-2445-5275","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":null,"relevantdate":null,"description":null,"dateofacceptance":null,"publisher":null,"embargoenddate":null,"source":null,"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":null,"journal":null} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author2.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author2.json new file mode 100644 index 000000000..67d5dcbd3 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author2.json @@ -0,0 +1,2 @@ 
+{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1621266036755,"id":"50|doi_________::b11b34fa9dfbb5eb12953a525a8ff20e","originalId":["1295","10.1038/s41550-020-01295-8","50|doiboost____::b11b34fa9dfbb5eb12953a525a8ff20e"],"pid":[{"value":"10.1038/s41550-020-01295-8","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-05-17T15:40:36Z","dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"da luz geraldo eduardo","name":"Robert","surname":"Stein","rank":1,"pid":[{"value":"http://orcid.org/0000-0003-2434-0387","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sjoert van Velzen","name":"Sjoert 
van","surname":"Velzen","rank":2,"pid":[{"value":"http://orcid.org/0000-0002-3859-8074","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Marek Kowalski","name":"Marek","surname":"Kowalski","rank":3,"pid":[{"value":"http://orcid.org/0000-0001-8594-8666","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Anna Franckowiak","name":"Anna","surname":"Franckowiak","rank":4,"pid":null,"affiliation":null},{"fullname":"Suvi Gezari","name":"Suvi","surname":"Gezari","rank":5,"pid":[{"value":"http://orcid.org/0000-0003-3703-5154","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"James C. A. Miller-Jones","name":"James C. 
A.","surname":"Miller-Jones","rank":6,"pid":[{"value":"http://orcid.org/0000-0003-3124-2814","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sara Frederick","name":"Sara","surname":"Frederick","rank":7,"pid":null,"affiliation":null},{"fullname":"Itai Sfaradi","name":"Itai","surname":"Sfaradi","rank":8,"pid":[{"value":"http://orcid.org/0000-0003-0466-3779","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Michael F. 
Bietenholz","name":"Michael F.","surname":"Bietenholz","rank":9,"pid":null,"affiliation":null},{"fullname":"Assaf Horesh","name":"Assaf","surname":"Horesh","rank":10,"pid":[{"value":"http://orcid.org/0000-0002-5936-1156","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Rob Fender","name":"Rob","surname":"Fender","rank":11,"pid":null,"affiliation":null},{"fullname":"Simone Garrappa","name":"Simone","surname":"Garrappa","rank":12,"pid":[{"value":"http://orcid.org/0000-0003-2403-4582","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tomás Ahumada","name":"Tomás","surname":"Ahumada","rank":13,"pid":[{"value":"http://orcid.org/0000-0002-2184-6430","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Igor Andreoni","name":"Igor","surname":"Andreoni","rank":14,"pid":null,"affiliation":null},{"fullname":"Justin 
Belicki","name":"Justin","surname":"Belicki","rank":15,"pid":null,"affiliation":null},{"fullname":"Eric C. Bellm","name":"Eric C.","surname":"Bellm","rank":16,"pid":[{"value":"http://orcid.org/0000-0001-8018-5348","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Markus Böttcher","name":"Markus","surname":"Böttcher","rank":17,"pid":null,"affiliation":null},{"fullname":"Valery Brinnel","name":"Valery","surname":"Brinnel","rank":18,"pid":null,"affiliation":null},{"fullname":"Rick Burruss","name":"Rick","surname":"Burruss","rank":19,"pid":null,"affiliation":null},{"fullname":"S. Bradley Cenko","name":"S. Bradley","surname":"Cenko","rank":20,"pid":[{"value":"http://orcid.org/0000-0003-1673-970X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Michael W. 
Coughlin","name":"Michael W.","surname":"Coughlin","rank":21,"pid":[{"value":"http://orcid.org/0000-0002-8262-2924","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Virginia Cunningham","name":"Virginia","surname":"Cunningham","rank":22,"pid":[{"value":"http://orcid.org/0000-0003-2292-0441","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Andrew Drake","name":"Andrew","surname":"Drake","rank":23,"pid":null,"affiliation":null},{"fullname":"Glennys R. Farrar","name":"Glennys R.","surname":"Farrar","rank":24,"pid":null,"affiliation":null},{"fullname":"Michael Feeney","name":"Michael","surname":"Feeney","rank":25,"pid":null,"affiliation":null},{"fullname":"Ryan J. 
Foley","name":"Ryan J.","surname":"Foley","rank":26,"pid":null,"affiliation":null},{"fullname":"Avishay Gal-Yam","name":"Avishay","surname":"Gal-Yam","rank":27,"pid":[{"value":"http://orcid.org/0000-0002-3653-5598","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"V. Zach Golkhou","name":"V. Zach","surname":"Golkhou","rank":28,"pid":null,"affiliation":null},{"fullname":"Ariel Goobar","name":"Ariel","surname":"Goobar","rank":29,"pid":[{"value":"http://orcid.org/0000-0002-4163-4996","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Matthew J. 
Graham","name":"Matthew J.","surname":"Graham","rank":30,"pid":[{"value":"http://orcid.org/0000-0002-3168-0139","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Erica Hammerstein","name":"Erica","surname":"Hammerstein","rank":31,"pid":null,"affiliation":null},{"fullname":"George Helou","name":"George","surname":"Helou","rank":32,"pid":[{"value":"http://orcid.org/0000-0003-3367-3415","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tiara Hung","name":"Tiara","surname":"Hung","rank":33,"pid":[{"value":"http://orcid.org/0000-0002-9878-7889","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Mansi M. Kasliwal","name":"Mansi M.","surname":"Kasliwal","rank":34,"pid":null,"affiliation":null},{"fullname":"Charles D. 
Kilpatrick","name":"Charles D.","surname":"Kilpatrick","rank":35,"pid":[{"value":"http://orcid.org/0000-0002-5740-7747","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Albert K. H. Kong","name":"Albert K. H.","surname":"Kong","rank":36,"pid":[{"value":"http://orcid.org/0000-0002-5105-344X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Thomas Kupfer","name":"Thomas","surname":"Kupfer","rank":37,"pid":[{"value":"http://orcid.org/0000-0002-6540-1484","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Russ R. 
Laher","name":"Russ R.","surname":"Laher","rank":38,"pid":[{"value":"http://orcid.org/0000-0003-2451-5482","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Ashish A. Mahabal","name":"Ashish A.","surname":"Mahabal","rank":39,"pid":[{"value":"http://orcid.org/0000-0003-2242-0244","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Frank J. 
Masci","name":"Frank J.","surname":"Masci","rank":40,"pid":[{"value":"http://orcid.org/0000-0002-8532-9395","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Jannis Necker","name":"Jannis","surname":"Necker","rank":41,"pid":[{"value":"http://orcid.org/0000-0003-0280-7484","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Jakob Nordin","name":"Jakob","surname":"Nordin","rank":42,"pid":[{"value":"http://orcid.org/0000-0001-8342-6274","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Daniel A. 
Perley","name":"Daniel A.","surname":"Perley","rank":43,"pid":null,"affiliation":null},{"fullname":"Mickael Rigault","name":"Mickael","surname":"Rigault","rank":44,"pid":[{"value":"http://orcid.org/0000-0002-8121-2560","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Simeon Reusch","name":"Simeon","surname":"Reusch","rank":45,"pid":[{"value":"http://orcid.org/0000-0002-7788-628X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Hector Rodriguez","name":"Hector","surname":"Rodriguez","rank":46,"pid":null,"affiliation":null},{"fullname":"César Rojas-Bravo","name":"César","surname":"Rojas-Bravo","rank":47,"pid":[{"value":"http://orcid.org/0000-0002-7559-315X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Ben 
Rusholme","name":"Ben","surname":"Rusholme","rank":48,"pid":[{"value":"http://orcid.org/0000-0001-7648-4142","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"David L. Shupe","name":"David L.","surname":"Shupe","rank":49,"pid":[{"value":"http://orcid.org/0000-0003-4401-0430","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Leo P. 
Singer","name":"Leo P.","surname":"Singer","rank":50,"pid":[{"value":"http://orcid.org/0000-0001-9898-5597","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Jesper Sollerman","name":"Jesper","surname":"Sollerman","rank":51,"pid":[{"value":"http://orcid.org/0000-0003-1546-6615","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Maayane T. 
Soumagnac","name":"Maayane T.","surname":"Soumagnac","rank":52,"pid":null,"affiliation":null},{"fullname":"Daniel Stern","name":"Daniel","surname":"Stern","rank":53,"pid":null,"affiliation":null},{"fullname":"Kirsty Taggart","name":"Kirsty","surname":"Taggart","rank":54,"pid":null,"affiliation":null},{"fullname":"Jakob van Santen","name":"Jakob","surname":"van Santen","rank":55,"pid":null,"affiliation":null},{"fullname":"Charlotte Ward","name":"Charlotte","surname":"Ward","rank":56,"pid":null,"affiliation":null},{"fullname":"Patrick Woudt","name":"Patrick","surname":"Woudt","rank":57,"pid":null,"affiliation":null},{"fullname":"Yuhan Yao","name":"Yuhan","surname":"Yao","rank":58,"pid":[{"value":"http://orcid.org/0000-0001-6747-8509","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":[{"value":"A tidal disruption event coincident with a high-energy neutrino","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2021-02-22T17:03:42Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2021-02-22","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2021-02-22","dataInfo":null},"publisher":{"value":"Springer Science and Business Media LLC","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":{"value":"https://www.springer.com/tdm","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://www.nature.com/articles/s41550-020-01295-8.pdf","http://www.nature.com/articles/s41550-020-01295-8","http://dx.doi.org/10.1038/s41550-020-01295-8"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1038/s41550-020-01295-8","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2021-02-22","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Nature 
Astronomy","issnPrinted":null,"issnOnline":"2397-3366","issnLinking":null,"ep":"518","iss":null,"sp":"510","vol":"5","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doi_________::b11b34fa9dfbb5eb12953a525a8ff20e","originalId":null,"pid":[{"value":"10.1038/s41550-020-01295-8","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"geraldo luz jr","name":"Marek","surname":"Kowalski","rank":null,"pid":[{"value":"0000-0003-4950-6742","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Itai 
Sfaradi","name":"Itai","surname":"Sfaradi","rank":null,"pid":[{"value":"0000-0003-0466-3779","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"James Miller-Jones","name":"James","surname":"Miller-Jones","rank":null,"pid":[{"value":"0000-0003-3124-2814","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Assaf Horesh","name":"Assaf","surname":"Horesh","rank":null,"pid":[{"value":"0000-0002-5936-1156","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Albert 
Kong","name":"Albert","surname":"Kong","rank":null,"pid":[{"value":"0000-0002-5105-344X","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Ryan Foley","name":"Ryan","surname":"Foley","rank":null,"pid":[{"value":"0000-0002-2445-5275","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":null,"relevantdate":null,"description":null,"dateofacceptance":null,"publisher":null,"embargoenddate":null,"source":null,"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":null,"journal":null} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author3.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author3.json new file mode 100644 index 000000000..d57a9b3fb --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/should_appear_author3.json @@ -0,0 +1,2 @@ 
+{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1621266036755,"id":"50|doi_________::b11b34fa9dfbb5eb12953a525a8ff20e","originalId":["1295","10.1038/s41550-020-01295-8","50|doiboost____::b11b34fa9dfbb5eb12953a525a8ff20e"],"pid":[{"value":"10.1038/s41550-020-01295-8","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-05-17T15:40:36Z","dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"Tsao-Hsuan Tsai","name":"Robert","surname":"Stein","rank":1,"pid":[{"value":"http://orcid.org/0000-0003-2434-0387","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Yu-Hsuan Tsai","name":"Sjoert 
van","surname":"Velzen","rank":2,"pid":[{"value":"http://orcid.org/0000-0002-3859-8074","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"chia hsuan tsai","name":"Marek","surname":"Kowalski","rank":3,"pid":[{"value":"http://orcid.org/0000-0001-8594-8666","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Anna Franckowiak","name":"Anna","surname":"Franckowiak","rank":4,"pid":null,"affiliation":null},{"fullname":"Suvi Gezari","name":"Suvi","surname":"Gezari","rank":5,"pid":[{"value":"http://orcid.org/0000-0003-3703-5154","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"James C. A. Miller-Jones","name":"James C. 
A.","surname":"Miller-Jones","rank":6,"pid":[{"value":"http://orcid.org/0000-0003-3124-2814","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sara Frederick","name":"Sara","surname":"Frederick","rank":7,"pid":null,"affiliation":null},{"fullname":"Itai Sfaradi","name":"Itai","surname":"Sfaradi","rank":8,"pid":[{"value":"http://orcid.org/0000-0003-0466-3779","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Michael F. 
Bietenholz","name":"Michael F.","surname":"Bietenholz","rank":9,"pid":null,"affiliation":null},{"fullname":"Assaf Horesh","name":"Assaf","surname":"Horesh","rank":10,"pid":[{"value":"http://orcid.org/0000-0002-5936-1156","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Rob Fender","name":"Rob","surname":"Fender","rank":11,"pid":null,"affiliation":null},{"fullname":"Simone Garrappa","name":"Simone","surname":"Garrappa","rank":12,"pid":[{"value":"http://orcid.org/0000-0003-2403-4582","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tomás Ahumada","name":"Tomás","surname":"Ahumada","rank":13,"pid":[{"value":"http://orcid.org/0000-0002-2184-6430","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Igor Andreoni","name":"Igor","surname":"Andreoni","rank":14,"pid":null,"affiliation":null},{"fullname":"Justin 
Belicki","name":"Justin","surname":"Belicki","rank":15,"pid":null,"affiliation":null},{"fullname":"Eric C. Bellm","name":"Eric C.","surname":"Bellm","rank":16,"pid":[{"value":"http://orcid.org/0000-0001-8018-5348","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Markus Böttcher","name":"Markus","surname":"Böttcher","rank":17,"pid":null,"affiliation":null},{"fullname":"Valery Brinnel","name":"Valery","surname":"Brinnel","rank":18,"pid":null,"affiliation":null},{"fullname":"Rick Burruss","name":"Rick","surname":"Burruss","rank":19,"pid":null,"affiliation":null},{"fullname":"S. Bradley Cenko","name":"S. Bradley","surname":"Cenko","rank":20,"pid":[{"value":"http://orcid.org/0000-0003-1673-970X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Michael W. 
Coughlin","name":"Michael W.","surname":"Coughlin","rank":21,"pid":[{"value":"http://orcid.org/0000-0002-8262-2924","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Virginia Cunningham","name":"Virginia","surname":"Cunningham","rank":22,"pid":[{"value":"http://orcid.org/0000-0003-2292-0441","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Andrew Drake","name":"Andrew","surname":"Drake","rank":23,"pid":null,"affiliation":null},{"fullname":"Glennys R. Farrar","name":"Glennys R.","surname":"Farrar","rank":24,"pid":null,"affiliation":null},{"fullname":"Michael Feeney","name":"Michael","surname":"Feeney","rank":25,"pid":null,"affiliation":null},{"fullname":"Ryan J. 
Foley","name":"Ryan J.","surname":"Foley","rank":26,"pid":null,"affiliation":null},{"fullname":"Avishay Gal-Yam","name":"Avishay","surname":"Gal-Yam","rank":27,"pid":[{"value":"http://orcid.org/0000-0002-3653-5598","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"V. Zach Golkhou","name":"V. Zach","surname":"Golkhou","rank":28,"pid":null,"affiliation":null},{"fullname":"Ariel Goobar","name":"Ariel","surname":"Goobar","rank":29,"pid":[{"value":"http://orcid.org/0000-0002-4163-4996","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Matthew J. 
Graham","name":"Matthew J.","surname":"Graham","rank":30,"pid":[{"value":"http://orcid.org/0000-0002-3168-0139","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Erica Hammerstein","name":"Erica","surname":"Hammerstein","rank":31,"pid":null,"affiliation":null},{"fullname":"George Helou","name":"George","surname":"Helou","rank":32,"pid":[{"value":"http://orcid.org/0000-0003-3367-3415","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tiara Hung","name":"Tiara","surname":"Hung","rank":33,"pid":[{"value":"http://orcid.org/0000-0002-9878-7889","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Mansi M. Kasliwal","name":"Mansi M.","surname":"Kasliwal","rank":34,"pid":null,"affiliation":null},{"fullname":"Charles D. 
Kilpatrick","name":"Charles D.","surname":"Kilpatrick","rank":35,"pid":[{"value":"http://orcid.org/0000-0002-5740-7747","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Albert K. H. Kong","name":"Albert K. H.","surname":"Kong","rank":36,"pid":[{"value":"http://orcid.org/0000-0002-5105-344X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Thomas Kupfer","name":"Thomas","surname":"Kupfer","rank":37,"pid":[{"value":"http://orcid.org/0000-0002-6540-1484","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Russ R. 
Laher","name":"Russ R.","surname":"Laher","rank":38,"pid":[{"value":"http://orcid.org/0000-0003-2451-5482","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Ashish A. Mahabal","name":"Ashish A.","surname":"Mahabal","rank":39,"pid":[{"value":"http://orcid.org/0000-0003-2242-0244","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Frank J. 
Masci","name":"Frank J.","surname":"Masci","rank":40,"pid":[{"value":"http://orcid.org/0000-0002-8532-9395","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Jannis Necker","name":"Jannis","surname":"Necker","rank":41,"pid":[{"value":"http://orcid.org/0000-0003-0280-7484","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Jakob Nordin","name":"Jakob","surname":"Nordin","rank":42,"pid":[{"value":"http://orcid.org/0000-0001-8342-6274","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Daniel A. 
Perley","name":"Daniel A.","surname":"Perley","rank":43,"pid":null,"affiliation":null},{"fullname":"Mickael Rigault","name":"Mickael","surname":"Rigault","rank":44,"pid":[{"value":"http://orcid.org/0000-0002-8121-2560","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Simeon Reusch","name":"Simeon","surname":"Reusch","rank":45,"pid":[{"value":"http://orcid.org/0000-0002-7788-628X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Hector Rodriguez","name":"Hector","surname":"Rodriguez","rank":46,"pid":null,"affiliation":null},{"fullname":"César Rojas-Bravo","name":"César","surname":"Rojas-Bravo","rank":47,"pid":[{"value":"http://orcid.org/0000-0002-7559-315X","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Ben 
Rusholme","name":"Ben","surname":"Rusholme","rank":48,"pid":[{"value":"http://orcid.org/0000-0001-7648-4142","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"David L. Shupe","name":"David L.","surname":"Shupe","rank":49,"pid":[{"value":"http://orcid.org/0000-0003-4401-0430","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Leo P. 
Singer","name":"Leo P.","surname":"Singer","rank":50,"pid":[{"value":"http://orcid.org/0000-0001-9898-5597","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Jesper Sollerman","name":"Jesper","surname":"Sollerman","rank":51,"pid":[{"value":"http://orcid.org/0000-0003-1546-6615","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Maayane T. 
Soumagnac","name":"Maayane T.","surname":"Soumagnac","rank":52,"pid":null,"affiliation":null},{"fullname":"Daniel Stern","name":"Daniel","surname":"Stern","rank":53,"pid":null,"affiliation":null},{"fullname":"Kirsty Taggart","name":"Kirsty","surname":"Taggart","rank":54,"pid":null,"affiliation":null},{"fullname":"Jakob van Santen","name":"Jakob","surname":"van Santen","rank":55,"pid":null,"affiliation":null},{"fullname":"Charlotte Ward","name":"Charlotte","surname":"Ward","rank":56,"pid":null,"affiliation":null},{"fullname":"Patrick Woudt","name":"Patrick","surname":"Woudt","rank":57,"pid":null,"affiliation":null},{"fullname":"Yuhan Yao","name":"Yuhan","surname":"Yao","rank":58,"pid":[{"value":"http://orcid.org/0000-0001-6747-8509","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":[{"value":"A tidal disruption event coincident with a high-energy neutrino","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2021-02-22T17:03:42Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2021-02-22","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2021-02-22","dataInfo":null},"publisher":{"value":"Springer Science and Business Media LLC","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":null,"format":null,"contributor":null,"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":{"value":"https://www.springer.com/tdm","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://www.nature.com/articles/s41550-020-01295-8.pdf","http://www.nature.com/articles/s41550-020-01295-8","http://dx.doi.org/10.1038/s41550-020-01295-8"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1038/s41550-020-01295-8","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2021-02-22","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Nature 
Astronomy","issnPrinted":null,"issnOnline":"2397-3366","issnLinking":null,"ep":"518","iss":null,"sp":"510","vol":"5","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doi_________::b11b34fa9dfbb5eb12953a525a8ff20e","originalId":null,"pid":[{"value":"10.1038/s41550-020-01295-8","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"measures":null,"author":[{"fullname":"hsuan Tsai","name":"Marek","surname":"Kowalski","rank":null,"pid":[{"value":"0000-0003-4950-6742","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Itai 
Sfaradi","name":"Itai","surname":"Sfaradi","rank":null,"pid":[{"value":"0000-0003-0466-3779","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"James Miller-Jones","name":"James","surname":"Miller-Jones","rank":null,"pid":[{"value":"0000-0003-3124-2814","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Assaf Horesh","name":"Assaf","surname":"Horesh","rank":null,"pid":[{"value":"0000-0002-5936-1156","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Albert 
Kong","name":"Albert","surname":"Kong","rank":null,"pid":[{"value":"0000-0002-5105-344X","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Ryan Foley","name":"Ryan","surname":"Foley","rank":null,"pid":[{"value":"0000-0002-2445-5275","qualifier":{"classid":"orcid","classname":"orcid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.91","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":null,"subject":null,"title":null,"relevantdate":null,"description":null,"dateofacceptance":null,"publisher":null,"embargoenddate":null,"source":null,"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":null,"journal":null} \ No newline at end of file From c26980f1c419b7c104a1bb014d383c2e33532a85 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 10:33:00 +0200 Subject: [PATCH 37/37] Adding spark.close() to avoid Only one SparkContext may be running in this JVM error while running test on Jenkins and fixed issue --- .../eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala index 7628fb853..69985dae2 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -61,7 +61,7 @@ class MappingORCIDToOAFTest { assertTrue(oA == p.count()) println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p.first())) - + spark.close() } @@ -78,8 +78,8 @@ class MappingORCIDToOAFTest { val oaf = ORCIDToOAF.convertTOOAF(orcid) assert(oaf.getPid.size() == 1) oaf.getPid.toList.foreach(pid => assert(pid.getQualifier.getClassid.equals("doi"))) - oaf.getPid.toList.foreach(pid => assert(pid.getValue.equals("10.1042/BCJ20160876".toLowerCase()))) - //println(mapper.writeValueAsString(ORCIDToOAF.convertTOOAF(orcid))) + oaf.getPid.toList.foreach(pid => assert(pid.getValue.equals("10.1042/BCJ20160876"))) + //println(mapper.writeValueAsString(oaf)) }