From a1297082e2df74e672512334238143542ad323ce Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Tue, 19 Nov 2024 14:57:18 +0100 Subject: [PATCH 1/2] Crossref Enhancements: - Accurate Review Type Assignment: Resolved an issue identified in ticket https://support.openaire.eu/issues/9525#note-13. When a relationship of "is-review-of" is detected, the publication type is now correctly set to "Review". - Enhanced Author Affiliation Data: Implemented Miriam's suggestion by including a new field, "RawAffiliationString", in each author entry. This additional data provides a more granular level of detail regarding author affiliations, potentially improving discoverability and research analysis. --- .../collection/crossref/Crossref2Oaf.scala | 67 ++--- .../collection/crossref/affiliationTest.json | 232 ++++++++++++++++++ .../crossref/CrossrefMappingTest.scala | 12 +- 3 files changed, 265 insertions(+), 46 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/affiliationTest.json diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala index e7d68920b8..e15312e43a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala @@ -37,7 +37,7 @@ case class mappingAuthor( family: Option[String], sequence: Option[String], ORCID: Option[String], - affiliation: Option[mappingAffiliation] + affiliation: Option[List[mappingAffiliation]] ) {} case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {} @@ -457,15 +457,14 @@ case object Crossref2Oaf { } //Mapping Author - val authorList: List[mappingAuthor] = - (json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined) + 
val authorList: List[mappingAuthor] = (json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined) val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) => a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first") ) result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => - generateAuhtor(a.given.orNull, a.family.get, a.ORCID.orNull, index) + generateAuthor(a.given.orNull, a.family.get, a.ORCID.orNull, index, a.affiliation) }.asJava) // Mapping instance @@ -504,18 +503,6 @@ case object Crossref2Oaf { ) } - val is_review = json \ "relation" \ "is-review-of" \ "id" - - if (is_review != JNothing) { - instance.setInstancetype( - OafMapperUtils.qualifier( - "0015", - "peerReviewed", - ModelConstants.DNET_REVIEW_LEVELS, - ModelConstants.DNET_REVIEW_LEVELS - ) - ) - } if (doi.startsWith("10.3410") || doi.startsWith("10.12703")) instance.setHostedby( @@ -569,17 +556,24 @@ case object Crossref2Oaf { result } + + def generateIdentifier(oaf: Result, doi: String): String = { val id = DHPUtils.md5(doi.toLowerCase) s"50|doiboost____|$id" } - def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = { + private def generateAuthor(given: String, family: String, orcid: String, index: Int, affiliation: Option[List[mappingAffiliation]]): Author = { val a = new Author a.setName(given) a.setSurname(family) a.setFullname(s"$given $family") a.setRank(index + 1) + + // Adding Raw affiliation if it's defined + if (affiliation.isDefined) { + a.setRawAffiliationString(affiliation.get.map(a => a.name).asJava) + } if (StringUtils.isNotBlank(orcid)) a.setPid( List( @@ -705,11 +699,21 @@ case object Crossref2Oaf { val objectType = (json \ "type").extractOrElse[String](null) if (objectType == null) return resultList - val typology = getTypeQualifier(objectType, vocabularies) + + + // If the item has a relations is-review-of, then we force it to a peer-review + val is_review = json \ "relation" \ "is-review-of" \ 
"id" + var force_to_review = false + if (is_review != JNothing) { + force_to_review = true + } + + val typology = getTypeQualifier(if (force_to_review) "peer-review" else objectType, vocabularies) if (typology == null) return List() + val result = generateItemFromType(typology._2) if (result == null) return List() @@ -757,33 +761,6 @@ case object Crossref2Oaf { else resultList } - - // if (uw != null) { -// result.getCollectedfrom.add(createUnpayWallCollectedFrom()) -// val i: Instance = new Instance() -// i.setCollectedfrom(createUnpayWallCollectedFrom()) -// if (uw.best_oa_location != null) { -// -// i.setUrl(List(uw.best_oa_location.url).asJava) -// if (uw.best_oa_location.license.isDefined) { -// i.setLicense(field[String](uw.best_oa_location.license.get, null)) -// } -// -// val colour = get_unpaywall_color(uw.oa_status) -// if (colour.isDefined) { -// val a = new AccessRight -// a.setClassid(ModelConstants.ACCESS_RIGHT_OPEN) -// a.setClassname(ModelConstants.ACCESS_RIGHT_OPEN) -// a.setSchemeid(ModelConstants.DNET_ACCESS_MODES) -// a.setSchemename(ModelConstants.DNET_ACCESS_MODES) -// a.setOpenAccessRoute(colour.get) -// i.setAccessright(a) -// } -// i.setPid(result.getPid) -// result.getInstance().add(i) -// } -// } - } private def createCiteRelation(source: Result, targetPid: String, targetPidType: String): List[Relation] = { diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/affiliationTest.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/affiliationTest.json new file mode 100644 index 0000000000..201138e45b --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/affiliationTest.json @@ -0,0 +1,232 @@ +{ + "indexed": { + "date-parts": [ + [ + 2022, + 4, + 3 + ] + ], + "date-time": "2022-04-03T01:45:59Z", + "timestamp": 1648950359167 + }, + "reference-count": 0, + "publisher": "American Society of Clinical Oncology (ASCO)", 
+ "issue": "18_suppl", + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "short-container-title": [ + "JCO" + ], + "published-print": { + "date-parts": [ + [ + 2007, + 6, + 20 + ] + ] + }, + "abstract": " 3507 Purpose: To detect IGF-1R on circulating tumor cells (CTCs) as a biomarker in the clinical development of a monoclonal human antibody, CP-751,871, targeting IGF-1R. Experimental Design: An automated sample preparation and analysis system for enumerating CTCs (Celltracks) was adapted for detecting IGF-1R positive CTCs with a diagnostic antibody targeting a different IGF-1R epitope to CP-751,871. This assay was utilized in three phase I trials of CP-751,871 as a single agent or with chemotherapy and was validated using cell lines and blood samples from healthy volunteers and patients with metastatic carcinoma. Results: There was no interference between the analytical and therapeutic antibodies. CP-751,871 was well tolerated as a single agent, and in combination with docetaxel or carboplatin and paclitaxel, at doses ranging from 0.05 mg/kg to 20 mg/kg. Eighty patients were enrolled on phase 1 studies of CP-751,871, with 47 (59%) patients having CTCs detected during the study. Prior to treatment 26 patients (33%) had CTCs, with 23 having detectable IGF-1R positive CTCs. CP-751,871 alone, and CP-751,871 with cytotoxic chemotherapy, decreased CTCs and IGF-1R positive CTCs; these increased towards the end of the 21-day cycle in some patients, falling again with retreatment. CTCs were commonest in advanced hormone refractory prostate cancer (11/20). Detectable IGF-1R expression on CTCs before treatment with CP-751,871 and docetaxel was associated with a higher frequency of PSA decline by more than 50% (6/10 vs 2/8 patients). A relationship was observed between sustained falls in CTCs counts and PSA declines by more than 50%. Conclusions: IGF-1R expression is detectable by immunofluorescence on CTCs. 
These data support the further evaluation of CTCs in pharmacodynamic studies and patient selection, particularly in advanced prostate cancer. No significant financial relationships to disclose. ", + "DOI": "10.1200/jco.2007.25.18_suppl.3507", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2020, + 3, + 6 + ] + ], + "date-time": "2020-03-06T20:50:42Z", + "timestamp": 1583527842000 + }, + "page": "3507-3507", + "source": "Crossref", + "is-referenced-by-count": 0, + "title": [ + "Circulating tumor cells expressing the insulin growth factor-1 receptor (IGF-1R): Method of detection, incidence and potential applications" + ], + "prefix": "10.1200", + "volume": "25", + "author": [ + { + "given": "J. S.", + "family": "de Bono", + "sequence": "first", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "A.", + "family": "Adjei", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "G.", + "family": "Attard", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "M.", + "family": "Pollak", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill 
University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "P.", + "family": "Fong", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "P.", + "family": "Haluska", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "L.", + "family": "Roberts", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "D.", + "family": "Chainese", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + }, + { + "given": "L.", + "family": "Terstappen", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon 
Valley, PA" + } + ] + }, + { + "given": "A.", + "family": "Gualberto", + "sequence": "additional", + "affiliation": [ + { + "name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA" + } + ] + } + ], + "member": "233", + "container-title": [ + "Journal of Clinical Oncology" + ], + "original-title": [], + "language": "en", + "deposited": { + "date-parts": [ + [ + 2020, + 3, + 6 + ] + ], + "date-time": "2020-03-06T20:51:03Z", + "timestamp": 1583527863000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://ascopubs.org/doi/10.1200/jco.2007.25.18_suppl.3507" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2007, + 6, + 20 + ] + ] + }, + "references-count": 0, + "journal-issue": { + "issue": "18_suppl", + "published-print": { + "date-parts": [ + [ + 2007, + 6, + 20 + ] + ] + } + }, + "alternative-id": [ + "10.1200/jco.2007.25.18_suppl.3507" + ], + "URL": "http://dx.doi.org/10.1200/jco.2007.25.18_suppl.3507", + "relation": {}, + "ISSN": [ + "0732-183X", + "1527-7755" + ], + "issn-type": [ + { + "value": "0732-183X", + "type": "print" + }, + { + "value": "1527-7755", + "type": "electronic" + } + ], + "subject": [], + "published": { + "date-parts": [ + [ + 2007, + 6, + 20 + ] + ] + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala index c3ea884eb3..f6f71ca669 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala @@ -25,8 +25,18 @@ class 
CrossrefMappingTest extends AbstractVocabularyTest { val input = IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8") - println(Crossref2Oaf.convert(input, vocabularies, TransformationType.All)) + Crossref2Oaf.convert(input, vocabularies, TransformationType.All).foreach(record => { + println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(record)) + }) } + + @Test + def mappingAffiliation(): Unit = { + val input = + IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/affiliationTest.json"), "utf-8") + val data = Crossref2Oaf.convert(input, vocabularies, TransformationType.OnlyResult) + println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(data.head)) + } } From 496007188a720b18ce8301b6050c6e0b924522d1 Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Wed, 20 Nov 2024 09:50:09 +0100 Subject: [PATCH 2/2] Added assertion on CrossrefMappingTest --- .../crossref/CrossrefMappingTest.scala | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala index f6f71ca669..12ca14ba14 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/crossref/CrossrefMappingTest.scala @@ -3,12 +3,15 @@ package eu.dnetlib.dhp.collection.crossref import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.collection.crossref.Crossref2Oaf.TransformationType +import eu.dnetlib.dhp.schema.oaf.Publication import org.apache.commons.io.IOUtils -import org.junit.jupiter.api.{BeforeEach, Test} +import org.junit.jupiter.api.{Assertions, 
BeforeEach, Test} import org.junit.jupiter.api.extension.ExtendWith import org.mockito.junit.jupiter.MockitoExtension import org.slf4j.{Logger, LoggerFactory} +import scala.collection.JavaConverters.asScalaBufferConverter + @ExtendWith(Array(classOf[MockitoExtension])) class CrossrefMappingTest extends AbstractVocabularyTest { @@ -26,7 +29,7 @@ class CrossrefMappingTest extends AbstractVocabularyTest { IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8") Crossref2Oaf.convert(input, vocabularies, TransformationType.All).foreach(record => { - println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(record)) + Assertions.assertNotNull(record) }) } @@ -37,6 +40,16 @@ class CrossrefMappingTest extends AbstractVocabularyTest { val input = IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/affiliationTest.json"), "utf-8") val data = Crossref2Oaf.convert(input, vocabularies, TransformationType.OnlyResult) + data.foreach(record => { + Assertions.assertNotNull(record) + Assertions.assertTrue(record.isInstanceOf[Publication]) + val publication = record.asInstanceOf[Publication] + publication.getAuthor.asScala.foreach(author => { + Assertions.assertNotNull(author.getRawAffiliationString) + Assertions.assertTrue(author.getRawAffiliationString.size()>0) + + }) + }) println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(data.head)) } }