1
0
Fork 0

Crossref Enhancements:

-Accurate Review Type Assignment: Resolved an issue identified in ticket https://support.openaire.eu/issues/9525#note-13. When a relationship of "is-review-of" is detected, the publication type is now correctly set to "Review."
-Enhanced Author Affiliation Data: Implemented Miriam's suggestion by including a new field, "RawAffiliationString," in each author entry. This additional data provides a more granular level of detail regarding author affiliations, potentially improving discoverability and research analysis.
This commit is contained in:
sandro.labruzzo 2024-11-19 14:57:18 +01:00
parent a42c8b7c85
commit a1297082e2
3 changed files with 265 additions and 46 deletions

View File

@ -37,7 +37,7 @@ case class mappingAuthor(
family: Option[String], family: Option[String],
sequence: Option[String], sequence: Option[String],
ORCID: Option[String], ORCID: Option[String],
affiliation: Option[mappingAffiliation] affiliation: Option[List[mappingAffiliation]]
) {} ) {}
case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {} case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {}
@ -457,15 +457,14 @@ case object Crossref2Oaf {
} }
//Mapping Author //Mapping Author
val authorList: List[mappingAuthor] = val authorList: List[mappingAuthor] = (json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined)
(json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined)
val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) => val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) =>
a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first") a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")
) )
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
generateAuhtor(a.given.orNull, a.family.get, a.ORCID.orNull, index) generateAuthor(a.given.orNull, a.family.get, a.ORCID.orNull, index, a.affiliation)
}.asJava) }.asJava)
// Mapping instance // Mapping instance
@ -504,18 +503,6 @@ case object Crossref2Oaf {
) )
} }
val is_review = json \ "relation" \ "is-review-of" \ "id"
if (is_review != JNothing) {
instance.setInstancetype(
OafMapperUtils.qualifier(
"0015",
"peerReviewed",
ModelConstants.DNET_REVIEW_LEVELS,
ModelConstants.DNET_REVIEW_LEVELS
)
)
}
if (doi.startsWith("10.3410") || doi.startsWith("10.12703")) if (doi.startsWith("10.3410") || doi.startsWith("10.12703"))
instance.setHostedby( instance.setHostedby(
@ -569,17 +556,24 @@ case object Crossref2Oaf {
result result
} }
def generateIdentifier(oaf: Result, doi: String): String = { def generateIdentifier(oaf: Result, doi: String): String = {
val id = DHPUtils.md5(doi.toLowerCase) val id = DHPUtils.md5(doi.toLowerCase)
s"50|doiboost____|$id" s"50|doiboost____|$id"
} }
def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = { private def generateAuthor(given: String, family: String, orcid: String, index: Int, affiliation: Option[List[mappingAffiliation]]): Author = {
val a = new Author val a = new Author
a.setName(given) a.setName(given)
a.setSurname(family) a.setSurname(family)
a.setFullname(s"$given $family") a.setFullname(s"$given $family")
a.setRank(index + 1) a.setRank(index + 1)
// Adding Raw affiliation if it's defined
if (affiliation.isDefined) {
a.setRawAffiliationString(affiliation.get.map(a => a.name).asJava)
}
if (StringUtils.isNotBlank(orcid)) if (StringUtils.isNotBlank(orcid))
a.setPid( a.setPid(
List( List(
@ -705,11 +699,21 @@ case object Crossref2Oaf {
val objectType = (json \ "type").extractOrElse[String](null) val objectType = (json \ "type").extractOrElse[String](null)
if (objectType == null) if (objectType == null)
return resultList return resultList
val typology = getTypeQualifier(objectType, vocabularies)
// If the item has a relations is-review-of, then we force it to a peer-review
val is_review = json \ "relation" \ "is-review-of" \ "id"
var force_to_review = false
if (is_review != JNothing) {
force_to_review = true
}
val typology = getTypeQualifier(if (force_to_review) "peer-review" else objectType, vocabularies)
if (typology == null) if (typology == null)
return List() return List()
val result = generateItemFromType(typology._2) val result = generateItemFromType(typology._2)
if (result == null) if (result == null)
return List() return List()
@ -757,33 +761,6 @@ case object Crossref2Oaf {
else else
resultList resultList
} }
// if (uw != null) {
// result.getCollectedfrom.add(createUnpayWallCollectedFrom())
// val i: Instance = new Instance()
// i.setCollectedfrom(createUnpayWallCollectedFrom())
// if (uw.best_oa_location != null) {
//
// i.setUrl(List(uw.best_oa_location.url).asJava)
// if (uw.best_oa_location.license.isDefined) {
// i.setLicense(field[String](uw.best_oa_location.license.get, null))
// }
//
// val colour = get_unpaywall_color(uw.oa_status)
// if (colour.isDefined) {
// val a = new AccessRight
// a.setClassid(ModelConstants.ACCESS_RIGHT_OPEN)
// a.setClassname(ModelConstants.ACCESS_RIGHT_OPEN)
// a.setSchemeid(ModelConstants.DNET_ACCESS_MODES)
// a.setSchemename(ModelConstants.DNET_ACCESS_MODES)
// a.setOpenAccessRoute(colour.get)
// i.setAccessright(a)
// }
// i.setPid(result.getPid)
// result.getInstance().add(i)
// }
// }
} }
private def createCiteRelation(source: Result, targetPid: String, targetPidType: String): List[Relation] = { private def createCiteRelation(source: Result, targetPid: String, targetPidType: String): List[Relation] = {

View File

@ -0,0 +1,232 @@
{
"indexed": {
"date-parts": [
[
2022,
4,
3
]
],
"date-time": "2022-04-03T01:45:59Z",
"timestamp": 1648950359167
},
"reference-count": 0,
"publisher": "American Society of Clinical Oncology (ASCO)",
"issue": "18_suppl",
"content-domain": {
"domain": [],
"crossmark-restriction": false
},
"short-container-title": [
"JCO"
],
"published-print": {
"date-parts": [
[
2007,
6,
20
]
]
},
"abstract": "<jats:p> 3507 </jats:p><jats:p> Purpose: To detect IGF-1R on circulating tumor cells (CTCs) as a biomarker in the clinical development of a monoclonal human antibody, CP-751,871, targeting IGF-1R. Experimental Design: An automated sample preparation and analysis system for enumerating CTCs (Celltracks) was adapted for detecting IGF-1R positive CTCs with a diagnostic antibody targeting a different IGF-1R epitope to CP-751,871. This assay was utilized in three phase I trials of CP-751,871 as a single agent or with chemotherapy and was validated using cell lines and blood samples from healthy volunteers and patients with metastatic carcinoma. Results: There was no interference between the analytical and therapeutic antibodies. CP-751,871 was well tolerated as a single agent, and in combination with docetaxel or carboplatin and paclitaxel, at doses ranging from 0.05 mg/kg to 20 mg/kg. Eighty patients were enrolled on phase 1 studies of CP-751,871, with 47 (59%) patients having CTCs detected during the study. Prior to treatment 26 patients (33%) had CTCs, with 23 having detectable IGF-1R positive CTCs. CP-751,871 alone, and CP-751,871 with cytotoxic chemotherapy, decreased CTCs and IGF-1R positive CTCs; these increased towards the end of the 21-day cycle in some patients, falling again with retreatment. CTCs were commonest in advanced hormone refractory prostate cancer (11/20). Detectable IGF-1R expression on CTCs before treatment with CP-751,871 and docetaxel was associated with a higher frequency of PSA decline by more than 50% (6/10 vs 2/8 patients). A relationship was observed between sustained falls in CTCs counts and PSA declines by more than 50%. Conclusions: IGF-1R expression is detectable by immunofluorescence on CTCs. These data support the further evaluation of CTCs in pharmacodynamic studies and patient selection, particularly in advanced prostate cancer. </jats:p><jats:p> No significant financial relationships to disclose. </jats:p>",
"DOI": "10.1200/jco.2007.25.18_suppl.3507",
"type": "journal-article",
"created": {
"date-parts": [
[
2020,
3,
6
]
],
"date-time": "2020-03-06T20:50:42Z",
"timestamp": 1583527842000
},
"page": "3507-3507",
"source": "Crossref",
"is-referenced-by-count": 0,
"title": [
"Circulating tumor cells expressing the insulin growth factor-1 receptor (IGF-1R): Method of detection, incidence and potential applications"
],
"prefix": "10.1200",
"volume": "25",
"author": [
{
"given": "J. S.",
"family": "de Bono",
"sequence": "first",
"affiliation": [
{
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
}
]
},
{
"given": "A.",
"family": "Adjei",
"sequence": "additional",
"affiliation": [
{
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
}
]
},
{
"given": "G.",
"family": "Attard",
"sequence": "additional",
"affiliation": [
{
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
}
]
},
{
"given": "M.",
"family": "Pollak",
"sequence": "additional",
"affiliation": [
{
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
}
]
},
{
"given": "P.",
"family": "Fong",
"sequence": "additional",
"affiliation": [
{
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
}
]
},
{
"given": "P.",
"family": "Haluska",
"sequence": "additional",
"affiliation": [
{
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
}
]
},
{
"given": "L.",
"family": "Roberts",
"sequence": "additional",
"affiliation": [
{
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
}
]
},
{
"given": "D.",
"family": "Chainese",
"sequence": "additional",
"affiliation": [
{
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
}
]
},
{
"given": "L.",
"family": "Terstappen",
"sequence": "additional",
"affiliation": [
{
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
}
]
},
{
"given": "A.",
"family": "Gualberto",
"sequence": "additional",
"affiliation": [
{
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
}
]
}
],
"member": "233",
"container-title": [
"Journal of Clinical Oncology"
],
"original-title": [],
"language": "en",
"deposited": {
"date-parts": [
[
2020,
3,
6
]
],
"date-time": "2020-03-06T20:51:03Z",
"timestamp": 1583527863000
},
"score": 1,
"resource": {
"primary": {
"URL": "http://ascopubs.org/doi/10.1200/jco.2007.25.18_suppl.3507"
}
},
"subtitle": [],
"short-title": [],
"issued": {
"date-parts": [
[
2007,
6,
20
]
]
},
"references-count": 0,
"journal-issue": {
"issue": "18_suppl",
"published-print": {
"date-parts": [
[
2007,
6,
20
]
]
}
},
"alternative-id": [
"10.1200/jco.2007.25.18_suppl.3507"
],
"URL": "http://dx.doi.org/10.1200/jco.2007.25.18_suppl.3507",
"relation": {},
"ISSN": [
"0732-183X",
"1527-7755"
],
"issn-type": [
{
"value": "0732-183X",
"type": "print"
},
{
"value": "1527-7755",
"type": "electronic"
}
],
"subject": [],
"published": {
"date-parts": [
[
2007,
6,
20
]
]
}
}

View File

@ -25,8 +25,18 @@ class CrossrefMappingTest extends AbstractVocabularyTest {
val input = val input =
IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8") IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8")
println(Crossref2Oaf.convert(input, vocabularies, TransformationType.All)) Crossref2Oaf.convert(input, vocabularies, TransformationType.All).foreach(record => {
println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(record))
})
} }
@Test
def mappingAffiliation(): Unit = {
val input =
IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/affiliationTest.json"), "utf-8")
val data = Crossref2Oaf.convert(input, vocabularies, TransformationType.OnlyResult)
println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(data.head))
}
} }