forked from D-Net/dnet-hadoop
Crossref Enhancements:
- Accurate Review Type Assignment: Resolved the issue identified in ticket https://support.openaire.eu/issues/9525#note-13. When an "is-review-of" relation is detected, the publication type is now correctly set to "Review".
- Enhanced Author Affiliation Data: Implemented Miriam's suggestion by adding a new field, "RawAffiliationString", to each author entry. This additional data provides more granular detail about author affiliations, potentially improving discoverability and research analysis.
This commit is contained in:
parent
a42c8b7c85
commit
a1297082e2
|
@ -37,7 +37,7 @@ case class mappingAuthor(
|
||||||
family: Option[String],
|
family: Option[String],
|
||||||
sequence: Option[String],
|
sequence: Option[String],
|
||||||
ORCID: Option[String],
|
ORCID: Option[String],
|
||||||
affiliation: Option[mappingAffiliation]
|
affiliation: Option[List[mappingAffiliation]]
|
||||||
) {}
|
) {}
|
||||||
|
|
||||||
case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {}
|
case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {}
|
||||||
|
@ -457,15 +457,14 @@ case object Crossref2Oaf {
|
||||||
}
|
}
|
||||||
|
|
||||||
//Mapping Author
|
//Mapping Author
|
||||||
val authorList: List[mappingAuthor] =
|
val authorList: List[mappingAuthor] = (json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined)
|
||||||
(json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined)
|
|
||||||
|
|
||||||
val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) =>
|
val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) =>
|
||||||
a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")
|
a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")
|
||||||
)
|
)
|
||||||
|
|
||||||
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
|
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
|
||||||
generateAuhtor(a.given.orNull, a.family.get, a.ORCID.orNull, index)
|
generateAuthor(a.given.orNull, a.family.get, a.ORCID.orNull, index, a.affiliation)
|
||||||
}.asJava)
|
}.asJava)
|
||||||
|
|
||||||
// Mapping instance
|
// Mapping instance
|
||||||
|
@ -504,18 +503,6 @@ case object Crossref2Oaf {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
val is_review = json \ "relation" \ "is-review-of" \ "id"
|
|
||||||
|
|
||||||
if (is_review != JNothing) {
|
|
||||||
instance.setInstancetype(
|
|
||||||
OafMapperUtils.qualifier(
|
|
||||||
"0015",
|
|
||||||
"peerReviewed",
|
|
||||||
ModelConstants.DNET_REVIEW_LEVELS,
|
|
||||||
ModelConstants.DNET_REVIEW_LEVELS
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
if (doi.startsWith("10.3410") || doi.startsWith("10.12703"))
|
if (doi.startsWith("10.3410") || doi.startsWith("10.12703"))
|
||||||
instance.setHostedby(
|
instance.setHostedby(
|
||||||
|
@ -569,17 +556,24 @@ case object Crossref2Oaf {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def generateIdentifier(oaf: Result, doi: String): String = {
|
def generateIdentifier(oaf: Result, doi: String): String = {
|
||||||
val id = DHPUtils.md5(doi.toLowerCase)
|
val id = DHPUtils.md5(doi.toLowerCase)
|
||||||
s"50|doiboost____|$id"
|
s"50|doiboost____|$id"
|
||||||
}
|
}
|
||||||
|
|
||||||
def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = {
|
private def generateAuthor(given: String, family: String, orcid: String, index: Int, affiliation: Option[List[mappingAffiliation]]): Author = {
|
||||||
val a = new Author
|
val a = new Author
|
||||||
a.setName(given)
|
a.setName(given)
|
||||||
a.setSurname(family)
|
a.setSurname(family)
|
||||||
a.setFullname(s"$given $family")
|
a.setFullname(s"$given $family")
|
||||||
a.setRank(index + 1)
|
a.setRank(index + 1)
|
||||||
|
|
||||||
|
// Adding Raw affiliation if it's defined
|
||||||
|
if (affiliation.isDefined) {
|
||||||
|
a.setRawAffiliationString(affiliation.get.map(a => a.name).asJava)
|
||||||
|
}
|
||||||
if (StringUtils.isNotBlank(orcid))
|
if (StringUtils.isNotBlank(orcid))
|
||||||
a.setPid(
|
a.setPid(
|
||||||
List(
|
List(
|
||||||
|
@ -705,11 +699,21 @@ case object Crossref2Oaf {
|
||||||
val objectType = (json \ "type").extractOrElse[String](null)
|
val objectType = (json \ "type").extractOrElse[String](null)
|
||||||
if (objectType == null)
|
if (objectType == null)
|
||||||
return resultList
|
return resultList
|
||||||
val typology = getTypeQualifier(objectType, vocabularies)
|
|
||||||
|
|
||||||
|
// If the item has an "is-review-of" relation, force its type to peer-review
|
||||||
|
val is_review = json \ "relation" \ "is-review-of" \ "id"
|
||||||
|
var force_to_review = false
|
||||||
|
if (is_review != JNothing) {
|
||||||
|
force_to_review = true
|
||||||
|
}
|
||||||
|
|
||||||
|
val typology = getTypeQualifier(if (force_to_review) "peer-review" else objectType, vocabularies)
|
||||||
|
|
||||||
if (typology == null)
|
if (typology == null)
|
||||||
return List()
|
return List()
|
||||||
|
|
||||||
|
|
||||||
val result = generateItemFromType(typology._2)
|
val result = generateItemFromType(typology._2)
|
||||||
if (result == null)
|
if (result == null)
|
||||||
return List()
|
return List()
|
||||||
|
@ -757,33 +761,6 @@ case object Crossref2Oaf {
|
||||||
else
|
else
|
||||||
resultList
|
resultList
|
||||||
}
|
}
|
||||||
|
|
||||||
// if (uw != null) {
|
|
||||||
// result.getCollectedfrom.add(createUnpayWallCollectedFrom())
|
|
||||||
// val i: Instance = new Instance()
|
|
||||||
// i.setCollectedfrom(createUnpayWallCollectedFrom())
|
|
||||||
// if (uw.best_oa_location != null) {
|
|
||||||
//
|
|
||||||
// i.setUrl(List(uw.best_oa_location.url).asJava)
|
|
||||||
// if (uw.best_oa_location.license.isDefined) {
|
|
||||||
// i.setLicense(field[String](uw.best_oa_location.license.get, null))
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// val colour = get_unpaywall_color(uw.oa_status)
|
|
||||||
// if (colour.isDefined) {
|
|
||||||
// val a = new AccessRight
|
|
||||||
// a.setClassid(ModelConstants.ACCESS_RIGHT_OPEN)
|
|
||||||
// a.setClassname(ModelConstants.ACCESS_RIGHT_OPEN)
|
|
||||||
// a.setSchemeid(ModelConstants.DNET_ACCESS_MODES)
|
|
||||||
// a.setSchemename(ModelConstants.DNET_ACCESS_MODES)
|
|
||||||
// a.setOpenAccessRoute(colour.get)
|
|
||||||
// i.setAccessright(a)
|
|
||||||
// }
|
|
||||||
// i.setPid(result.getPid)
|
|
||||||
// result.getInstance().add(i)
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private def createCiteRelation(source: Result, targetPid: String, targetPidType: String): List[Relation] = {
|
private def createCiteRelation(source: Result, targetPid: String, targetPidType: String): List[Relation] = {
|
||||||
|
|
|
@ -0,0 +1,232 @@
|
||||||
|
{
|
||||||
|
"indexed": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2022,
|
||||||
|
4,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"date-time": "2022-04-03T01:45:59Z",
|
||||||
|
"timestamp": 1648950359167
|
||||||
|
},
|
||||||
|
"reference-count": 0,
|
||||||
|
"publisher": "American Society of Clinical Oncology (ASCO)",
|
||||||
|
"issue": "18_suppl",
|
||||||
|
"content-domain": {
|
||||||
|
"domain": [],
|
||||||
|
"crossmark-restriction": false
|
||||||
|
},
|
||||||
|
"short-container-title": [
|
||||||
|
"JCO"
|
||||||
|
],
|
||||||
|
"published-print": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2007,
|
||||||
|
6,
|
||||||
|
20
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"abstract": "<jats:p> 3507 </jats:p><jats:p> Purpose: To detect IGF-1R on circulating tumor cells (CTCs) as a biomarker in the clinical development of a monoclonal human antibody, CP-751,871, targeting IGF-1R. Experimental Design: An automated sample preparation and analysis system for enumerating CTCs (Celltracks) was adapted for detecting IGF-1R positive CTCs with a diagnostic antibody targeting a different IGF-1R epitope to CP-751,871. This assay was utilized in three phase I trials of CP-751,871 as a single agent or with chemotherapy and was validated using cell lines and blood samples from healthy volunteers and patients with metastatic carcinoma. Results: There was no interference between the analytical and therapeutic antibodies. CP-751,871 was well tolerated as a single agent, and in combination with docetaxel or carboplatin and paclitaxel, at doses ranging from 0.05 mg/kg to 20 mg/kg. Eighty patients were enrolled on phase 1 studies of CP-751,871, with 47 (59%) patients having CTCs detected during the study. Prior to treatment 26 patients (33%) had CTCs, with 23 having detectable IGF-1R positive CTCs. CP-751,871 alone, and CP-751,871 with cytotoxic chemotherapy, decreased CTCs and IGF-1R positive CTCs; these increased towards the end of the 21-day cycle in some patients, falling again with retreatment. CTCs were commonest in advanced hormone refractory prostate cancer (11/20). Detectable IGF-1R expression on CTCs before treatment with CP-751,871 and docetaxel was associated with a higher frequency of PSA decline by more than 50% (6/10 vs 2/8 patients). A relationship was observed between sustained falls in CTCs counts and PSA declines by more than 50%. Conclusions: IGF-1R expression is detectable by immunofluorescence on CTCs. These data support the further evaluation of CTCs in pharmacodynamic studies and patient selection, particularly in advanced prostate cancer. </jats:p><jats:p> No significant financial relationships to disclose. </jats:p>",
|
||||||
|
"DOI": "10.1200/jco.2007.25.18_suppl.3507",
|
||||||
|
"type": "journal-article",
|
||||||
|
"created": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
3,
|
||||||
|
6
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"date-time": "2020-03-06T20:50:42Z",
|
||||||
|
"timestamp": 1583527842000
|
||||||
|
},
|
||||||
|
"page": "3507-3507",
|
||||||
|
"source": "Crossref",
|
||||||
|
"is-referenced-by-count": 0,
|
||||||
|
"title": [
|
||||||
|
"Circulating tumor cells expressing the insulin growth factor-1 receptor (IGF-1R): Method of detection, incidence and potential applications"
|
||||||
|
],
|
||||||
|
"prefix": "10.1200",
|
||||||
|
"volume": "25",
|
||||||
|
"author": [
|
||||||
|
{
|
||||||
|
"given": "J. S.",
|
||||||
|
"family": "de Bono",
|
||||||
|
"sequence": "first",
|
||||||
|
"affiliation": [
|
||||||
|
{
|
||||||
|
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "A.",
|
||||||
|
"family": "Adjei",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": [
|
||||||
|
{
|
||||||
|
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "G.",
|
||||||
|
"family": "Attard",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": [
|
||||||
|
{
|
||||||
|
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "M.",
|
||||||
|
"family": "Pollak",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": [
|
||||||
|
{
|
||||||
|
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "P.",
|
||||||
|
"family": "Fong",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": [
|
||||||
|
{
|
||||||
|
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "P.",
|
||||||
|
"family": "Haluska",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": [
|
||||||
|
{
|
||||||
|
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "L.",
|
||||||
|
"family": "Roberts",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": [
|
||||||
|
{
|
||||||
|
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "D.",
|
||||||
|
"family": "Chainese",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": [
|
||||||
|
{
|
||||||
|
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "L.",
|
||||||
|
"family": "Terstappen",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": [
|
||||||
|
{
|
||||||
|
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"given": "A.",
|
||||||
|
"family": "Gualberto",
|
||||||
|
"sequence": "additional",
|
||||||
|
"affiliation": [
|
||||||
|
{
|
||||||
|
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"member": "233",
|
||||||
|
"container-title": [
|
||||||
|
"Journal of Clinical Oncology"
|
||||||
|
],
|
||||||
|
"original-title": [],
|
||||||
|
"language": "en",
|
||||||
|
"deposited": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2020,
|
||||||
|
3,
|
||||||
|
6
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"date-time": "2020-03-06T20:51:03Z",
|
||||||
|
"timestamp": 1583527863000
|
||||||
|
},
|
||||||
|
"score": 1,
|
||||||
|
"resource": {
|
||||||
|
"primary": {
|
||||||
|
"URL": "http://ascopubs.org/doi/10.1200/jco.2007.25.18_suppl.3507"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"subtitle": [],
|
||||||
|
"short-title": [],
|
||||||
|
"issued": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2007,
|
||||||
|
6,
|
||||||
|
20
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"references-count": 0,
|
||||||
|
"journal-issue": {
|
||||||
|
"issue": "18_suppl",
|
||||||
|
"published-print": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2007,
|
||||||
|
6,
|
||||||
|
20
|
||||||
|
]
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"alternative-id": [
|
||||||
|
"10.1200/jco.2007.25.18_suppl.3507"
|
||||||
|
],
|
||||||
|
"URL": "http://dx.doi.org/10.1200/jco.2007.25.18_suppl.3507",
|
||||||
|
"relation": {},
|
||||||
|
"ISSN": [
|
||||||
|
"0732-183X",
|
||||||
|
"1527-7755"
|
||||||
|
],
|
||||||
|
"issn-type": [
|
||||||
|
{
|
||||||
|
"value": "0732-183X",
|
||||||
|
"type": "print"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"value": "1527-7755",
|
||||||
|
"type": "electronic"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"subject": [],
|
||||||
|
"published": {
|
||||||
|
"date-parts": [
|
||||||
|
[
|
||||||
|
2007,
|
||||||
|
6,
|
||||||
|
20
|
||||||
|
]
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
|
@ -25,8 +25,18 @@ class CrossrefMappingTest extends AbstractVocabularyTest {
|
||||||
val input =
|
val input =
|
||||||
IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8")
|
IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8")
|
||||||
|
|
||||||
println(Crossref2Oaf.convert(input, vocabularies, TransformationType.All))
|
Crossref2Oaf.convert(input, vocabularies, TransformationType.All).foreach(record => {
|
||||||
|
println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(record))
|
||||||
|
})
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def mappingAffiliation(): Unit = {
|
||||||
|
val input =
|
||||||
|
IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/affiliationTest.json"), "utf-8")
|
||||||
|
val data = Crossref2Oaf.convert(input, vocabularies, TransformationType.OnlyResult)
|
||||||
|
println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(data.head))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue