forked from D-Net/dnet-hadoop
Merge pull request 'Crossref Enhancements:' (#511) from crossref_mapping_improvement into beta
Reviewed-on: D-Net/dnet-hadoop#511
This commit is contained in:
commit
77308ed525
|
@ -37,7 +37,7 @@ case class mappingAuthor(
|
|||
family: Option[String],
|
||||
sequence: Option[String],
|
||||
ORCID: Option[String],
|
||||
affiliation: Option[mappingAffiliation]
|
||||
affiliation: Option[List[mappingAffiliation]]
|
||||
) {}
|
||||
|
||||
/** Funder record made of an id, a uri, a name and a list of synonyms. */
case class funderInfo(
  id: String,
  uri: String,
  name: String,
  synonym: List[String]
)
|
||||
|
@ -457,15 +457,14 @@ case object Crossref2Oaf {
|
|||
}
|
||||
|
||||
//Mapping Author
|
||||
val authorList: List[mappingAuthor] =
|
||||
(json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined)
|
||||
val authorList: List[mappingAuthor] = (json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined)
|
||||
|
||||
val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) =>
|
||||
a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")
|
||||
)
|
||||
|
||||
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
|
||||
generateAuhtor(a.given.orNull, a.family.get, a.ORCID.orNull, index)
|
||||
generateAuthor(a.given.orNull, a.family.get, a.ORCID.orNull, index, a.affiliation)
|
||||
}.asJava)
|
||||
|
||||
// Mapping instance
|
||||
|
@ -504,18 +503,6 @@ case object Crossref2Oaf {
|
|||
)
|
||||
}
|
||||
|
||||
val is_review = json \ "relation" \ "is-review-of" \ "id"
|
||||
|
||||
if (is_review != JNothing) {
|
||||
instance.setInstancetype(
|
||||
OafMapperUtils.qualifier(
|
||||
"0015",
|
||||
"peerReviewed",
|
||||
ModelConstants.DNET_REVIEW_LEVELS,
|
||||
ModelConstants.DNET_REVIEW_LEVELS
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
if (doi.startsWith("10.3410") || doi.startsWith("10.12703"))
|
||||
instance.setHostedby(
|
||||
|
@ -569,17 +556,24 @@ case object Crossref2Oaf {
|
|||
result
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Builds the identifier string for a DOI.
  *
  * The DOI is lower-cased and passed to `DHPUtils.md5`; the returned value is
  * appended to the fixed prefix `50|doiboost____|`.
  *
  * @param oaf not read by this method
  * @param doi the DOI the identifier is derived from
  * @return the prefix `50|doiboost____|` followed by the value returned by `DHPUtils.md5`
  */
def generateIdentifier(oaf: Result, doi: String): String =
  s"50|doiboost____|${DHPUtils.md5(doi.toLowerCase)}"
|
||||
|
||||
def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = {
|
||||
private def generateAuthor(given: String, family: String, orcid: String, index: Int, affiliation: Option[List[mappingAffiliation]]): Author = {
|
||||
val a = new Author
|
||||
a.setName(given)
|
||||
a.setSurname(family)
|
||||
a.setFullname(s"$given $family")
|
||||
a.setRank(index + 1)
|
||||
|
||||
// Adding Raw affiliation if it's defined
|
||||
if (affiliation.isDefined) {
|
||||
a.setRawAffiliationString(affiliation.get.map(a => a.name).asJava)
|
||||
}
|
||||
if (StringUtils.isNotBlank(orcid))
|
||||
a.setPid(
|
||||
List(
|
||||
|
@ -705,11 +699,21 @@ case object Crossref2Oaf {
|
|||
val objectType = (json \ "type").extractOrElse[String](null)
|
||||
if (objectType == null)
|
||||
return resultList
|
||||
val typology = getTypeQualifier(objectType, vocabularies)
|
||||
|
||||
|
||||
// If the item has an is-review-of relation, force its type to peer-review
|
||||
val is_review = json \ "relation" \ "is-review-of" \ "id"
|
||||
var force_to_review = false
|
||||
if (is_review != JNothing) {
|
||||
force_to_review = true
|
||||
}
|
||||
|
||||
val typology = getTypeQualifier(if (force_to_review) "peer-review" else objectType, vocabularies)
|
||||
|
||||
if (typology == null)
|
||||
return List()
|
||||
|
||||
|
||||
val result = generateItemFromType(typology._2)
|
||||
if (result == null)
|
||||
return List()
|
||||
|
@ -757,33 +761,6 @@ case object Crossref2Oaf {
|
|||
else
|
||||
resultList
|
||||
}
|
||||
|
||||
// if (uw != null) {
|
||||
// result.getCollectedfrom.add(createUnpayWallCollectedFrom())
|
||||
// val i: Instance = new Instance()
|
||||
// i.setCollectedfrom(createUnpayWallCollectedFrom())
|
||||
// if (uw.best_oa_location != null) {
|
||||
//
|
||||
// i.setUrl(List(uw.best_oa_location.url).asJava)
|
||||
// if (uw.best_oa_location.license.isDefined) {
|
||||
// i.setLicense(field[String](uw.best_oa_location.license.get, null))
|
||||
// }
|
||||
//
|
||||
// val colour = get_unpaywall_color(uw.oa_status)
|
||||
// if (colour.isDefined) {
|
||||
// val a = new AccessRight
|
||||
// a.setClassid(ModelConstants.ACCESS_RIGHT_OPEN)
|
||||
// a.setClassname(ModelConstants.ACCESS_RIGHT_OPEN)
|
||||
// a.setSchemeid(ModelConstants.DNET_ACCESS_MODES)
|
||||
// a.setSchemename(ModelConstants.DNET_ACCESS_MODES)
|
||||
// a.setOpenAccessRoute(colour.get)
|
||||
// i.setAccessright(a)
|
||||
// }
|
||||
// i.setPid(result.getPid)
|
||||
// result.getInstance().add(i)
|
||||
// }
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
private def createCiteRelation(source: Result, targetPid: String, targetPidType: String): List[Relation] = {
|
||||
|
|
|
@ -0,0 +1,232 @@
|
|||
{
|
||||
"indexed": {
|
||||
"date-parts": [
|
||||
[
|
||||
2022,
|
||||
4,
|
||||
3
|
||||
]
|
||||
],
|
||||
"date-time": "2022-04-03T01:45:59Z",
|
||||
"timestamp": 1648950359167
|
||||
},
|
||||
"reference-count": 0,
|
||||
"publisher": "American Society of Clinical Oncology (ASCO)",
|
||||
"issue": "18_suppl",
|
||||
"content-domain": {
|
||||
"domain": [],
|
||||
"crossmark-restriction": false
|
||||
},
|
||||
"short-container-title": [
|
||||
"JCO"
|
||||
],
|
||||
"published-print": {
|
||||
"date-parts": [
|
||||
[
|
||||
2007,
|
||||
6,
|
||||
20
|
||||
]
|
||||
]
|
||||
},
|
||||
"abstract": "<jats:p> 3507 </jats:p><jats:p> Purpose: To detect IGF-1R on circulating tumor cells (CTCs) as a biomarker in the clinical development of a monoclonal human antibody, CP-751,871, targeting IGF-1R. Experimental Design: An automated sample preparation and analysis system for enumerating CTCs (Celltracks) was adapted for detecting IGF-1R positive CTCs with a diagnostic antibody targeting a different IGF-1R epitope to CP-751,871. This assay was utilized in three phase I trials of CP-751,871 as a single agent or with chemotherapy and was validated using cell lines and blood samples from healthy volunteers and patients with metastatic carcinoma. Results: There was no interference between the analytical and therapeutic antibodies. CP-751,871 was well tolerated as a single agent, and in combination with docetaxel or carboplatin and paclitaxel, at doses ranging from 0.05 mg/kg to 20 mg/kg. Eighty patients were enrolled on phase 1 studies of CP-751,871, with 47 (59%) patients having CTCs detected during the study. Prior to treatment 26 patients (33%) had CTCs, with 23 having detectable IGF-1R positive CTCs. CP-751,871 alone, and CP-751,871 with cytotoxic chemotherapy, decreased CTCs and IGF-1R positive CTCs; these increased towards the end of the 21-day cycle in some patients, falling again with retreatment. CTCs were commonest in advanced hormone refractory prostate cancer (11/20). Detectable IGF-1R expression on CTCs before treatment with CP-751,871 and docetaxel was associated with a higher frequency of PSA decline by more than 50% (6/10 vs 2/8 patients). A relationship was observed between sustained falls in CTCs counts and PSA declines by more than 50%. Conclusions: IGF-1R expression is detectable by immunofluorescence on CTCs. These data support the further evaluation of CTCs in pharmacodynamic studies and patient selection, particularly in advanced prostate cancer. </jats:p><jats:p> No significant financial relationships to disclose. </jats:p>",
|
||||
"DOI": "10.1200/jco.2007.25.18_suppl.3507",
|
||||
"type": "journal-article",
|
||||
"created": {
|
||||
"date-parts": [
|
||||
[
|
||||
2020,
|
||||
3,
|
||||
6
|
||||
]
|
||||
],
|
||||
"date-time": "2020-03-06T20:50:42Z",
|
||||
"timestamp": 1583527842000
|
||||
},
|
||||
"page": "3507-3507",
|
||||
"source": "Crossref",
|
||||
"is-referenced-by-count": 0,
|
||||
"title": [
|
||||
"Circulating tumor cells expressing the insulin growth factor-1 receptor (IGF-1R): Method of detection, incidence and potential applications"
|
||||
],
|
||||
"prefix": "10.1200",
|
||||
"volume": "25",
|
||||
"author": [
|
||||
{
|
||||
"given": "J. S.",
|
||||
"family": "de Bono",
|
||||
"sequence": "first",
|
||||
"affiliation": [
|
||||
{
|
||||
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"given": "A.",
|
||||
"family": "Adjei",
|
||||
"sequence": "additional",
|
||||
"affiliation": [
|
||||
{
|
||||
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"given": "G.",
|
||||
"family": "Attard",
|
||||
"sequence": "additional",
|
||||
"affiliation": [
|
||||
{
|
||||
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"given": "M.",
|
||||
"family": "Pollak",
|
||||
"sequence": "additional",
|
||||
"affiliation": [
|
||||
{
|
||||
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"given": "P.",
|
||||
"family": "Fong",
|
||||
"sequence": "additional",
|
||||
"affiliation": [
|
||||
{
|
||||
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"given": "P.",
|
||||
"family": "Haluska",
|
||||
"sequence": "additional",
|
||||
"affiliation": [
|
||||
{
|
||||
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"given": "L.",
|
||||
"family": "Roberts",
|
||||
"sequence": "additional",
|
||||
"affiliation": [
|
||||
{
|
||||
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"given": "D.",
|
||||
"family": "Chainese",
|
||||
"sequence": "additional",
|
||||
"affiliation": [
|
||||
{
|
||||
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"given": "L.",
|
||||
"family": "Terstappen",
|
||||
"sequence": "additional",
|
||||
"affiliation": [
|
||||
{
|
||||
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"given": "A.",
|
||||
"family": "Gualberto",
|
||||
"sequence": "additional",
|
||||
"affiliation": [
|
||||
{
|
||||
"name": "Royal Marsden Hospital, Surrey, United Kingdom; Mayo Clinic, Rochester, MN; McGill University & Lady Davis Research Institute, Montreal, PQ, Canada; Pfizer Global Research & Development, New London, CT; Immunicon Corporation, Huntingdon Valley, PA"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"member": "233",
|
||||
"container-title": [
|
||||
"Journal of Clinical Oncology"
|
||||
],
|
||||
"original-title": [],
|
||||
"language": "en",
|
||||
"deposited": {
|
||||
"date-parts": [
|
||||
[
|
||||
2020,
|
||||
3,
|
||||
6
|
||||
]
|
||||
],
|
||||
"date-time": "2020-03-06T20:51:03Z",
|
||||
"timestamp": 1583527863000
|
||||
},
|
||||
"score": 1,
|
||||
"resource": {
|
||||
"primary": {
|
||||
"URL": "http://ascopubs.org/doi/10.1200/jco.2007.25.18_suppl.3507"
|
||||
}
|
||||
},
|
||||
"subtitle": [],
|
||||
"short-title": [],
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
2007,
|
||||
6,
|
||||
20
|
||||
]
|
||||
]
|
||||
},
|
||||
"references-count": 0,
|
||||
"journal-issue": {
|
||||
"issue": "18_suppl",
|
||||
"published-print": {
|
||||
"date-parts": [
|
||||
[
|
||||
2007,
|
||||
6,
|
||||
20
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
"alternative-id": [
|
||||
"10.1200/jco.2007.25.18_suppl.3507"
|
||||
],
|
||||
"URL": "http://dx.doi.org/10.1200/jco.2007.25.18_suppl.3507",
|
||||
"relation": {},
|
||||
"ISSN": [
|
||||
"0732-183X",
|
||||
"1527-7755"
|
||||
],
|
||||
"issn-type": [
|
||||
{
|
||||
"value": "0732-183X",
|
||||
"type": "print"
|
||||
},
|
||||
{
|
||||
"value": "1527-7755",
|
||||
"type": "electronic"
|
||||
}
|
||||
],
|
||||
"subject": [],
|
||||
"published": {
|
||||
"date-parts": [
|
||||
[
|
||||
2007,
|
||||
6,
|
||||
20
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
|
@ -3,12 +3,15 @@ package eu.dnetlib.dhp.collection.crossref
|
|||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||
import eu.dnetlib.dhp.collection.crossref.Crossref2Oaf.TransformationType
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||
import org.junit.jupiter.api.{Assertions, BeforeEach, Test}
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
import org.mockito.junit.jupiter.MockitoExtension
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.collection.JavaConverters.asScalaBufferConverter
|
||||
|
||||
@ExtendWith(Array(classOf[MockitoExtension]))
|
||||
class CrossrefMappingTest extends AbstractVocabularyTest {
|
||||
|
||||
|
@ -25,8 +28,28 @@ class CrossrefMappingTest extends AbstractVocabularyTest {
|
|||
val input =
|
||||
IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8")
|
||||
|
||||
println(Crossref2Oaf.convert(input, vocabularies, TransformationType.All))
|
||||
Crossref2Oaf.convert(input, vocabularies, TransformationType.All).foreach(record => {
|
||||
Assertions.assertNotNull(record)
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
|
||||
/** Checks that author affiliations found in the Crossref record
  * `affiliationTest.json` end up in each author's raw affiliation strings.
  *
  * The record is converted with `TransformationType.OnlyResult`; every
  * produced record must be a `Publication` and every author must carry a
  * non-empty raw affiliation list.
  */
@Test
def mappingAffiliation(): Unit = {
  val input =
    IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/affiliationTest.json"), "utf-8")
  val data = Crossref2Oaf.convert(input, vocabularies, TransformationType.OnlyResult)

  // Without this guard an empty result skips every assertion below and only
  // fails later on `data.head` with an unrelated NoSuchElementException.
  Assertions.assertFalse(data.isEmpty, "the conversion produced no records")

  data.foreach(record => {
    Assertions.assertNotNull(record)
    Assertions.assertTrue(record.isInstanceOf[Publication])
    val publication = record.asInstanceOf[Publication]

    // An empty author list would make the per-author checks pass vacuously.
    val authors = publication.getAuthor
    Assertions.assertNotNull(authors, "the publication has no author list")
    Assertions.assertFalse(authors.isEmpty, "the publication has an empty author list")

    authors.asScala.foreach(author => {
      val rawAffiliations = author.getRawAffiliationString
      Assertions.assertNotNull(rawAffiliations)
      Assertions.assertFalse(rawAffiliations.isEmpty)
    })
  })

  // Printed for manual inspection of the mapped record.
  println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(data.head))
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue