|
|
|
@ -2,90 +2,34 @@ package eu.dnetlib.dhp.crossref
|
|
|
|
|
|
|
|
|
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
|
|
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
|
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
|
|
|
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants.OPEN_ACCESS_RIGHT
|
|
|
|
|
import eu.dnetlib.dhp.schema.oaf._
|
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
|
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils._
|
|
|
|
|
import org.apache.commons.lang.StringUtils
|
|
|
|
|
import org.json4s
|
|
|
|
|
import org.json4s.DefaultFormats
|
|
|
|
|
import org.json4s.JsonAST.{JField, JObject, JString, JValue}
|
|
|
|
|
import org.json4s.JsonAST._
|
|
|
|
|
import org.json4s.jackson.JsonMethods.parse
|
|
|
|
|
import org.slf4j.{Logger, LoggerFactory}
|
|
|
|
|
|
|
|
|
|
import java.time.LocalDate
|
|
|
|
|
import java.time.format.DateTimeFormatter
|
|
|
|
|
import scala.collection.JavaConverters._
|
|
|
|
|
import scala.collection.mutable.ListBuffer
|
|
|
|
|
import scala.util.matching.Regex
|
|
|
|
|
|
|
|
|
|
case class CrossrefDT(doi: String, json: String, timestamp: Long) {}
|
|
|
|
|
|
|
|
|
|
case class CrossrefAuthor(givenName:String, familyName:String,ORCID:String, sequence:String, rank:Int ){}
|
|
|
|
|
|
|
|
|
|
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
|
|
|
|
|
|
|
|
|
|
object CrossrefUtility {
|
|
|
|
|
val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)"
|
|
|
|
|
val DOI_PREFIX = "10."
|
|
|
|
|
val CROSSREF_COLLECTED_FROM = keyValue(ModelConstants.CROSSREF_ID, ModelConstants.CROSSREF_NAME)
|
|
|
|
|
|
|
|
|
|
def normalizeDoi(input: String): String = {
|
|
|
|
|
if (input == null)
|
|
|
|
|
return null
|
|
|
|
|
val replaced = input
|
|
|
|
|
.replaceAll("(?:\\n|\\r|\\t|\\s)", "")
|
|
|
|
|
.toLowerCase
|
|
|
|
|
.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)
|
|
|
|
|
if (replaced == null || replaced.trim.isEmpty)
|
|
|
|
|
return null
|
|
|
|
|
if (replaced.indexOf("10.") < 0)
|
|
|
|
|
return null
|
|
|
|
|
val ret = replaced.substring(replaced.indexOf("10."))
|
|
|
|
|
if (!ret.startsWith(DOI_PREFIX))
|
|
|
|
|
return null
|
|
|
|
|
ret
|
|
|
|
|
}
|
|
|
|
|
val logger: Logger = LoggerFactory.getLogger(getClass)
|
|
|
|
|
|
|
|
|
|
def extractDate(dt: String, datePart: List[List[Int]]): String = {
|
|
|
|
|
if (StringUtils.isNotBlank(dt))
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(dt)
|
|
|
|
|
if (datePart != null && datePart.size == 1) {
|
|
|
|
|
val res = datePart.head
|
|
|
|
|
if (res.size == 3) {
|
|
|
|
|
val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
|
|
|
|
|
if (dp.length == 10) {
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(dp)
|
|
|
|
|
}
|
|
|
|
|
} else if (res.size == 2) {
|
|
|
|
|
val dp = f"${res.head}-${res(1)}%02d-01"
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(dp)
|
|
|
|
|
} else if (res.size == 1) {
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
null
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private def generateDate(
|
|
|
|
|
dt: String,
|
|
|
|
|
datePart: List[List[Int]],
|
|
|
|
|
classId: String,
|
|
|
|
|
schemeId: String
|
|
|
|
|
): StructuredProperty = {
|
|
|
|
|
val dp = extractDate(dt, datePart)
|
|
|
|
|
if (StringUtils.isNotBlank(dp))
|
|
|
|
|
structuredProperty(dp, classId, classId, schemeId)
|
|
|
|
|
else
|
|
|
|
|
null
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String) = {
|
|
|
|
|
val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType)
|
|
|
|
|
if (term != null) {
|
|
|
|
|
val resourceType =
|
|
|
|
|
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname
|
|
|
|
|
|
|
|
|
|
resourceType match {
|
|
|
|
|
case "publication" => (new Publication, resourceType)
|
|
|
|
|
case "dataset" => (new Dataset, resourceType)
|
|
|
|
|
case "software" => (new Software, resourceType)
|
|
|
|
|
case "otherresearchproduct" => (new OtherResearchProduct, resourceType)
|
|
|
|
|
}
|
|
|
|
|
} else
|
|
|
|
|
null
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
|
|
|
|
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
|
|
|
@ -103,7 +47,8 @@ object CrossrefUtility {
|
|
|
|
|
|
|
|
|
|
val result = resultWithType._1
|
|
|
|
|
val cOBJCategory = resultWithType._2
|
|
|
|
|
mappingResult(result, json, cOBJCategory)
|
|
|
|
|
val className = resultWithType._3
|
|
|
|
|
mappingResult(result, json, cOBJCategory, className)
|
|
|
|
|
if (result == null || result.getId == null)
|
|
|
|
|
return List()
|
|
|
|
|
|
|
|
|
@ -111,29 +56,144 @@ object CrossrefUtility {
|
|
|
|
|
(json \ "funder").extractOrElse[List[mappingFunder]](List())
|
|
|
|
|
|
|
|
|
|
if (funderList.nonEmpty) {
|
|
|
|
|
resultList = resultList ::: mappingFunderToRelations(
|
|
|
|
|
funderList,
|
|
|
|
|
result.getId,
|
|
|
|
|
createCrossrefCollectedFrom(),
|
|
|
|
|
result.getDataInfo,
|
|
|
|
|
result.getLastupdatetimestamp
|
|
|
|
|
)
|
|
|
|
|
resultList = resultList ::: mappingFunderToRelations(funderList, result )
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result match {
|
|
|
|
|
case publication: Publication => convertPublication(publication, json, cOBJCategory)
|
|
|
|
|
case dataset: Dataset => convertDataset(dataset)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
resultList = resultList ::: List(result)
|
|
|
|
|
resultList
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {
|
|
|
|
|
private def createRelation(sourceId: String, targetId: String, relClass: String): Relation = {
|
|
|
|
|
val r = new Relation
|
|
|
|
|
r.setSource(sourceId)
|
|
|
|
|
r.setTarget(targetId)
|
|
|
|
|
r.setRelType(ModelConstants.RESULT_PROJECT)
|
|
|
|
|
r.setRelClass(relClass)
|
|
|
|
|
r.setSubRelType(ModelConstants.OUTCOME)
|
|
|
|
|
r.setProvenance(List(OafMapperUtils.getProvenance(CROSSREF_COLLECTED_FROM, null)).asJava)
|
|
|
|
|
r
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private def generateSimpleRelationFromAward(
|
|
|
|
|
funder: mappingFunder,
|
|
|
|
|
nsPrefix: String,
|
|
|
|
|
extractField: String => String,
|
|
|
|
|
source:Result
|
|
|
|
|
): List[Relation] = {
|
|
|
|
|
if (funder.award.isDefined && funder.award.get.nonEmpty)
|
|
|
|
|
funder.award.get
|
|
|
|
|
.map(extractField)
|
|
|
|
|
.filter(a => a != null && a.nonEmpty)
|
|
|
|
|
.map(award => {
|
|
|
|
|
val targetId = IdentifierFactory.createOpenaireId("project",s"$nsPrefix::$award", true)
|
|
|
|
|
createRelation(targetId, source.getId, ModelConstants.PRODUCES)
|
|
|
|
|
})
|
|
|
|
|
else List()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private def extractECAward(award: String): String = {
|
|
|
|
|
val awardECRegex: Regex = "[0-9]{4,9}".r
|
|
|
|
|
if (awardECRegex.findAllIn(award).hasNext)
|
|
|
|
|
return awardECRegex.findAllIn(award).max
|
|
|
|
|
null
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private def snsfRule(award: String): String = {
|
|
|
|
|
val tmp1 = StringUtils.substringAfter(award, "_")
|
|
|
|
|
val tmp2 = StringUtils.substringBefore(tmp1, "/")
|
|
|
|
|
tmp2
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private def mappingFunderToRelations(funders: List[mappingFunder], result: Result): List[Relation] = {
|
|
|
|
|
var relList:List[Relation] = List()
|
|
|
|
|
|
|
|
|
|
if (funders != null)
|
|
|
|
|
funders.foreach(funder => {
|
|
|
|
|
if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
|
|
|
|
|
funder.DOI.get match {
|
|
|
|
|
case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
|
|
|
|
|
"10.13039/100010665" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
|
|
|
|
|
case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
|
|
|
|
|
case "10.13039/501100000781" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
|
|
|
|
|
case "10.13039/100000001" => relList =relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result)
|
|
|
|
|
case "10.13039/501100001665" => relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
|
|
|
|
|
case "10.13039/501100002341" => relList =relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result)
|
|
|
|
|
case "10.13039/501100001602" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result)
|
|
|
|
|
case "10.13039/501100000923" => relList =relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result)
|
|
|
|
|
case "10.13039/501100000038" =>
|
|
|
|
|
val targetId = IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false)
|
|
|
|
|
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
|
|
|
|
|
case "10.13039/501100000155" =>
|
|
|
|
|
val targetId = IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false)
|
|
|
|
|
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
|
|
|
|
|
case "10.13039/501100000024" =>
|
|
|
|
|
val targetId = IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false)
|
|
|
|
|
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
|
|
|
|
|
case "10.13039/501100002848" => relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result)
|
|
|
|
|
case "10.13039/501100003448" => relList =relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result)
|
|
|
|
|
case "10.13039/501100010198" => relList =relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result)
|
|
|
|
|
case "10.13039/501100004564" => relList =relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result)
|
|
|
|
|
case "10.13039/501100003407" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result)
|
|
|
|
|
val targetId = IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false)
|
|
|
|
|
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
|
|
|
|
|
case "10.13039/501100006588" | "10.13039/501100004488" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(
|
|
|
|
|
funder,
|
|
|
|
|
"irb_hr______",
|
|
|
|
|
a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""), result
|
|
|
|
|
)
|
|
|
|
|
case "10.13039/501100006769" => relList =relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result)
|
|
|
|
|
case "10.13039/501100001711" => relList =relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result)
|
|
|
|
|
case "10.13039/501100004410" => relList =relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result)
|
|
|
|
|
case "10.13039/100004440" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
|
|
|
|
|
val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
|
|
|
|
|
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
|
|
|
|
|
case _ => logger.debug("no match for " + funder.DOI.get)
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
funder.name match {
|
|
|
|
|
case "European Union’s Horizon 2020 research and innovation program" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
|
|
|
|
|
case "European Union's" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
|
|
|
|
|
case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
|
|
|
|
|
case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result)
|
|
|
|
|
case "Wellcome Trust Masters Fellowship" =>
|
|
|
|
|
relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
|
|
|
|
|
val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
|
|
|
|
|
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
|
|
|
|
|
case _ => logger.debug("no match for " + funder.name)
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
})
|
|
|
|
|
relList
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private def mappingResult(result: Result, json: JValue, cobjCategory: String, className:String): Result = {
|
|
|
|
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
|
|
|
|
|
|
|
|
|
//MAPPING Crossref DOI into PID
|
|
|
|
|
val doi: String = normalizeDoi((json \ "DOI").extract[String])
|
|
|
|
|
val doi: String = CleaningFunctions.normalizePidValue(ModelConstants.DOI, (json \ "DOI").extract[String])
|
|
|
|
|
|
|
|
|
|
result.setPid(
|
|
|
|
|
List(
|
|
|
|
@ -176,9 +236,7 @@ object CrossrefUtility {
|
|
|
|
|
} yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
|
|
|
|
|
val subtitles =
|
|
|
|
|
for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty(
|
|
|
|
|
title,
|
|
|
|
|
ModelConstants.SUBTITLE_QUALIFIER
|
|
|
|
|
)
|
|
|
|
|
title, ModelConstants.SUBTITLE_QUALIFIER)
|
|
|
|
|
result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)
|
|
|
|
|
|
|
|
|
|
// DESCRIPTION
|
|
|
|
@ -244,21 +302,28 @@ object CrossrefUtility {
|
|
|
|
|
|
|
|
|
|
if (subjectList.nonEmpty) {
|
|
|
|
|
result.setSubject(
|
|
|
|
|
subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava
|
|
|
|
|
)
|
|
|
|
|
subjectList.map(s =>
|
|
|
|
|
OafMapperUtils.subject(s, OafMapperUtils.qualifier(ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_TYPOLOGIES), null)
|
|
|
|
|
).asJava)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//Mapping Author
|
|
|
|
|
val authorList: List[mappingAuthor] =
|
|
|
|
|
(json \ "author").extractOrElse[List[mappingAuthor]](List())
|
|
|
|
|
val authorList:List[CrossrefAuthor] =
|
|
|
|
|
for {
|
|
|
|
|
JObject(author) <- json \ "author"
|
|
|
|
|
JField("ORCID", JString(orcid)) <- author
|
|
|
|
|
JField("given", JString(givenName)) <- author
|
|
|
|
|
JField("family", JString(familyName)) <- author
|
|
|
|
|
JField("sequence", JString(sequence)) <- author
|
|
|
|
|
} yield CrossrefAuthor(givenName = givenName, familyName = familyName, ORCID = orcid, sequence = sequence, rank = 0)
|
|
|
|
|
|
|
|
|
|
val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) =>
|
|
|
|
|
a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
|
|
|
|
|
generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)
|
|
|
|
|
}.asJava)
|
|
|
|
|
result.setAuthor(authorList.sortWith((a,b) =>{
|
|
|
|
|
if (a.sequence.equalsIgnoreCase("first"))
|
|
|
|
|
true
|
|
|
|
|
else if (b.sequence.equalsIgnoreCase("first"))
|
|
|
|
|
false
|
|
|
|
|
else a.familyName< b.familyName
|
|
|
|
|
}).zipWithIndex.map(k=> k._1.copy(rank = k._2)).map(k => generateAuthor(k)).asJava)
|
|
|
|
|
|
|
|
|
|
// Mapping instance
|
|
|
|
|
val instance = new Instance()
|
|
|
|
@ -266,8 +331,8 @@ object CrossrefUtility {
|
|
|
|
|
JObject(license) <- json \ "license"
|
|
|
|
|
JField("URL", JString(lic)) <- license
|
|
|
|
|
JField("content-version", JString(content_version)) <- license
|
|
|
|
|
} yield (asField(lic), content_version)
|
|
|
|
|
val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue))
|
|
|
|
|
} yield (new License(lic), content_version)
|
|
|
|
|
val l = license.filter(d => StringUtils.isNotBlank(d._1.getUrl))
|
|
|
|
|
if (l.nonEmpty) {
|
|
|
|
|
if (l exists (d => d._2.equals("vor"))) {
|
|
|
|
|
for (d <- l) {
|
|
|
|
@ -290,66 +355,225 @@ object CrossrefUtility {
|
|
|
|
|
OafMapperUtils.qualifier(
|
|
|
|
|
"0001",
|
|
|
|
|
"peerReviewed",
|
|
|
|
|
ModelConstants.DNET_REVIEW_LEVELS,
|
|
|
|
|
ModelConstants.DNET_REVIEW_LEVELS
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
instance.setAccessright(
|
|
|
|
|
decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
|
|
|
|
|
)
|
|
|
|
|
if (instance.getLicense!= null)
|
|
|
|
|
instance.setAccessright(
|
|
|
|
|
decideAccessRight(instance.getLicense.getUrl, result.getDateofacceptance)
|
|
|
|
|
)
|
|
|
|
|
instance.setInstancetype(
|
|
|
|
|
OafMapperUtils.qualifier(
|
|
|
|
|
cobjCategory.substring(0, 4),
|
|
|
|
|
cobjCategory.substring(5),
|
|
|
|
|
ModelConstants.DNET_PUBLICATION_RESOURCE,
|
|
|
|
|
cobjCategory,
|
|
|
|
|
className,
|
|
|
|
|
ModelConstants.DNET_PUBLICATION_RESOURCE
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
result.setResourcetype(
|
|
|
|
|
OafMapperUtils.qualifier(
|
|
|
|
|
cobjCategory.substring(0, 4),
|
|
|
|
|
cobjCategory.substring(5),
|
|
|
|
|
ModelConstants.DNET_PUBLICATION_RESOURCE,
|
|
|
|
|
cobjCategory,
|
|
|
|
|
className,
|
|
|
|
|
ModelConstants.DNET_PUBLICATION_RESOURCE
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
instance.setCollectedfrom(createCrossrefCollectedFrom())
|
|
|
|
|
instance.setCollectedfrom(CROSSREF_COLLECTED_FROM)
|
|
|
|
|
if (StringUtils.isNotBlank(issuedDate)) {
|
|
|
|
|
instance.setDateofacceptance(asField(issuedDate))
|
|
|
|
|
instance.setDateofacceptance(issuedDate)
|
|
|
|
|
} else {
|
|
|
|
|
instance.setDateofacceptance(asField(createdDate.getValue))
|
|
|
|
|
instance.setDateofacceptance(createdDate.getValue)
|
|
|
|
|
}
|
|
|
|
|
val s: List[String] = List("https://doi.org/" + doi)
|
|
|
|
|
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
|
|
|
|
|
// if (links.nonEmpty) {
|
|
|
|
|
// instance.setUrl(links.asJava)
|
|
|
|
|
// }
|
|
|
|
|
if (s.nonEmpty) {
|
|
|
|
|
instance.setUrl(s.asJava)
|
|
|
|
|
}
|
|
|
|
|
val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
|
|
|
|
|
//Mapping book
|
|
|
|
|
if (className.toLowerCase.contains("book")) {
|
|
|
|
|
val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn
|
|
|
|
|
if (ISBN.nonEmpty && containerTitles.nonEmpty) {
|
|
|
|
|
val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
|
|
|
|
|
if (result.getSource != null) {
|
|
|
|
|
val l: List[String] = result.getSource.asScala.toList ::: List(source)
|
|
|
|
|
result.setSource(l.asJava)
|
|
|
|
|
} else
|
|
|
|
|
result.setSource(List(source).asJava)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
// Mapping Journal
|
|
|
|
|
val issnInfos = for {
|
|
|
|
|
JObject(issn_type) <- json \ "issn-type"
|
|
|
|
|
JField("type", JString(tp)) <- issn_type
|
|
|
|
|
JField("value", JString(vl)) <- issn_type
|
|
|
|
|
} yield Tuple2(tp, vl)
|
|
|
|
|
|
|
|
|
|
val volume = (json \ "volume").extractOrElse[String](null)
|
|
|
|
|
if (containerTitles.nonEmpty) {
|
|
|
|
|
val journal = new Journal
|
|
|
|
|
journal.setName(containerTitles.head)
|
|
|
|
|
if (issnInfos.nonEmpty) {
|
|
|
|
|
|
|
|
|
|
issnInfos.foreach(tp => {
|
|
|
|
|
tp._1 match {
|
|
|
|
|
case "electronic" => journal.setIssnOnline(tp._2)
|
|
|
|
|
case "print" => journal.setIssnPrinted(tp._2)
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
journal.setVol(volume)
|
|
|
|
|
val page = (json \ "page").extractOrElse[String](null)
|
|
|
|
|
if (page != null) {
|
|
|
|
|
val pp = page.split("-")
|
|
|
|
|
if (pp.nonEmpty)
|
|
|
|
|
journal.setSp(pp.head)
|
|
|
|
|
if (pp.size > 1)
|
|
|
|
|
journal.setEp(pp(1))
|
|
|
|
|
}
|
|
|
|
|
result.setJournal(journal)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
result.setInstance(List(instance).asJava)
|
|
|
|
|
|
|
|
|
|
//IMPORTANT
|
|
|
|
|
//The old method result.setId(generateIdentifier(result, doi))
|
|
|
|
|
//is replaced using IdentifierFactory, but the old identifier
|
|
|
|
|
//is preserved among the originalId(s)
|
|
|
|
|
val oldId = generateIdentifier(result, doi)
|
|
|
|
|
result.setId(oldId)
|
|
|
|
|
|
|
|
|
|
val newId = IdentifierFactory.createDOIBoostIdentifier(result)
|
|
|
|
|
if (!oldId.equalsIgnoreCase(newId)) {
|
|
|
|
|
result.getOriginalId.add(oldId)
|
|
|
|
|
}
|
|
|
|
|
result.setId(newId)
|
|
|
|
|
|
|
|
|
|
if (result.getId == null)
|
|
|
|
|
result.setId("ID")
|
|
|
|
|
result.setId(IdentifierFactory.createIdentifier(result, true))
|
|
|
|
|
if (result.getId == null || "ID".equalsIgnoreCase(result.getId))
|
|
|
|
|
null
|
|
|
|
|
else
|
|
|
|
|
result
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def decideAccessRight(license: String, date: String): AccessRight = {
|
|
|
|
|
if (license == null || license.isEmpty) {
|
|
|
|
|
//Default value Unknown
|
|
|
|
|
return ModelConstants.UNKNOWN_ACCESS_RIGHT();
|
|
|
|
|
}
|
|
|
|
|
//CC licenses
|
|
|
|
|
if (
|
|
|
|
|
license.startsWith("cc") ||
|
|
|
|
|
license.startsWith("http://creativecommons.org/licenses") ||
|
|
|
|
|
license.startsWith("https://creativecommons.org/licenses") ||
|
|
|
|
|
|
|
|
|
|
//ACS Publications Author choice licenses (considered OPEN also by Unpaywall)
|
|
|
|
|
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") ||
|
|
|
|
|
license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") ||
|
|
|
|
|
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") ||
|
|
|
|
|
|
|
|
|
|
//APA (considered OPEN also by Unpaywall)
|
|
|
|
|
license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")
|
|
|
|
|
) {
|
|
|
|
|
|
|
|
|
|
val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT()
|
|
|
|
|
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
|
|
|
|
|
return oaq
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED)
|
|
|
|
|
if (
|
|
|
|
|
license.equals(
|
|
|
|
|
"https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"
|
|
|
|
|
)
|
|
|
|
|
) {
|
|
|
|
|
val now = java.time.LocalDate.now
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd"))
|
|
|
|
|
if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) {
|
|
|
|
|
val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT()
|
|
|
|
|
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
|
|
|
|
|
return oaq
|
|
|
|
|
} else {
|
|
|
|
|
return ModelConstants.EMBARGOED_ACCESS_RIGHT()
|
|
|
|
|
}
|
|
|
|
|
} catch {
|
|
|
|
|
case _: Exception => {
|
|
|
|
|
try {
|
|
|
|
|
val pub_date =
|
|
|
|
|
LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"))
|
|
|
|
|
if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) {
|
|
|
|
|
val oaq: AccessRight = OPEN_ACCESS_RIGHT()
|
|
|
|
|
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
|
|
|
|
|
return oaq
|
|
|
|
|
} else {
|
|
|
|
|
return ModelConstants.EMBARGOED_ACCESS_RIGHT()
|
|
|
|
|
}
|
|
|
|
|
} catch {
|
|
|
|
|
case _: Exception => return ModelConstants.CLOSED_ACCESS_RIGHT()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ModelConstants.CLOSED_ACCESS_RIGHT()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private def extractDate(dt: String, datePart: List[List[Int]]): String = {
|
|
|
|
|
if (StringUtils.isNotBlank(dt))
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(dt)
|
|
|
|
|
if (datePart != null && datePart.size == 1) {
|
|
|
|
|
val res = datePart.head
|
|
|
|
|
if (res.size == 3) {
|
|
|
|
|
val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
|
|
|
|
|
if (dp.length == 10) {
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(dp)
|
|
|
|
|
}
|
|
|
|
|
} else if (res.size == 2) {
|
|
|
|
|
val dp = f"${res.head}-${res(1)}%02d-01"
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(dp)
|
|
|
|
|
} else if (res.size == 1) {
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
null
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private def generateDate(
|
|
|
|
|
dt: String,
|
|
|
|
|
datePart: List[List[Int]],
|
|
|
|
|
classId: String,
|
|
|
|
|
schemeId: String
|
|
|
|
|
): StructuredProperty = {
|
|
|
|
|
val dp = extractDate(dt, datePart)
|
|
|
|
|
if (StringUtils.isNotBlank(dp))
|
|
|
|
|
structuredProperty(dp, classId, classId, schemeId)
|
|
|
|
|
else
|
|
|
|
|
null
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String, String) = {
|
|
|
|
|
val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType)
|
|
|
|
|
if (term != null) {
|
|
|
|
|
val resourceType =
|
|
|
|
|
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname
|
|
|
|
|
|
|
|
|
|
resourceType match {
|
|
|
|
|
case "publication" => (new Publication, resourceType, term.getClassname)
|
|
|
|
|
case "dataset" => (new Dataset, resourceType, term.getClassname)
|
|
|
|
|
case "software" => (new Software, resourceType, term.getClassname)
|
|
|
|
|
case "otherresearchproduct" => (new OtherResearchProduct, resourceType, term.getClassname)
|
|
|
|
|
}
|
|
|
|
|
} else
|
|
|
|
|
null
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private def generateAuthor(ca: CrossrefAuthor): Author = {
|
|
|
|
|
val a = new Author
|
|
|
|
|
a.setName(ca.givenName)
|
|
|
|
|
a.setSurname(ca.familyName)
|
|
|
|
|
a.setFullname(s"${ca.familyName}, ${ca.givenName}")
|
|
|
|
|
a.setRank(ca.rank + 1)
|
|
|
|
|
if (StringUtils.isNotBlank(ca.ORCID))
|
|
|
|
|
a.setPid(
|
|
|
|
|
List(
|
|
|
|
|
OafMapperUtils.authorPid(ca.ORCID, OafMapperUtils.qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES), null)
|
|
|
|
|
).asJava
|
|
|
|
|
)
|
|
|
|
|
a
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|