2020-04-20 09:53:34 +02:00
|
|
|
|
package eu.dnetlib.doiboost.crossref
|
|
|
|
|
|
2021-03-31 18:33:57 +02:00
|
|
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
2020-04-20 09:53:34 +02:00
|
|
|
|
import eu.dnetlib.dhp.schema.oaf._
|
2021-07-01 22:13:45 +02:00
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
|
2020-04-20 13:26:29 +02:00
|
|
|
|
import eu.dnetlib.dhp.utils.DHPUtils
|
2021-07-14 11:43:00 +02:00
|
|
|
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{decideAccessRight, _}
|
2020-04-20 18:10:07 +02:00
|
|
|
|
import org.apache.commons.lang.StringUtils
|
2020-04-20 09:53:34 +02:00
|
|
|
|
import org.json4s
|
|
|
|
|
import org.json4s.DefaultFormats
|
2021-06-14 09:45:14 +02:00
|
|
|
|
import org.json4s.JsonAST.{JValue, _}
|
2020-04-20 09:53:34 +02:00
|
|
|
|
import org.json4s.jackson.JsonMethods._
|
2020-04-29 13:13:02 +02:00
|
|
|
|
import org.slf4j.{Logger, LoggerFactory}
|
|
|
|
|
|
2020-04-20 09:53:34 +02:00
|
|
|
|
import scala.collection.JavaConverters._
|
2020-04-29 13:13:02 +02:00
|
|
|
|
import scala.collection.mutable
|
|
|
|
|
import scala.util.matching.Regex
|
2021-05-19 16:01:52 +02:00
|
|
|
|
import java.util
|
|
|
|
|
|
2021-06-30 10:03:15 +02:00
|
|
|
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
|
|
|
|
|
2020-11-25 17:15:54 +01:00
|
|
|
|
/**
 * Plain record holding a DOI, a JSON string and a timestamp.
 * NOTE(review): presumably the raw Crossref dump entry and its indexing time — confirm against callers.
 */
case class CrossrefDT(doi: String, json: String, timestamp: Long)
|
2020-10-08 10:10:13 +02:00
|
|
|
|
|
2020-04-29 13:13:02 +02:00
|
|
|
|
/** Affiliation entry of a Crossref author; only its `name` is retained. */
case class mappingAffiliation(name: String)
|
2020-04-20 13:26:29 +02:00
|
|
|
|
|
2021-03-11 11:32:32 +01:00
|
|
|
|
/**
 * Shape used to extract the entries of the Crossref `author` array.
 * Only `family` is mandatory; every other attribute is optional.
 */
case class mappingAuthor(
  given: Option[String],
  family: String,
  sequence: Option[String],
  ORCID: Option[String],
  affiliation: Option[mappingAffiliation]
)
|
2020-04-22 15:00:44 +02:00
|
|
|
|
|
2020-04-29 13:13:02 +02:00
|
|
|
|
/**
 * Shape used to extract the entries of the Crossref `funder` array:
 * the funder name, its optional DOI and the optional list of award codes.
 */
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]])
|
|
|
|
|
|
2020-04-23 09:33:48 +02:00
|
|
|
|
|
|
|
|
|
case object Crossref2Oaf {
|
2020-04-29 13:13:02 +02:00
|
|
|
|
// SLF4J logger bound to this object; the funder mapping uses it for debug traces.
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
2020-04-20 09:53:34 +02:00
|
|
|
|
|
|
|
|
|
/**
 * Maps a Crossref `type` onto the kind of result to instantiate:
 * every listed type is a "publication", except "dataset" which stays a "dataset".
 */
val mappingCrossrefType: Map[String, String] = {
  val publicationTypes = List(
    "book-section",
    "book",
    "book-chapter",
    "book-part",
    "book-series",
    "book-set",
    "book-track",
    "edited-book",
    "reference-book",
    "monograph",
    "journal-article",
    "dissertation",
    "other",
    "peer-review",
    "proceedings",
    "proceedings-article",
    "reference-entry",
    "report",
    "report-series",
    "standard",
    "standard-series",
    "posted-content"
  )
  (publicationTypes.map(crossrefType => crossrefType -> "publication") :+ ("dataset" -> "dataset")).toMap
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Maps a Crossref `type` (or `subtype`) onto a "<4-digit code> <label>" category string.
 * Each key appears once: the original literal declared "report" twice with the same value.
 */
val mappingCrossrefSubType = Map(
  "book-section" -> "0013 Part of book or chapter of book",
  "book" -> "0002 Book",
  "book-chapter" -> "0013 Part of book or chapter of book",
  "book-part" -> "0013 Part of book or chapter of book",
  "book-series" -> "0002 Book",
  "book-set" -> "0002 Book",
  "book-track" -> "0002 Book",
  "edited-book" -> "0002 Book",
  "reference-book" -> "0002 Book",
  "monograph" -> "0002 Book",
  "journal-article" -> "0001 Article",
  "dissertation" -> "0006 Doctoral thesis",
  "other" -> "0038 Other literature type",
  "peer-review" -> "0015 Review",
  "proceedings" -> "0004 Conference object",
  "proceedings-article" -> "0004 Conference object",
  "reference-entry" -> "0013 Part of book or chapter of book",
  "report" -> "0017 Report",
  "report-series" -> "0017 Report",
  "standard" -> "0038 Other literature type",
  "standard-series" -> "0038 Other literature type",
  "dataset" -> "0021 Dataset",
  "preprint" -> "0016 Preprint"
)
|
|
|
|
|
|
2020-04-23 09:33:48 +02:00
|
|
|
|
/**
 * Fills the fields of `result` that are common to every record type, reading them from a
 * parsed Crossref work: DOI pid, original ids, data info, timestamps, collected-from,
 * publisher, titles, abstracts, sources, relevant dates, subjects, authors, one
 * [[Instance]] and finally the identifier.
 *
 * @param result       the object to populate (mutated in place)
 * @param json         the parsed Crossref record
 * @param cobjCategory category string: its first four characters become the class id and the
 *                     text from index 5 onwards the class name of the instance/resource type
 * @return `result`, or `null` when no identifier could be assigned to it
 */
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats

  //MAPPING Crossref DOI into PID
  val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
  result.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)

  //MAPPING Crossref DOI into OriginalId
  //and Other Original Identifier of dataset like clinical-trial-number
  val clinicalTrialNumbers = for (JString(ctr) <- json \ "clinical-trial-number") yield ctr
  val alternativeIds = for (JString(ids) <- json \ "alternative-id") yield ids
  val tmp = clinicalTrialNumbers ::: alternativeIds ::: List(doi)

  // A mutable java list is required: the previous identifier may be appended further down.
  val originalIds = new util.ArrayList(tmp.filter(id => id != null).asJava)
  result.setOriginalId(originalIds)

  // Add DataInfo
  result.setDataInfo(generateDataInfo())

  result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long])
  result.setDateofcollection((json \ "indexed" \ "date-time").extract[String])

  result.setCollectedfrom(List(createCrossrefCollectedFrom()).asJava)

  // Publisher ( Name of work's publisher mapped into Result/Publisher)
  val publisher = (json \ "publisher").extractOrElse[String](null)
  if (publisher != null && publisher.nonEmpty)
    result.setPublisher(asField(publisher))

  // TITLE
  val mainTitles = for {JString(title) <- json \ "title" if title.nonEmpty} yield createSP(title, "main title", ModelConstants.DNET_DATACITE_TITLE)
  val originalTitles = for {JString(title) <- json \ "original-title" if title.nonEmpty} yield createSP(title, "alternative title", ModelConstants.DNET_DATACITE_TITLE)
  val shortTitles = for {JString(title) <- json \ "short-title" if title.nonEmpty} yield createSP(title, "alternative title", ModelConstants.DNET_DATACITE_TITLE)
  val subtitles = for {JString(title) <- json \ "subtitle" if title.nonEmpty} yield createSP(title, "subtitle", ModelConstants.DNET_DATACITE_TITLE)
  result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)

  // DESCRIPTION
  val descriptionList = for {JString(description) <- json \ "abstract"} yield asField(description)
  result.setDescription(descriptionList.asJava)

  // Source
  val sourceList = for {JString(source) <- json \ "source" if source != null && source.nonEmpty} yield asField(source)
  result.setSource(sourceList.asJava)

  //RELEVANT DATE Mapping
  val createdDate = generateDate((json \ "created" \ "date-time").extract[String], (json \ "created" \ "date-parts").extract[List[List[Int]]], "created", ModelConstants.DNET_DATACITE_DATE)
  val postedDate = generateDate((json \ "posted" \ "date-time").extractOrElse[String](null), (json \ "posted" \ "date-parts").extract[List[List[Int]]], "available", ModelConstants.DNET_DATACITE_DATE)
  val acceptedDate = generateDate((json \ "accepted" \ "date-time").extractOrElse[String](null), (json \ "accepted" \ "date-parts").extract[List[List[Int]]], "accepted", ModelConstants.DNET_DATACITE_DATE)
  val publishedPrintDate = generateDate((json \ "published-print" \ "date-time").extractOrElse[String](null), (json \ "published-print" \ "date-parts").extract[List[List[Int]]], "published-print", ModelConstants.DNET_DATACITE_DATE)
  val publishedOnlineDate = generateDate((json \ "published-online" \ "date-time").extractOrElse[String](null), (json \ "published-online" \ "date-parts").extract[List[List[Int]]], "published-online", ModelConstants.DNET_DATACITE_DATE)

  // The date of acceptance is the "issued" date when available, the "created" date otherwise.
  // The same value is used for the result and, further down, for its instance.
  val issuedDate = extractDate((json \ "issued" \ "date-time").extractOrElse[String](null), (json \ "issued" \ "date-parts").extract[List[List[Int]]])
  val acceptanceDate = if (StringUtils.isNotBlank(issuedDate)) issuedDate else createdDate.getValue
  result.setDateofacceptance(asField(acceptanceDate))
  result.setRelevantdate(List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate).filter(p => p != null).asJava)

  //Mapping Subject
  val subjectList: List[String] = (json \ "subject").extractOrElse[List[String]](List())
  if (subjectList.nonEmpty) {
    result.setSubject(subjectList.map(s => createSP(s, "keywords", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava)
  }

  //Mapping Author
  val authorList: List[mappingAuthor] = (json \ "author").extractOrElse[List[mappingAuthor]](List())

  // Authors flagged as "first" go to the front; all the others keep their relative order.
  // A boolean sort key gives a stable, well-defined ordering. The previous comparator looked
  // only at its first argument, so two "first" authors were each reported as smaller than
  // the other, which breaks the ordering contract expected by the underlying sort.
  val sorted_list = authorList.sortBy(a => !a.sequence.exists(seq => "first".equalsIgnoreCase(seq)))

  result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index) }.asJava)

  // Mapping instance
  val instance = new Instance()
  val license = for {
    JObject(licenseFields) <- json \ "license"
    JField("URL", JString(lic)) <- licenseFields
    JField("content-version", JString(content_version)) <- licenseFields
  } yield (asField(lic), content_version)
  val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue))
  // Prefer the license of the "vor" content version (the last one listed when there are
  // several); otherwise fall back to the first license with a non-blank URL.
  l.filter(d => d._2.equals("vor")).lastOption
    .orElse(l.headOption)
    .foreach(d => instance.setLicense(d._1))

  // Ticket #6281 added pid to Instance
  instance.setPid(result.getPid)

  val has_review = json \ "relation" \ "has-review" \ "id"

  if (has_review != JNothing) {
    instance.setRefereed(
      OafMapperUtils.qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS))
  }

  instance.setAccessright(decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue))
  instance.setInstancetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
  result.setResourcetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))

  instance.setCollectedfrom(createCrossrefCollectedFrom())
  instance.setDateofacceptance(asField(acceptanceDate))

  val s: String = (json \ "URL").extract[String]
  val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null).distinct
  if (links.nonEmpty) {
    instance.setUrl(links.asJava)
  }
  result.setInstance(List(instance).asJava)

  //IMPORTANT
  //The old method result.setId(generateIdentifier(result, doi))
  //is replaced using IdentifierFactory, but the old identifier
  //is preserved among the originalId(s)
  val oldId = generateIdentifier(result, doi)
  result.setId(oldId)

  val newId = IdentifierFactory.createDOIBoostIdentifier(result)
  if (!oldId.equalsIgnoreCase(newId)) {
    result.getOriginalId.add(oldId)
  }
  result.setId(newId)

  if (result.getId == null)
    null
  else
    result
}
|
|
|
|
|
|
2020-04-20 13:26:29 +02:00
|
|
|
|
|
2021-03-11 11:32:32 +01:00
|
|
|
|
/**
 * Builds an [[Author]] from the name parts found in a Crossref author entry.
 *
 * @param given  given name, may be null
 * @param family family name
 * @param orcid  ORCID of the author, may be null or blank; when present it is attached as a
 *               pid classified as `ModelConstants.ORCID_PENDING`
 * @param index  zero-based position of the author; the stored rank is `index + 1`
 * @return the populated author
 */
def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = {
  val a = new Author
  a.setName(given)
  a.setSurname(family)
  // Join only the parts that are actually present: interpolating a missing (null) given name
  // used to produce full names such as "null Smith".
  a.setFullname(List(given, family).filter(part => StringUtils.isNotBlank(part)).mkString(" "))
  a.setRank(index + 1)
  if (StringUtils.isNotBlank(orcid))
    a.setPid(List(createSP(orcid, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES, generateDataInfo())).asJava)

  a
}
|
|
|
|
|
|
2020-04-29 13:13:02 +02:00
|
|
|
|
/**
 * Converts one Crossref record, given as a JSON string, into OAF objects.
 *
 * The returned list is empty when the record has no `type`, when the type is not mapped to a
 * result kind, or when no identifier could be assigned. Otherwise it holds the relations
 * derived from the `funder` array (if any) followed by the mapped result.
 *
 * @param input the Crossref record as JSON text
 * @return the generated OAF objects, possibly empty
 */
def convert(input: String): List[Oaf] = {
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  lazy val json: json4s.JValue = parse(input)

  val recordType = (json \ "type").extractOrElse[String](null)
  val recordSubType = (json \ "subtype").extractOrElse[String](null)

  if (recordType == null) List()
  else {
    val item = generateItemFromType(recordType, recordSubType)
    if (item == null) List()
    else {
      // The category is looked up by type first, then by subtype, then defaulted.
      val category = mappingCrossrefSubType.getOrElse(recordType, mappingCrossrefSubType.getOrElse(recordSubType, "0038 Other literature type"))
      // mappingResult populates `item` in place; a missing id means the record is discarded.
      mappingResult(item, json, category)
      if (item.getId == null) List()
      else {
        val funders: List[mappingFunder] = (json \ "funder").extractOrElse[List[mappingFunder]](List())
        val fundingRelations: List[Oaf] =
          if (funders.nonEmpty)
            mappingFunderToRelations(funders, item.getId, createCrossrefCollectedFrom(), item.getDataInfo, item.getLastupdatetimestamp)
          else
            List()

        item match {
          case publication: Publication => convertPublication(publication, json, category)
          case dataset: Dataset => convertDataset(dataset)
        }

        fundingRelations ::: List(item)
      }
    }
  }
}
|
2020-04-22 15:00:44 +02:00
|
|
|
|
|
2020-04-29 13:13:02 +02:00
|
|
|
|
|
|
|
|
|
/**
 * Turns the funders of a Crossref record into result/project relations.
 *
 * A funder is recognised by its DOI when that is defined and non-empty, by its name otherwise.
 * For each recognised funder, every usable award code yields a pair of relations
 * (`IS_PRODUCED_BY` from the result to the project and `PRODUCES` back); some funders are
 * instead linked to a single project with a fixed identifier. Unrecognised funders are only
 * logged at debug level.
 *
 * @param funders  funders extracted from the record, may be null
 * @param sourceId identifier of the result the relations start from
 * @param cf       collected-from value set on every relation
 * @param di       data info set on every relation
 * @param ts       last-update timestamp set on every relation
 * @return the generated relations, in generation order
 */
def mappingFunderToRelations(funders: List[mappingFunder], sourceId: String, cf: KeyValue, di: DataInfo, ts: Long): List[Relation] = {

  val queue = new mutable.Queue[Relation]

  // Fixed project hash shared by the funders that are linked without looking at award codes.
  // NOTE(review): presumably the id of each funder's generic "unidentified" project — confirm.
  val fixedProjectHash = "1e5e62235d094afd01cd56e65112fc63"

  /** Keeps the text between the first "_" and the following "/" of an SNSF award code. */
  def snsfRule(award: String): String = {
    val tmp1 = StringUtils.substringAfter(award, "_")
    val tmp2 = StringUtils.substringBefore(tmp1, "/")
    logger.debug(s"From $award to $tmp2")
    tmp2
  }

  /**
   * Returns the lexicographically greatest run of 4 to 9 digits found in `award`,
   * or null when there is none.
   */
  def extractECAward(award: String): String = {
    val awardECRegex: Regex = "[0-9]{4,9}".r
    // Scan once and reuse the matches instead of running the regex twice.
    val candidates = awardECRegex.findAllIn(award).toList
    if (candidates.nonEmpty) candidates.max else null
  }

  /** Creates a result/project relation carrying the shared provenance values. */
  def generateRelation(sourceId: String, targetId: String, relClass: String): Relation = {
    val r = new Relation
    r.setSource(sourceId)
    r.setTarget(targetId)
    r.setRelType(ModelConstants.RESULT_PROJECT)
    r.setRelClass(relClass)
    r.setSubRelType(ModelConstants.OUTCOME)
    r.setCollectedfrom(List(cf).asJava)
    r.setDataInfo(di)
    r.setLastupdatetimestamp(ts)
    r
  }

  /** Builds a project identifier from a namespace prefix and a local id. */
  def getProjectId(nsPrefix: String, targetId: String): String = {
    s"40|$nsPrefix::$targetId"
  }

  /** Queues the two directions of the link between the result and the given project. */
  def linkToProject(projectId: String): Unit = {
    queue += generateRelation(sourceId, projectId, ModelConstants.IS_PRODUCED_BY)
    queue += generateRelation(projectId, sourceId, ModelConstants.PRODUCES)
  }

  /** Links the result to the funder's project that has the fixed hash. */
  def linkToFixedProject(nsPrefix: String): Unit =
    linkToProject(getProjectId(nsPrefix, fixedProjectHash))

  /**
   * Links the result to one project per award: each award code is normalised with
   * `extractField`, null/empty outcomes are dropped and the md5 of the remaining code
   * becomes the local project id.
   */
  def generateSimpleRelationFromAward(funder: mappingFunder, nsPrefix: String, extractField: String => String): Unit = {
    funder.award.getOrElse(List())
      .map(extractField)
      .filter(a => a != null && a.nonEmpty)
      .foreach(award => linkToProject(getProjectId(nsPrefix, DHPUtils.md5(award))))
  }

  if (funders != null)
    funders.foreach { funder =>
      funder.DOI.filter(_.nonEmpty) match {
        case Some(funderDoi) =>
          funderDoi match {
            case "10.13039/100010663" |
                 "10.13039/100010661" |
                 "10.13039/501100007601" |
                 "10.13039/501100000780" |
                 "10.13039/100010665" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
            // NOTE(review): "10.13039/501100000780" used to be listed here as well, but it is
            // already captured by the case above, so that alternative could never match and
            // has been dropped. Confirm whether that funder should also produce
            // "corda_______" relations.
            case "10.13039/100011199" |
                 "10.13039/100004431" |
                 "10.13039/501100004963" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
            case "10.13039/501100000781" =>
              generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
              generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
            case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a)
            case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a)
            case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a)
            // NOTE(review): same "aka_________" prefix as the previous funder although the
            // award codes are stripped of "SFI" — confirm the prefix is the intended one.
            case "10.13039/501100001602" => generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", ""))
            case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a)
            case "10.13039/501100000038" => linkToFixedProject("nserc_______")
            case "10.13039/501100000155" => linkToFixedProject("sshrc_______")
            case "10.13039/501100000024" => linkToFixedProject("cihr________")
            case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a)
            case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward)
            case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a => a)
            case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward)
            case "10.13039/501100003407" =>
              generateSimpleRelationFromAward(funder, "miur________", a => a)
              linkToFixedProject("miur________")
            case "10.13039/501100006588" |
                 "10.13039/501100004488" => generateSimpleRelationFromAward(funder, "irb_hr______", a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""))
            case "10.13039/501100006769" => generateSimpleRelationFromAward(funder, "rsf_________", a => a)
            case "10.13039/501100001711" => generateSimpleRelationFromAward(funder, "snsf________", snsfRule)
            case "10.13039/501100004410" => generateSimpleRelationFromAward(funder, "tubitakf____", a => a)
            // NOTE(review): the doubled "10.10." prefix looks like a typo of the DOI handled
            // by the next case; kept verbatim so that behaviour is unchanged — confirm.
            case "10.10.13039/100004440" => generateSimpleRelationFromAward(funder, "wt__________", a => a)
            case "10.13039/100004440" => linkToFixedProject("wt__________")
            case _ => logger.debug("no match for " + funderDoi)
          }

        case None =>
          funder.name match {
            case "European Union’s Horizon 2020 research and innovation program" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
            case "European Union's" =>
              generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
              generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
            case "The French National Research Agency (ANR)" |
                 "The French National Research Agency" => generateSimpleRelationFromAward(funder, "anr_________", a => a)
            case "CONICYT, Programa de Formación de Capital Humano Avanzado" => generateSimpleRelationFromAward(funder, "conicytf____", extractECAward)
            case "Wellcome Trust Masters Fellowship" => linkToFixedProject("wt__________")
            case _ => logger.debug("no match for " + funder.name)
          }
      }
    }

  queue.toList
}
|
|
|
|
|
|
|
|
|
|
/**
 * Hook for the dataset-specific part of the mapping; it currently leaves `dataset` untouched.
 *
 * @param dataset the dataset produced by the common mapping
 */
def convertDataset(dataset: Dataset): Unit = {
  // TODO check if there are other info to map into the Dataset
}
|
|
|
|
|
|
|
|
|
|
|
2020-04-23 09:33:48 +02:00
|
|
|
|
/**
 * Adds the publication-specific information read from the Crossref record.
 *
 * When `cobjCategory` contains "book" (case-insensitive) and both an ISBN and a container
 * title exist, a "<container title> ISBN: <isbn>" entry is appended to the sources.
 * For any other category, when a container title exists, a [[Journal]] is attached with its
 * name, ISSNs, volume and start/end page.
 *
 * @param publication  the publication to enrich (mutated in place)
 * @param json         the parsed Crossref record
 * @param cobjCategory the category string assigned to the record
 */
def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = {
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  val containerTitles = for {JString(ct) <- json \ "container-title"} yield ct

  //Mapping book
  if (cobjCategory.toLowerCase.contains("book")) {
    val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn
    if (ISBN.nonEmpty && containerTitles.nonEmpty) {
      val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
      // Append to the sources already set, if any.
      val existing: List[Field[String]] = Option(publication.getSource).map(_.asScala.toList).getOrElse(List())
      publication.setSource((existing ::: List(asField(source))).asJava)
    }
  } else {
    // Mapping Journal
    val issnInfos = for {
      JArray(issn_types) <- json \ "issn-type"
      JObject(issn_type) <- issn_types
      JField("type", JString(tp)) <- issn_type
      JField("value", JString(vl)) <- issn_type
    } yield Tuple2(tp, vl)

    val volume = (json \ "volume").extractOrElse[String](null)
    if (containerTitles.nonEmpty) {
      val journal = new Journal
      journal.setName(containerTitles.head)
      issnInfos.foreach {
        case ("electronic", value) => journal.setIssnOnline(value)
        case ("print", value) => journal.setIssnPrinted(value)
        // Any other ISSN type is skipped: without this case an unexpected value raised a
        // MatchError and aborted the conversion of the whole record.
        case _ => ()
      }
      journal.setVol(volume)

      val page = (json \ "page").extractOrElse[String](null)
      if (page != null) {
        // "start-end": the first chunk is the start page, the second (if any) the end page.
        val pp = page.split("-")
        if (pp.nonEmpty)
          journal.setSp(pp.head)
        if (pp.size > 1)
          journal.setEp(pp(1))
      }
      publication.setJournal(journal)
    }
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Picks the textual form of a Crossref date.
 *
 * @param dt       the `date-time` value; returned as is when it is not blank
 * @param datePart the `date-parts` value; used only when `dt` is blank and it holds exactly
 *                 one entry made of three numbers (year, month, day)
 * @return `dt`, or the date parts rendered as `year-MM-dd` when that rendering is exactly
 *         10 characters long, or null in every other case
 */
def extractDate(dt: String, datePart: List[List[Int]]): String = {
  if (StringUtils.isNotBlank(dt)) dt
  else
    Option(datePart)
      .filter(parts => parts.size == 1)
      .map(parts => parts.head)
      .filter(ymd => ymd.size == 3)
      .map(ymd => f"${ymd.head}-${ymd(1)}%02d-${ymd(2)}%02d")
      // Guards against years that are not four digits long and similar oddities.
      .filter(rendered => rendered.length == 10)
      .orNull
}
|
|
|
|
|
|
2020-04-22 15:00:44 +02:00
|
|
|
|
/**
 * Wraps the date obtained from [[extractDate]] into a structured property.
 *
 * @param dt       the `date-time` value of the Crossref date
 * @param datePart the `date-parts` value of the Crossref date
 * @param classId  class id passed to `createSP`
 * @param schemeId scheme id passed to `createSP`
 * @return the structured property, or null when no date could be extracted
 */
def generateDate(dt: String, datePart: List[List[Int]], classId: String, schemeId: String): StructuredProperty = {
  val extracted = extractDate(dt, datePart)
  if (StringUtils.isBlank(extracted)) null
  else createSP(extracted, classId, schemeId)
}
|
2020-04-20 18:10:07 +02:00
|
|
|
|
|
2020-04-22 15:00:44 +02:00
|
|
|
|
/**
 * Instantiates the empty result matching a Crossref type, according to `mappingCrossrefType`.
 *
 * @param objectType    the Crossref `type`
 * @param objectSubType the Crossref `subtype`; not used by the lookup
 * @return a new [[Publication]] or [[Dataset]], or null when the type is not mapped
 */
def generateItemFromType(objectType: String, objectSubType: String): Result = {
  mappingCrossrefType.get(objectType) match {
    case Some(kind) if kind.equalsIgnoreCase("publication") => new Publication()
    case Some(kind) if kind.equalsIgnoreCase("dataset") => new Dataset()
    case _ => null
  }
}
|
|
|
|
|
|
|
|
|
|
}
|