2024-03-13 06:56:10 +01:00
|
|
|
|
package eu.dnetlib.dhp.collection.crossref
|
|
|
|
|
|
2024-03-20 17:04:52 +01:00
|
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper
|
2024-04-03 17:07:14 +02:00
|
|
|
|
import eu.dnetlib.dhp.actionmanager.ror.GenerateRorActionSetJob
|
2024-02-14 11:41:28 +01:00
|
|
|
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
2024-03-13 06:56:10 +01:00
|
|
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
|
|
|
|
import eu.dnetlib.dhp.schema.oaf._
|
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.{field, qualifier, structuredProperty, subject}
|
2024-03-20 17:04:52 +01:00
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.{
|
|
|
|
|
DoiCleaningRule,
|
|
|
|
|
GraphCleaningFunctions,
|
|
|
|
|
IdentifierFactory,
|
|
|
|
|
OafMapperUtils,
|
|
|
|
|
PidType
|
|
|
|
|
}
|
2024-03-13 06:56:10 +01:00
|
|
|
|
import eu.dnetlib.dhp.utils.DHPUtils
|
|
|
|
|
import org.apache.commons.lang.StringUtils
|
2024-04-03 17:07:14 +02:00
|
|
|
|
import org.apache.spark.sql.Row
|
2024-03-13 06:56:10 +01:00
|
|
|
|
import org.json4s
|
|
|
|
|
import org.json4s.DefaultFormats
|
|
|
|
|
import org.json4s.JsonAST._
|
|
|
|
|
import org.json4s.jackson.JsonMethods._
|
|
|
|
|
import org.slf4j.{Logger, LoggerFactory}
|
|
|
|
|
|
|
|
|
|
import java.time.LocalDate
|
|
|
|
|
import java.time.format.DateTimeFormatter
|
|
|
|
|
import java.util
|
|
|
|
|
import scala.collection.JavaConverters._
|
|
|
|
|
import scala.collection.mutable
|
|
|
|
|
import scala.io.Source
|
|
|
|
|
import scala.util.matching.Regex
|
|
|
|
|
|
|
|
|
|
case class mappingAffiliation(name: String) {}
|
|
|
|
|
|
|
|
|
|
case class mappingAuthor(
|
|
|
|
|
given: Option[String],
|
|
|
|
|
family: Option[String],
|
|
|
|
|
sequence: Option[String],
|
|
|
|
|
ORCID: Option[String],
|
|
|
|
|
affiliation: Option[mappingAffiliation]
|
|
|
|
|
) {}
|
|
|
|
|
|
|
|
|
|
case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {}
|
|
|
|
|
|
|
|
|
|
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
|
|
|
|
|
|
2024-03-25 18:18:10 +01:00
|
|
|
|
case class UnpayWall(doi: String, is_oa: Boolean, best_oa_location: UnpayWallOALocation, oa_status: String) {}
|
|
|
|
|
|
|
|
|
|
case class UnpayWallOALocation(license: Option[String], url: String, host_type: Option[String]) {}
|
|
|
|
|
|
2024-03-13 06:56:10 +01:00
|
|
|
|
case object Crossref2Oaf {
|
|
|
|
|
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
2024-03-20 17:04:52 +01:00
|
|
|
|
val mapper = new ObjectMapper
|
2024-03-13 06:56:10 +01:00
|
|
|
|
|
|
|
|
|
val irishFunder: List[funderInfo] = {
|
|
|
|
|
val s = Source
|
2024-03-20 17:04:52 +01:00
|
|
|
|
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/irish_funder.json"))
|
2024-03-13 06:56:10 +01:00
|
|
|
|
.mkString
|
|
|
|
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
|
|
|
|
lazy val json: org.json4s.JValue = parse(s)
|
|
|
|
|
json.extract[List[funderInfo]]
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-20 17:04:52 +01:00
|
|
|
|
val invalidName = List(
|
|
|
|
|
",",
|
|
|
|
|
"none none",
|
|
|
|
|
"none, none",
|
|
|
|
|
"none &na;",
|
|
|
|
|
"(:null)",
|
|
|
|
|
"test test test",
|
|
|
|
|
"test test",
|
|
|
|
|
"test",
|
|
|
|
|
"&na; &na;"
|
|
|
|
|
)
|
|
|
|
|
|
2024-02-14 11:41:28 +01:00
|
|
|
|
def getIrishId(doi: String): Option[String] = {
|
|
|
|
|
val id = doi.split("/").last
|
|
|
|
|
irishFunder
|
|
|
|
|
.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id))))
|
|
|
|
|
.map(f => f.id)
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-13 06:56:10 +01:00
|
|
|
|
def createCrossrefCollectedFrom(): KeyValue = {
|
|
|
|
|
|
|
|
|
|
val cf = new KeyValue
|
|
|
|
|
cf.setValue("Crossref");
|
|
|
|
|
cf.setKey(ModelConstants.CROSSREF_ID)
|
|
|
|
|
cf
|
|
|
|
|
|
|
|
|
|
}
|
2024-03-20 17:04:52 +01:00
|
|
|
|
|
2024-03-25 18:18:10 +01:00
|
|
|
|
def createUnpayWallCollectedFrom(): KeyValue = {
|
|
|
|
|
|
|
|
|
|
val cf = new KeyValue
|
|
|
|
|
cf.setValue("UnpayWall")
|
|
|
|
|
cf.setKey(s"10|openaire____:${DHPUtils.md5("UnpayWall".toLowerCase)}")
|
|
|
|
|
cf
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-13 06:56:10 +01:00
|
|
|
|
def generateDataInfo(): DataInfo = {
|
2024-03-20 17:04:52 +01:00
|
|
|
|
generateDataInfo("0.91")
|
2024-03-13 06:56:10 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def generateDataInfo(trust: String): DataInfo = {
|
|
|
|
|
val di = new DataInfo
|
|
|
|
|
di.setDeletedbyinference(false)
|
|
|
|
|
di.setInferred(false)
|
|
|
|
|
di.setInvisible(false)
|
|
|
|
|
di.setTrust(trust)
|
|
|
|
|
di.setProvenanceaction(
|
|
|
|
|
OafMapperUtils.qualifier(
|
|
|
|
|
ModelConstants.SYSIMPORT_ACTIONSET,
|
|
|
|
|
ModelConstants.SYSIMPORT_ACTIONSET,
|
|
|
|
|
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
|
|
|
|
ModelConstants.DNET_PROVENANCE_ACTIONS
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
di
|
|
|
|
|
}
|
2024-03-20 17:04:52 +01:00
|
|
|
|
|
2024-03-13 06:56:10 +01:00
|
|
|
|
def getOpenAccessQualifier(): AccessRight = {
|
|
|
|
|
|
|
|
|
|
OafMapperUtils.accessRight(
|
|
|
|
|
ModelConstants.ACCESS_RIGHT_OPEN,
|
|
|
|
|
"Open Access",
|
|
|
|
|
ModelConstants.DNET_ACCESS_MODES,
|
|
|
|
|
ModelConstants.DNET_ACCESS_MODES
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def getRestrictedQualifier(): AccessRight = {
|
|
|
|
|
OafMapperUtils.accessRight(
|
|
|
|
|
"RESTRICTED",
|
|
|
|
|
"Restricted",
|
|
|
|
|
ModelConstants.DNET_ACCESS_MODES,
|
|
|
|
|
ModelConstants.DNET_ACCESS_MODES
|
|
|
|
|
)
|
|
|
|
|
}
|
2024-03-20 17:04:52 +01:00
|
|
|
|
|
2024-03-13 06:56:10 +01:00
|
|
|
|
def getUnknownQualifier(): AccessRight = {
|
|
|
|
|
OafMapperUtils.accessRight(
|
|
|
|
|
ModelConstants.UNKNOWN,
|
|
|
|
|
ModelConstants.NOT_AVAILABLE,
|
|
|
|
|
ModelConstants.DNET_ACCESS_MODES,
|
|
|
|
|
ModelConstants.DNET_ACCESS_MODES
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def getEmbargoedAccessQualifier(): AccessRight = {
|
|
|
|
|
OafMapperUtils.accessRight(
|
|
|
|
|
"EMBARGO",
|
|
|
|
|
"Embargo",
|
|
|
|
|
ModelConstants.DNET_ACCESS_MODES,
|
|
|
|
|
ModelConstants.DNET_ACCESS_MODES
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def getClosedAccessQualifier(): AccessRight = {
|
|
|
|
|
OafMapperUtils.accessRight(
|
|
|
|
|
"CLOSED",
|
|
|
|
|
"Closed Access",
|
|
|
|
|
ModelConstants.DNET_ACCESS_MODES,
|
|
|
|
|
ModelConstants.DNET_ACCESS_MODES
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def decideAccessRight(lic: Field[String], date: String): AccessRight = {
|
|
|
|
|
if (lic == null) {
|
|
|
|
|
//Default value Unknown
|
|
|
|
|
return getUnknownQualifier()
|
|
|
|
|
}
|
|
|
|
|
val license: String = lic.getValue
|
|
|
|
|
//CC licenses
|
|
|
|
|
if (
|
|
|
|
|
license.startsWith("cc") ||
|
2024-03-20 17:04:52 +01:00
|
|
|
|
license.startsWith("http://creativecommons.org/licenses") ||
|
|
|
|
|
license.startsWith("https://creativecommons.org/licenses") ||
|
2024-03-13 06:56:10 +01:00
|
|
|
|
|
2024-03-20 17:04:52 +01:00
|
|
|
|
//ACS Publications Author choice licenses (considered OPEN also by Unpaywall)
|
|
|
|
|
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") ||
|
|
|
|
|
license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") ||
|
|
|
|
|
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") ||
|
2024-03-13 06:56:10 +01:00
|
|
|
|
|
2024-03-20 17:04:52 +01:00
|
|
|
|
//APA (considered OPEN also by Unpaywall)
|
|
|
|
|
license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")
|
2024-03-13 06:56:10 +01:00
|
|
|
|
) {
|
|
|
|
|
|
|
|
|
|
val oaq: AccessRight = getOpenAccessQualifier()
|
|
|
|
|
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
|
|
|
|
|
return oaq
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED)
|
|
|
|
|
if (
|
|
|
|
|
license.equals(
|
|
|
|
|
"https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"
|
|
|
|
|
)
|
|
|
|
|
) {
|
|
|
|
|
val now = java.time.LocalDate.now
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd"))
|
|
|
|
|
if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) {
|
|
|
|
|
val oaq: AccessRight = getOpenAccessQualifier()
|
|
|
|
|
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
|
|
|
|
|
return oaq
|
|
|
|
|
} else {
|
|
|
|
|
return getEmbargoedAccessQualifier()
|
|
|
|
|
}
|
|
|
|
|
} catch {
|
|
|
|
|
case e: Exception => {
|
|
|
|
|
try {
|
|
|
|
|
val pub_date =
|
|
|
|
|
LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"))
|
|
|
|
|
if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) {
|
|
|
|
|
val oaq: AccessRight = getOpenAccessQualifier()
|
|
|
|
|
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
|
|
|
|
|
return oaq
|
|
|
|
|
} else {
|
|
|
|
|
return getEmbargoedAccessQualifier()
|
|
|
|
|
}
|
|
|
|
|
} catch {
|
|
|
|
|
case ex: Exception => return getClosedAccessQualifier()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
getClosedAccessQualifier()
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-20 17:04:52 +01:00
|
|
|
|
def isValidAuthorName(fullName: String): Boolean = {
|
|
|
|
|
if (fullName == null || fullName.isEmpty)
|
|
|
|
|
return false
|
|
|
|
|
if (invalidName.contains(fullName.toLowerCase.trim))
|
|
|
|
|
return false
|
|
|
|
|
true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def filterResult(publication: Result): Boolean = {
|
|
|
|
|
|
|
|
|
|
//Case empty publication
|
|
|
|
|
if (publication == null)
|
|
|
|
|
return false
|
|
|
|
|
if (publication.getId == null || publication.getId.isEmpty)
|
|
|
|
|
return false
|
|
|
|
|
|
|
|
|
|
//Case publication with no title
|
|
|
|
|
if (publication.getTitle == null || publication.getTitle.size == 0)
|
|
|
|
|
return false
|
|
|
|
|
|
|
|
|
|
val s = publication.getTitle.asScala.count(p =>
|
|
|
|
|
p.getValue != null
|
|
|
|
|
&& p.getValue.nonEmpty && !p.getValue.equalsIgnoreCase("[NO TITLE AVAILABLE]")
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
if (s == 0)
|
|
|
|
|
return false
|
|
|
|
|
|
|
|
|
|
// fixes #4360 (test publisher)
|
|
|
|
|
val publisher =
|
|
|
|
|
if (publication.getPublisher != null) publication.getPublisher.getValue else null
|
|
|
|
|
|
|
|
|
|
if (
|
|
|
|
|
publisher != null && (publisher.equalsIgnoreCase("Test accounts") || publisher
|
|
|
|
|
.equalsIgnoreCase("CrossRef Test Account"))
|
|
|
|
|
) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//RELAXED this constraint
|
|
|
|
|
//Publication with no Author
|
|
|
|
|
if (publication.getAuthor == null || publication.getAuthor.size() == 0)
|
|
|
|
|
return true
|
2024-03-13 06:56:10 +01:00
|
|
|
|
|
2024-03-20 17:04:52 +01:00
|
|
|
|
//filter invalid author
|
|
|
|
|
val authors = publication.getAuthor.asScala.map(s => {
|
|
|
|
|
if (s.getFullname.nonEmpty) {
|
|
|
|
|
s.getFullname
|
|
|
|
|
} else
|
|
|
|
|
s"${s.getName} ${s.getSurname}"
|
|
|
|
|
})
|
2024-03-13 06:56:10 +01:00
|
|
|
|
|
2024-03-20 17:04:52 +01:00
|
|
|
|
val c = authors.count(isValidAuthorName)
|
|
|
|
|
if (c == 0)
|
|
|
|
|
return false
|
|
|
|
|
|
|
|
|
|
// fixes #4368
|
|
|
|
|
if (
|
|
|
|
|
authors.count(s => s.equalsIgnoreCase("Addie Jackson")) > 0 && "Elsevier BV".equalsIgnoreCase(
|
|
|
|
|
publication.getPublisher.getValue
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
return false
|
|
|
|
|
|
|
|
|
|
true
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-25 18:18:10 +01:00
|
|
|
|
def get_unpaywall_color(input: String): Option[OpenAccessRoute] = {
|
|
|
|
|
if (input == null || input.equalsIgnoreCase("close"))
|
|
|
|
|
return None
|
|
|
|
|
if (input.equalsIgnoreCase("green"))
|
|
|
|
|
return Some(OpenAccessRoute.green)
|
|
|
|
|
if (input.equalsIgnoreCase("bronze"))
|
|
|
|
|
return Some(OpenAccessRoute.bronze)
|
|
|
|
|
if (input.equalsIgnoreCase("hybrid"))
|
|
|
|
|
return Some(OpenAccessRoute.hybrid)
|
|
|
|
|
else
|
|
|
|
|
return Some(OpenAccessRoute.gold)
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def get_color(input: String): Option[OpenAccessRoute] = {
|
|
|
|
|
if (input == null || input.equalsIgnoreCase("closed"))
|
|
|
|
|
return None
|
|
|
|
|
if (input.equalsIgnoreCase("green"))
|
|
|
|
|
return Some(OpenAccessRoute.green)
|
|
|
|
|
if (input.equalsIgnoreCase("bronze"))
|
|
|
|
|
return Some(OpenAccessRoute.bronze)
|
|
|
|
|
if (input.equalsIgnoreCase("hybrid"))
|
|
|
|
|
return Some(OpenAccessRoute.hybrid)
|
|
|
|
|
else
|
|
|
|
|
return Some(OpenAccessRoute.gold)
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-20 17:04:52 +01:00
|
|
|
|
def mappingResult(result: Result, json: JValue, instanceType: Qualifier, originalType: String): Result = {
|
2024-03-13 06:56:10 +01:00
|
|
|
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
|
|
|
|
|
|
|
|
|
//MAPPING Crossref DOI into PID
|
|
|
|
|
val doi: String = DoiCleaningRule.normalizeDoi((json \ "DOI").extract[String])
|
2024-03-20 17:04:52 +01:00
|
|
|
|
result.setPid(
|
|
|
|
|
List(
|
|
|
|
|
structuredProperty(
|
|
|
|
|
doi,
|
|
|
|
|
qualifier(
|
|
|
|
|
PidType.doi.toString,
|
|
|
|
|
PidType.doi.toString,
|
|
|
|
|
ModelConstants.DNET_PID_TYPES,
|
|
|
|
|
ModelConstants.DNET_PID_TYPES
|
|
|
|
|
),
|
|
|
|
|
null
|
|
|
|
|
)
|
|
|
|
|
).asJava
|
|
|
|
|
)
|
2024-03-13 06:56:10 +01:00
|
|
|
|
|
|
|
|
|
//MAPPING Crossref DOI into OriginalId
|
|
|
|
|
//and Other Original Identifier of dataset like clinical-trial-number
|
|
|
|
|
val clinicalTrialNumbers = for (JString(ctr) <- json \ "clinical-trial-number") yield ctr
|
|
|
|
|
val alternativeIds = for (JString(ids) <- json \ "alternative-id") yield ids
|
|
|
|
|
val tmp = clinicalTrialNumbers ::: alternativeIds ::: List(doi)
|
|
|
|
|
|
|
|
|
|
val originalIds = new util.ArrayList(tmp.filter(id => id != null).asJava)
|
|
|
|
|
result.setOriginalId(originalIds)
|
|
|
|
|
|
|
|
|
|
// Add DataInfo
|
|
|
|
|
result.setDataInfo(generateDataInfo())
|
|
|
|
|
|
|
|
|
|
result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long])
|
|
|
|
|
result.setDateofcollection((json \ "indexed" \ "date-time").extract[String])
|
|
|
|
|
|
|
|
|
|
result.setCollectedfrom(List(createCrossrefCollectedFrom()).asJava)
|
|
|
|
|
|
|
|
|
|
// Publisher ( Name of work's publisher mapped into Result/Publisher)
|
|
|
|
|
val publisher = (json \ "publisher").extractOrElse[String](null)
|
|
|
|
|
if (publisher != null && publisher.nonEmpty)
|
|
|
|
|
result.setPublisher(field(publisher, null))
|
|
|
|
|
|
|
|
|
|
// TITLE
|
|
|
|
|
val mainTitles =
|
|
|
|
|
for { JString(title) <- json \ "title" if title.nonEmpty } yield {
|
|
|
|
|
structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, null)
|
|
|
|
|
}
|
|
|
|
|
val originalTitles = for {
|
|
|
|
|
JString(title) <- json \ "original-title" if title.nonEmpty
|
|
|
|
|
} yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER, null)
|
|
|
|
|
val shortTitles = for {
|
|
|
|
|
JString(title) <- json \ "short-title" if title.nonEmpty
|
|
|
|
|
} yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER, null)
|
|
|
|
|
val subtitles =
|
|
|
|
|
for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty(
|
|
|
|
|
title,
|
2024-03-20 17:04:52 +01:00
|
|
|
|
ModelConstants.SUBTITLE_QUALIFIER,
|
|
|
|
|
null
|
|
|
|
|
)
|
2024-03-13 06:56:10 +01:00
|
|
|
|
result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)
|
|
|
|
|
|
|
|
|
|
// DESCRIPTION
|
|
|
|
|
val descriptionList =
|
|
|
|
|
for { JString(description) <- json \ "abstract" } yield field[String](description, null)
|
|
|
|
|
result.setDescription(descriptionList.asJava)
|
|
|
|
|
|
|
|
|
|
// Source
|
|
|
|
|
val sourceList = for {
|
|
|
|
|
JString(source) <- json \ "source" if source != null && source.nonEmpty
|
|
|
|
|
} yield field(source, null)
|
|
|
|
|
result.setSource(sourceList.asJava)
|
|
|
|
|
|
|
|
|
|
//RELEVANT DATE Mapping
|
|
|
|
|
val createdDate = generateDate(
|
|
|
|
|
(json \ "created" \ "date-time").extract[String],
|
|
|
|
|
(json \ "created" \ "date-parts").extract[List[List[Int]]],
|
|
|
|
|
"created",
|
|
|
|
|
ModelConstants.DNET_DATACITE_DATE
|
|
|
|
|
)
|
|
|
|
|
val postedDate = generateDate(
|
|
|
|
|
(json \ "posted" \ "date-time").extractOrElse[String](null),
|
|
|
|
|
(json \ "posted" \ "date-parts").extract[List[List[Int]]],
|
|
|
|
|
"available",
|
|
|
|
|
ModelConstants.DNET_DATACITE_DATE
|
|
|
|
|
)
|
|
|
|
|
val acceptedDate = generateDate(
|
|
|
|
|
(json \ "accepted" \ "date-time").extractOrElse[String](null),
|
|
|
|
|
(json \ "accepted" \ "date-parts").extract[List[List[Int]]],
|
|
|
|
|
"accepted",
|
|
|
|
|
ModelConstants.DNET_DATACITE_DATE
|
|
|
|
|
)
|
|
|
|
|
val publishedPrintDate = generateDate(
|
|
|
|
|
(json \ "published-print" \ "date-time").extractOrElse[String](null),
|
|
|
|
|
(json \ "published-print" \ "date-parts").extract[List[List[Int]]],
|
|
|
|
|
"published-print",
|
|
|
|
|
ModelConstants.DNET_DATACITE_DATE
|
|
|
|
|
)
|
|
|
|
|
val publishedOnlineDate = generateDate(
|
|
|
|
|
(json \ "published-online" \ "date-time").extractOrElse[String](null),
|
|
|
|
|
(json \ "published-online" \ "date-parts").extract[List[List[Int]]],
|
|
|
|
|
"published-online",
|
|
|
|
|
ModelConstants.DNET_DATACITE_DATE
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
val issuedDate = extractDate(
|
|
|
|
|
(json \ "issued" \ "date-time").extractOrElse[String](null),
|
|
|
|
|
(json \ "issued" \ "date-parts").extract[List[List[Int]]]
|
|
|
|
|
)
|
|
|
|
|
if (StringUtils.isNotBlank(issuedDate)) {
|
2024-03-20 17:04:52 +01:00
|
|
|
|
result.setDateofacceptance(field(issuedDate, null))
|
2024-03-13 06:56:10 +01:00
|
|
|
|
} else {
|
2024-03-20 17:04:52 +01:00
|
|
|
|
result.setDateofacceptance(field(createdDate.getValue, null))
|
2024-03-13 06:56:10 +01:00
|
|
|
|
}
|
|
|
|
|
result.setRelevantdate(
|
|
|
|
|
List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate)
|
|
|
|
|
.filter(p => p != null)
|
|
|
|
|
.asJava
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
//Mapping Subject
|
|
|
|
|
val subjectList: List[String] = (json \ "subject").extractOrElse[List[String]](List())
|
|
|
|
|
|
|
|
|
|
if (subjectList.nonEmpty) {
|
|
|
|
|
result.setSubject(
|
2024-03-20 17:04:52 +01:00
|
|
|
|
subjectList.map(s => subject(s, ModelConstants.SUBTITLE_QUALIFIER, null)).asJava
|
2024-03-13 06:56:10 +01:00
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//Mapping Author
|
|
|
|
|
val authorList: List[mappingAuthor] =
|
|
|
|
|
(json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined)
|
|
|
|
|
|
|
|
|
|
val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) =>
|
|
|
|
|
a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
|
|
|
|
|
generateAuhtor(a.given.orNull, a.family.get, a.ORCID.orNull, index)
|
|
|
|
|
}.asJava)
|
|
|
|
|
|
|
|
|
|
// Mapping instance
|
|
|
|
|
val instance = new Instance()
|
|
|
|
|
val license = for {
|
|
|
|
|
JObject(license) <- json \ "license"
|
|
|
|
|
JField("URL", JString(lic)) <- license
|
|
|
|
|
JField("content-version", JString(content_version)) <- license
|
2024-03-20 17:04:52 +01:00
|
|
|
|
} yield (field[String](lic, null), content_version)
|
2024-03-13 06:56:10 +01:00
|
|
|
|
val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue))
|
|
|
|
|
if (l.nonEmpty) {
|
|
|
|
|
if (l exists (d => d._2.equals("vor"))) {
|
|
|
|
|
for (d <- l) {
|
|
|
|
|
if (d._2.equals("vor")) {
|
|
|
|
|
instance.setLicense(d._1)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
instance.setLicense(l.head._1)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Ticket #6281 added pid to Instance
|
|
|
|
|
instance.setPid(result.getPid)
|
|
|
|
|
|
|
|
|
|
val has_review = json \ "relation" \ "has-review" \ "id"
|
|
|
|
|
|
|
|
|
|
if (has_review != JNothing) {
|
|
|
|
|
instance.setRefereed(
|
|
|
|
|
OafMapperUtils.qualifier(
|
|
|
|
|
"0001",
|
|
|
|
|
"peerReviewed",
|
|
|
|
|
ModelConstants.DNET_REVIEW_LEVELS,
|
|
|
|
|
ModelConstants.DNET_REVIEW_LEVELS
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
instance.setAccessright(
|
|
|
|
|
decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
|
|
|
|
|
)
|
2024-02-14 11:41:28 +01:00
|
|
|
|
instance.setInstancetype(instanceType)
|
|
|
|
|
|
2024-03-13 06:56:10 +01:00
|
|
|
|
//ADD ORIGINAL TYPE to the mapping
|
|
|
|
|
val itm = new InstanceTypeMapping
|
|
|
|
|
itm.setOriginalType(originalType)
|
|
|
|
|
itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
|
|
|
|
|
instance.setInstanceTypeMapping(List(itm).asJava)
|
|
|
|
|
|
|
|
|
|
instance.setCollectedfrom(createCrossrefCollectedFrom())
|
|
|
|
|
if (StringUtils.isNotBlank(issuedDate)) {
|
2024-03-20 17:04:52 +01:00
|
|
|
|
instance.setDateofacceptance(field(issuedDate, null))
|
2024-03-13 06:56:10 +01:00
|
|
|
|
} else {
|
2024-03-20 17:04:52 +01:00
|
|
|
|
instance.setDateofacceptance(field(createdDate.getValue, null))
|
2024-03-13 06:56:10 +01:00
|
|
|
|
}
|
|
|
|
|
val s: List[String] = List("https://doi.org/" + doi)
|
|
|
|
|
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
|
|
|
|
|
// if (links.nonEmpty) {
|
|
|
|
|
// instance.setUrl(links.asJava)
|
|
|
|
|
// }
|
|
|
|
|
if (s.nonEmpty) {
|
|
|
|
|
instance.setUrl(s.asJava)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result.setInstance(List(instance).asJava)
|
|
|
|
|
|
|
|
|
|
//IMPORTANT
|
|
|
|
|
//The old method result.setId(generateIdentifier(result, doi))
|
|
|
|
|
//is replaced using IdentifierFactory, but the old identifier
|
|
|
|
|
//is preserved among the originalId(s)
|
|
|
|
|
val oldId = generateIdentifier(result, doi)
|
|
|
|
|
result.setId(oldId)
|
|
|
|
|
|
|
|
|
|
val newId = IdentifierFactory.createDOIBoostIdentifier(result)
|
|
|
|
|
if (!oldId.equalsIgnoreCase(newId)) {
|
|
|
|
|
result.getOriginalId.add(oldId)
|
|
|
|
|
}
|
|
|
|
|
result.setId(newId)
|
|
|
|
|
|
|
|
|
|
if (result.getId == null)
|
|
|
|
|
null
|
|
|
|
|
else
|
|
|
|
|
result
|
|
|
|
|
}
|
2024-03-20 17:04:52 +01:00
|
|
|
|
|
2024-03-13 06:56:10 +01:00
|
|
|
|
def generateIdentifier(oaf: Result, doi: String): String = {
|
|
|
|
|
val id = DHPUtils.md5(doi.toLowerCase)
|
|
|
|
|
s"50|doiboost____|$id"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def generateAuhtor(given: String, family: String, orcid: String, index: Int): Author = {
|
|
|
|
|
val a = new Author
|
|
|
|
|
a.setName(given)
|
|
|
|
|
a.setSurname(family)
|
|
|
|
|
a.setFullname(s"$given $family")
|
|
|
|
|
a.setRank(index + 1)
|
|
|
|
|
if (StringUtils.isNotBlank(orcid))
|
|
|
|
|
a.setPid(
|
|
|
|
|
List(
|
|
|
|
|
structuredProperty(
|
|
|
|
|
orcid,
|
2024-03-20 17:04:52 +01:00
|
|
|
|
qualifier(
|
|
|
|
|
ModelConstants.ORCID_PENDING,
|
|
|
|
|
ModelConstants.ORCID_PENDING,
|
|
|
|
|
ModelConstants.DNET_PID_TYPES,
|
|
|
|
|
ModelConstants.DNET_PID_TYPES
|
|
|
|
|
),
|
2024-03-13 06:56:10 +01:00
|
|
|
|
generateDataInfo()
|
|
|
|
|
)
|
|
|
|
|
).asJava
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
a
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-14 11:41:28 +01:00
|
|
|
|
/** *
|
2024-03-20 17:04:52 +01:00
|
|
|
|
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
|
|
|
|
|
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
|
|
|
|
|
* to generate one of the following main entities:
|
|
|
|
|
* - publication
|
|
|
|
|
* - dataset
|
|
|
|
|
* - software
|
|
|
|
|
* - otherresearchproduct
|
|
|
|
|
*
|
|
|
|
|
* @param resourceType
|
|
|
|
|
* @param vocabularies
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2024-02-14 11:41:28 +01:00
|
|
|
|
def getTypeQualifier(
|
2024-03-20 17:04:52 +01:00
|
|
|
|
resourceType: String,
|
|
|
|
|
vocabularies: VocabularyGroup
|
|
|
|
|
): (Qualifier, Qualifier, String) = {
|
2024-02-14 11:41:28 +01:00
|
|
|
|
if (resourceType != null && resourceType.nonEmpty) {
|
|
|
|
|
val typeQualifier =
|
|
|
|
|
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
|
|
|
|
|
if (typeQualifier != null)
|
|
|
|
|
return (
|
|
|
|
|
typeQualifier,
|
|
|
|
|
vocabularies.getSynonymAsQualifier(
|
|
|
|
|
ModelConstants.DNET_RESULT_TYPOLOGIES,
|
|
|
|
|
typeQualifier.getClassid
|
|
|
|
|
),
|
|
|
|
|
resourceType
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
null
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-26 17:26:47 +01:00
|
|
|
|
object TransformationType extends Enumeration {
|
|
|
|
|
type TransformationType = Value
|
|
|
|
|
val OnlyRelation, OnlyResult, All = Value
|
2024-03-25 18:18:10 +01:00
|
|
|
|
}
|
2024-03-26 17:26:47 +01:00
|
|
|
|
import TransformationType._
|
|
|
|
|
|
|
|
|
|
def mergeUnpayWall(r: Result, uw: UnpayWall): Result = {
|
|
|
|
|
if (uw != null) {
|
2024-03-25 18:18:10 +01:00
|
|
|
|
|
2024-03-26 17:26:47 +01:00
|
|
|
|
r.setCollectedfrom(List(r.getCollectedfrom.get(0), createUnpayWallCollectedFrom()).asJava)
|
|
|
|
|
val i: Instance = new Instance()
|
|
|
|
|
i.setCollectedfrom(createUnpayWallCollectedFrom())
|
|
|
|
|
if (uw.best_oa_location != null) {
|
|
|
|
|
i.setUrl(List(uw.best_oa_location.url).asJava)
|
|
|
|
|
if (uw.best_oa_location.license.isDefined) {
|
|
|
|
|
i.setLicense(field[String](uw.best_oa_location.license.get, null))
|
|
|
|
|
}
|
|
|
|
|
val colour = get_unpaywall_color(uw.oa_status)
|
|
|
|
|
if (colour.isDefined) {
|
|
|
|
|
val a = new AccessRight
|
|
|
|
|
a.setClassid(ModelConstants.ACCESS_RIGHT_OPEN)
|
|
|
|
|
a.setClassname(ModelConstants.ACCESS_RIGHT_OPEN)
|
|
|
|
|
a.setSchemeid(ModelConstants.DNET_ACCESS_MODES)
|
|
|
|
|
a.setSchemename(ModelConstants.DNET_ACCESS_MODES)
|
|
|
|
|
a.setOpenAccessRoute(colour.get)
|
|
|
|
|
i.setAccessright(a)
|
|
|
|
|
}
|
|
|
|
|
i.setInstancetype(r.getInstance().get(0).getInstancetype)
|
|
|
|
|
i.setInstanceTypeMapping(r.getInstance().get(0).getInstanceTypeMapping)
|
|
|
|
|
i.setPid(r.getPid)
|
|
|
|
|
r.setInstance(List(r.getInstance().get(0), i).asJava)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
r
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-03 17:07:14 +02:00
|
|
|
|
def generateAffliation(input: Row): List[String] = {
|
|
|
|
|
val doi = input.getString(0)
|
|
|
|
|
val rorId = input.getString(1)
|
|
|
|
|
|
|
|
|
|
val pubId = s"50|${PidType.doi.toString.padTo(12, "_")}::${DoiCleaningRule.normalizeDoi(doi)}"
|
|
|
|
|
val affId = GenerateRorActionSetJob.calculateOpenaireId(rorId)
|
|
|
|
|
|
|
|
|
|
val r: Relation = new Relation
|
|
|
|
|
DoiCleaningRule.clean(doi)
|
|
|
|
|
r.setSource(pubId)
|
|
|
|
|
r.setTarget(affId)
|
|
|
|
|
r.setRelType(ModelConstants.RESULT_ORGANIZATION)
|
|
|
|
|
r.setRelClass(ModelConstants.HAS_AUTHOR_INSTITUTION)
|
|
|
|
|
r.setSubRelType(ModelConstants.AFFILIATION)
|
|
|
|
|
r.setDataInfo(generateDataInfo())
|
|
|
|
|
r.setCollectedfrom(List(createCrossrefCollectedFrom()).asJava)
|
|
|
|
|
val r1: Relation = new Relation
|
|
|
|
|
r1.setTarget(pubId)
|
|
|
|
|
r1.setSource(affId)
|
|
|
|
|
r1.setRelType(ModelConstants.RESULT_ORGANIZATION)
|
|
|
|
|
r1.setRelClass(ModelConstants.IS_AUTHOR_INSTITUTION_OF)
|
|
|
|
|
r1.setSubRelType(ModelConstants.AFFILIATION)
|
|
|
|
|
r1.setDataInfo(generateDataInfo())
|
|
|
|
|
r1.setCollectedfrom(List(createCrossrefCollectedFrom()).asJava)
|
|
|
|
|
List(mapper.writeValueAsString(r), mapper.writeValueAsString(r1))
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-26 17:26:47 +01:00
|
|
|
|
def convert(input: String, vocabularies: VocabularyGroup, mode: TransformationType): List[Oaf] = {
|
2024-03-25 18:18:10 +01:00
|
|
|
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
2024-03-26 17:26:47 +01:00
|
|
|
|
lazy val json: json4s.JValue = parse(input)
|
2024-03-13 06:56:10 +01:00
|
|
|
|
|
2024-03-26 17:26:47 +01:00
|
|
|
|
var resultList: List[Oaf] = List()
|
2024-03-13 06:56:10 +01:00
|
|
|
|
|
|
|
|
|
val objectType = (json \ "type").extractOrElse[String](null)
|
|
|
|
|
if (objectType == null)
|
|
|
|
|
return resultList
|
2024-03-20 17:04:52 +01:00
|
|
|
|
val typology = getTypeQualifier(objectType, vocabularies)
|
2024-02-14 11:41:28 +01:00
|
|
|
|
|
|
|
|
|
if (typology == null)
|
|
|
|
|
return List()
|
2024-03-13 06:56:10 +01:00
|
|
|
|
|
2024-02-14 11:41:28 +01:00
|
|
|
|
val result = generateItemFromType(typology._2)
|
2024-03-13 06:56:10 +01:00
|
|
|
|
if (result == null)
|
|
|
|
|
return List()
|
|
|
|
|
|
2024-02-14 11:41:28 +01:00
|
|
|
|
mappingResult(result, json, typology._1, typology._3)
|
|
|
|
|
|
2024-03-13 06:56:10 +01:00
|
|
|
|
if (result == null || result.getId == null)
|
|
|
|
|
return List()
|
|
|
|
|
|
|
|
|
|
result match {
|
2024-02-14 11:41:28 +01:00
|
|
|
|
case publication: Publication => convertPublication(publication, json, typology._1)
|
2024-03-13 06:56:10 +01:00
|
|
|
|
case dataset: Dataset => convertDataset(dataset)
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-26 17:26:47 +01:00
|
|
|
|
//RELATION SECTION
|
|
|
|
|
if (mode == OnlyRelation || mode == All) {
|
|
|
|
|
val funderList: List[mappingFunder] =
|
|
|
|
|
(json \ "funder").extractOrElse[List[mappingFunder]](List())
|
|
|
|
|
|
|
|
|
|
if (funderList.nonEmpty) {
|
|
|
|
|
resultList = resultList ::: mappingFunderToRelations(
|
|
|
|
|
funderList,
|
|
|
|
|
result.getId,
|
|
|
|
|
createCrossrefCollectedFrom(),
|
|
|
|
|
result.getDataInfo,
|
|
|
|
|
result.getLastupdatetimestamp
|
|
|
|
|
)
|
|
|
|
|
// .map(s => CrossrefResult(s.getClass.getSimpleName, mapper.writeValueAsString(s)))
|
|
|
|
|
}
|
|
|
|
|
val doisReference: List[String] = for {
|
|
|
|
|
JObject(reference_json) <- json \ "reference"
|
|
|
|
|
JField("DOI", JString(doi_json)) <- reference_json
|
|
|
|
|
} yield doi_json
|
|
|
|
|
|
|
|
|
|
if (doisReference != null && doisReference.nonEmpty) {
|
|
|
|
|
val citation_relations: List[Relation] = generateCitationRelations(doisReference, result)
|
|
|
|
|
resultList = resultList ::: citation_relations
|
2024-03-25 18:18:10 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
2024-03-20 17:04:52 +01:00
|
|
|
|
if (!filterResult(result))
|
|
|
|
|
List()
|
2024-03-26 17:26:47 +01:00
|
|
|
|
else {
|
|
|
|
|
if (mode == OnlyResult || mode == All)
|
|
|
|
|
resultList ::: List(result)
|
|
|
|
|
else
|
|
|
|
|
resultList
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// if (uw != null) {
|
|
|
|
|
// result.getCollectedfrom.add(createUnpayWallCollectedFrom())
|
|
|
|
|
// val i: Instance = new Instance()
|
|
|
|
|
// i.setCollectedfrom(createUnpayWallCollectedFrom())
|
|
|
|
|
// if (uw.best_oa_location != null) {
|
|
|
|
|
//
|
|
|
|
|
// i.setUrl(List(uw.best_oa_location.url).asJava)
|
|
|
|
|
// if (uw.best_oa_location.license.isDefined) {
|
|
|
|
|
// i.setLicense(field[String](uw.best_oa_location.license.get, null))
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// val colour = get_unpaywall_color(uw.oa_status)
|
|
|
|
|
// if (colour.isDefined) {
|
|
|
|
|
// val a = new AccessRight
|
|
|
|
|
// a.setClassid(ModelConstants.ACCESS_RIGHT_OPEN)
|
|
|
|
|
// a.setClassname(ModelConstants.ACCESS_RIGHT_OPEN)
|
|
|
|
|
// a.setSchemeid(ModelConstants.DNET_ACCESS_MODES)
|
|
|
|
|
// a.setSchemename(ModelConstants.DNET_ACCESS_MODES)
|
|
|
|
|
// a.setOpenAccessRoute(colour.get)
|
|
|
|
|
// i.setAccessright(a)
|
|
|
|
|
// }
|
|
|
|
|
// i.setPid(result.getPid)
|
|
|
|
|
// result.getInstance().add(i)
|
|
|
|
|
// }
|
|
|
|
|
// }
|
2024-03-20 17:04:52 +01:00
|
|
|
|
|
2024-03-13 06:56:10 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private def createCiteRelation(source: Result, targetPid: String, targetPidType: String): List[Relation] = {
|
|
|
|
|
|
|
|
|
|
val targetId = IdentifierFactory.idFromPid("50", targetPidType, targetPid, true)
|
|
|
|
|
|
|
|
|
|
val from = new Relation
|
|
|
|
|
from.setSource(source.getId)
|
|
|
|
|
from.setTarget(targetId)
|
|
|
|
|
from.setRelType(ModelConstants.RESULT_RESULT)
|
|
|
|
|
from.setRelClass(ModelConstants.CITES)
|
|
|
|
|
from.setSubRelType(ModelConstants.CITATION)
|
|
|
|
|
from.setCollectedfrom(source.getCollectedfrom)
|
|
|
|
|
from.setDataInfo(source.getDataInfo)
|
|
|
|
|
from.setLastupdatetimestamp(source.getLastupdatetimestamp)
|
|
|
|
|
|
|
|
|
|
List(from)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def generateCitationRelations(dois: List[String], result: Result): List[Relation] = {
|
|
|
|
|
dois.flatMap(d => createCiteRelation(result, d, "doi"))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def mappingFunderToRelations(
|
|
|
|
|
funders: List[mappingFunder],
|
|
|
|
|
sourceId: String,
|
|
|
|
|
cf: KeyValue,
|
|
|
|
|
di: DataInfo,
|
|
|
|
|
ts: Long
|
|
|
|
|
): List[Relation] = {
|
|
|
|
|
|
|
|
|
|
val queue = new mutable.Queue[Relation]
|
|
|
|
|
|
|
|
|
|
def snsfRule(award: String): String = {
|
|
|
|
|
val tmp1 = StringUtils.substringAfter(award, "_")
|
|
|
|
|
val tmp2 = StringUtils.substringBefore(tmp1, "/")
|
|
|
|
|
logger.debug(s"From $award to $tmp2")
|
|
|
|
|
tmp2
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def extractECAward(award: String): String = {
|
|
|
|
|
val awardECRegex: Regex = "[0-9]{4,9}".r
|
|
|
|
|
if (awardECRegex.findAllIn(award).hasNext)
|
|
|
|
|
return awardECRegex.findAllIn(award).max
|
|
|
|
|
null
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def generateRelation(sourceId: String, targetId: String, relClass: String): Relation = {
|
|
|
|
|
|
|
|
|
|
val r = new Relation
|
|
|
|
|
r.setSource(sourceId)
|
|
|
|
|
r.setTarget(targetId)
|
|
|
|
|
r.setRelType(ModelConstants.RESULT_PROJECT)
|
|
|
|
|
r.setRelClass(relClass)
|
|
|
|
|
r.setSubRelType(ModelConstants.OUTCOME)
|
|
|
|
|
r.setCollectedfrom(List(cf).asJava)
|
|
|
|
|
r.setDataInfo(di)
|
|
|
|
|
r.setLastupdatetimestamp(ts)
|
|
|
|
|
r
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def generateSimpleRelationFromAward(
|
|
|
|
|
funder: mappingFunder,
|
|
|
|
|
nsPrefix: String,
|
|
|
|
|
extractField: String => String
|
|
|
|
|
): Unit = {
|
|
|
|
|
if (funder.award.isDefined && funder.award.get.nonEmpty)
|
|
|
|
|
funder.award.get
|
|
|
|
|
.map(extractField)
|
|
|
|
|
.filter(a => a != null && a.nonEmpty)
|
|
|
|
|
.foreach(award => {
|
|
|
|
|
val targetId = getProjectId(nsPrefix, DHPUtils.md5(award))
|
|
|
|
|
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
|
|
|
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def getProjectId(nsPrefix: String, targetId: String): String = {
|
|
|
|
|
s"40|$nsPrefix::$targetId"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (funders != null)
|
|
|
|
|
funders.foreach(funder => {
|
|
|
|
|
if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
|
|
|
|
|
|
|
|
|
|
if (getIrishId(funder.DOI.get).isDefined) {
|
|
|
|
|
val nsPrefix = getIrishId(funder.DOI.get).get.padTo(12, '_')
|
|
|
|
|
val targetId = getProjectId(nsPrefix, "1e5e62235d094afd01cd56e65112fc63")
|
|
|
|
|
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
|
|
|
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
funder.DOI.get match {
|
|
|
|
|
case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
|
|
|
|
|
"10.13039/100010665" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
|
|
|
|
|
case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
|
|
|
|
|
case "10.13039/501100000781" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
|
|
|
|
|
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
|
|
|
|
|
generateSimpleRelationFromAward(funder, "corda_____he", extractECAward)
|
|
|
|
|
case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a)
|
|
|
|
|
case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a)
|
|
|
|
|
case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a)
|
|
|
|
|
case "10.13039/501100001602" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""))
|
|
|
|
|
case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a)
|
|
|
|
|
case "10.13039/501100000038" =>
|
|
|
|
|
val targetId = getProjectId("nserc_______", "1e5e62235d094afd01cd56e65112fc63")
|
|
|
|
|
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
|
|
|
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
|
|
|
|
case "10.13039/501100000155" =>
|
|
|
|
|
val targetId = getProjectId("sshrc_______", "1e5e62235d094afd01cd56e65112fc63")
|
|
|
|
|
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
|
|
|
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
|
|
|
|
case "10.13039/501100000024" =>
|
|
|
|
|
val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63")
|
|
|
|
|
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
|
|
|
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
|
|
|
|
|
|
|
|
|
case "10.13039/100020031" =>
|
|
|
|
|
val targetId = getProjectId("tara________", "1e5e62235d094afd01cd56e65112fc63")
|
|
|
|
|
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
|
|
|
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
|
|
|
|
|
|
|
|
|
case "10.13039/501100005416" => generateSimpleRelationFromAward(funder, "rcn_________", a => a)
|
|
|
|
|
case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a)
|
|
|
|
|
case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward)
|
|
|
|
|
case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a => a)
|
|
|
|
|
case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward)
|
|
|
|
|
case "10.13039/501100003407" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "miur________", a => a)
|
|
|
|
|
val targetId = getProjectId("miur________", "1e5e62235d094afd01cd56e65112fc63")
|
|
|
|
|
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
|
|
|
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
|
|
|
|
case "10.13039/501100006588" | "10.13039/501100004488" =>
|
|
|
|
|
generateSimpleRelationFromAward(
|
|
|
|
|
funder,
|
|
|
|
|
"irb_hr______",
|
|
|
|
|
a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", "")
|
|
|
|
|
)
|
|
|
|
|
case "10.13039/501100006769" => generateSimpleRelationFromAward(funder, "rsf_________", a => a)
|
|
|
|
|
case "10.13039/501100001711" => generateSimpleRelationFromAward(funder, "snsf________", snsfRule)
|
|
|
|
|
case "10.13039/501100004410" => generateSimpleRelationFromAward(funder, "tubitakf____", a => a)
|
|
|
|
|
case "10.13039/100004440" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "wt__________", a => a)
|
|
|
|
|
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
|
|
|
|
|
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
|
|
|
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
|
|
|
|
//ASAP
|
|
|
|
|
case "10.13039/100018231" => generateSimpleRelationFromAward(funder, "asap________", a => a)
|
|
|
|
|
//CHIST-ERA
|
|
|
|
|
case "10.13039/501100001942" =>
|
|
|
|
|
val targetId = getProjectId("chistera____", "1e5e62235d094afd01cd56e65112fc63")
|
|
|
|
|
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
|
|
|
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
|
|
|
|
//HE
|
|
|
|
|
case "10.13039/100018693" | "10.13039/100018694" | "10.13039/100019188" | "10.13039/100019180" |
|
|
|
|
|
"10.13039/100018695" | "10.13039/100019185" | "10.13039/100019186" | "10.13039/100019187" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "corda_____he", extractECAward)
|
|
|
|
|
//FCT
|
|
|
|
|
case "10.13039/501100001871" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "fct_________", a => a)
|
|
|
|
|
//NHMRC
|
|
|
|
|
case "10.13039/501100000925" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "nhmrc_______", a => a)
|
|
|
|
|
//NIH
|
|
|
|
|
case "10.13039/100000002" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "nih_________", a => a)
|
|
|
|
|
//NWO
|
|
|
|
|
case "10.13039/501100003246" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "nwo_________", a => a)
|
|
|
|
|
//UKRI
|
|
|
|
|
case "10.13039/100014013" | "10.13039/501100000267" | "10.13039/501100000268" | "10.13039/501100000269" |
|
|
|
|
|
"10.13039/501100000266" | "10.13039/501100006041" | "10.13039/501100000265" | "10.13039/501100000270" |
|
|
|
|
|
"10.13039/501100013589" | "10.13039/501100000271" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "ukri________", a => a)
|
2024-04-03 14:59:09 +02:00
|
|
|
|
//HFRI
|
|
|
|
|
case "10.13039/501100013209" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "hfri________", a => a)
|
|
|
|
|
val targetId = getProjectId("hfri________", "1e5e62235d094afd01cd56e65112fc63")
|
|
|
|
|
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
|
|
|
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
2024-04-05 18:12:53 +02:00
|
|
|
|
//ERASMUS+
|
2024-04-03 14:59:09 +02:00
|
|
|
|
case "10.13039/501100010790" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "erasmusplus_", a => a)
|
2024-03-13 06:56:10 +01:00
|
|
|
|
case _ => logger.debug("no match for " + funder.DOI.get)
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
funder.name match {
|
|
|
|
|
case "European Union’s Horizon 2020 research and innovation program" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
|
|
|
|
|
case "European Union's" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
|
|
|
|
|
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
|
|
|
|
|
generateSimpleRelationFromAward(funder, "corda_____he", extractECAward)
|
|
|
|
|
case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "anr_________", a => a)
|
|
|
|
|
case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "conicytf____", a => a)
|
|
|
|
|
case "Wellcome Trust Masters Fellowship" =>
|
|
|
|
|
generateSimpleRelationFromAward(funder, "wt__________", a => a)
|
|
|
|
|
val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
|
|
|
|
|
queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY)
|
|
|
|
|
queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES)
|
|
|
|
|
case _ => logger.debug("no match for " + funder.name)
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
})
|
|
|
|
|
queue.toList
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def convertDataset(dataset: Dataset): Unit = {
|
|
|
|
|
// TODO check if there are other info to map into the Dataset
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-14 11:41:28 +01:00
|
|
|
|
def convertPublication(publication: Publication, json: JValue, cobjCategory: Qualifier): Unit = {
|
2024-03-13 06:56:10 +01:00
|
|
|
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
|
|
|
|
val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
|
|
|
|
|
|
|
|
|
|
//Mapping book
|
2024-02-14 11:41:28 +01:00
|
|
|
|
if (cobjCategory.getClassname.toLowerCase.contains("book")) {
|
2024-03-13 06:56:10 +01:00
|
|
|
|
val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
|
|
|
|
|
if (ISBN.nonEmpty && containerTitles.nonEmpty) {
|
|
|
|
|
val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
|
|
|
|
|
if (publication.getSource != null) {
|
|
|
|
|
val l: List[Field[String]] = publication.getSource.asScala.toList
|
2024-03-20 17:04:52 +01:00
|
|
|
|
val ll: List[Field[String]] = l ::: List(field(source, null))
|
2024-03-13 06:56:10 +01:00
|
|
|
|
publication.setSource(ll.asJava)
|
|
|
|
|
} else
|
2024-03-20 17:04:52 +01:00
|
|
|
|
publication.setSource(List(field(source, null)).asJava)
|
2024-03-13 06:56:10 +01:00
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
// Mapping Journal
|
|
|
|
|
|
|
|
|
|
val issnInfos = for {
|
|
|
|
|
JArray(issn_types) <- json \ "issn-type"
|
|
|
|
|
JObject(issn_type) <- issn_types
|
|
|
|
|
JField("type", JString(tp)) <- issn_type
|
|
|
|
|
JField("value", JString(vl)) <- issn_type
|
|
|
|
|
} yield Tuple2(tp, vl)
|
|
|
|
|
|
|
|
|
|
val volume = (json \ "volume").extractOrElse[String](null)
|
|
|
|
|
if (containerTitles.nonEmpty) {
|
|
|
|
|
val journal = new Journal
|
|
|
|
|
journal.setName(containerTitles.head)
|
|
|
|
|
if (issnInfos.nonEmpty) {
|
|
|
|
|
|
|
|
|
|
issnInfos.foreach(tp => {
|
|
|
|
|
tp._1 match {
|
|
|
|
|
case "electronic" => journal.setIssnOnline(tp._2)
|
|
|
|
|
case "print" => journal.setIssnPrinted(tp._2)
|
2024-04-26 16:03:04 +02:00
|
|
|
|
case _ =>
|
2024-03-13 06:56:10 +01:00
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
journal.setVol(volume)
|
|
|
|
|
val page = (json \ "page").extractOrElse[String](null)
|
|
|
|
|
if (page != null) {
|
|
|
|
|
val pp = page.split("-")
|
|
|
|
|
if (pp.nonEmpty)
|
|
|
|
|
journal.setSp(pp.head)
|
|
|
|
|
if (pp.size > 1)
|
|
|
|
|
journal.setEp(pp(1))
|
|
|
|
|
}
|
|
|
|
|
publication.setJournal(journal)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def extractDate(dt: String, datePart: List[List[Int]]): String = {
|
|
|
|
|
if (StringUtils.isNotBlank(dt))
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(dt)
|
|
|
|
|
if (datePart != null && datePart.size == 1) {
|
|
|
|
|
val res = datePart.head
|
|
|
|
|
if (res.size == 3) {
|
|
|
|
|
val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
|
|
|
|
|
if (dp.length == 10) {
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(dp)
|
|
|
|
|
}
|
|
|
|
|
} else if (res.size == 2) {
|
|
|
|
|
val dp = f"${res.head}-${res(1)}%02d-01"
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(dp)
|
|
|
|
|
} else if (res.size == 1) {
|
|
|
|
|
return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
null
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
def generateDate(
|
|
|
|
|
dt: String,
|
|
|
|
|
datePart: List[List[Int]],
|
|
|
|
|
classId: String,
|
|
|
|
|
schemeId: String
|
|
|
|
|
): StructuredProperty = {
|
|
|
|
|
val dp = extractDate(dt, datePart)
|
|
|
|
|
if (StringUtils.isNotBlank(dp))
|
2024-03-20 17:04:52 +01:00
|
|
|
|
return structuredProperty(dp, qualifier(classId, classId, schemeId, schemeId), null)
|
2024-03-13 06:56:10 +01:00
|
|
|
|
null
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-14 11:41:28 +01:00
|
|
|
|
def generateItemFromType(objectType: Qualifier): Result = {
|
|
|
|
|
if (objectType.getClassid.equalsIgnoreCase("publication")) {
|
|
|
|
|
val item = new Publication
|
|
|
|
|
item.setResourcetype(objectType)
|
|
|
|
|
return item
|
|
|
|
|
} else if (objectType.getClassid.equalsIgnoreCase("dataset")) {
|
|
|
|
|
val item = new Dataset
|
|
|
|
|
item.setResourcetype(objectType)
|
|
|
|
|
return item
|
2024-03-20 17:04:52 +01:00
|
|
|
|
} else if (objectType.getClassid.equalsIgnoreCase("software")) {
|
2024-02-14 11:41:28 +01:00
|
|
|
|
val item = new Software
|
|
|
|
|
item.setResourcetype(objectType)
|
|
|
|
|
return item
|
2024-03-20 17:04:52 +01:00
|
|
|
|
} else if (objectType.getClassid.equalsIgnoreCase("OtherResearchProduct")) {
|
2024-02-14 11:41:28 +01:00
|
|
|
|
val item = new OtherResearchProduct
|
|
|
|
|
item.setResourcetype(objectType)
|
|
|
|
|
return item
|
2024-03-13 06:56:10 +01:00
|
|
|
|
}
|
|
|
|
|
null
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|