2021-10-20 17:37:42 +02:00
|
|
|
package eu.dnetlib.dhp.datacite
|
2021-01-28 16:34:46 +01:00
|
|
|
|
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper
|
|
|
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
2021-11-25 10:54:13 +01:00
|
|
|
import eu.dnetlib.dhp.datacite.DataciteModelConstants._
|
2021-01-28 16:34:46 +01:00
|
|
|
import eu.dnetlib.dhp.schema.action.AtomicAction
|
2021-03-31 17:07:13 +02:00
|
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
2021-06-04 15:10:20 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
|
2021-12-06 11:26:36 +01:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _}
|
2021-01-28 16:34:46 +01:00
|
|
|
import eu.dnetlib.dhp.utils.DHPUtils
|
|
|
|
import org.apache.commons.lang3.StringUtils
|
|
|
|
import org.json4s.DefaultFormats
|
|
|
|
import org.json4s.JsonAST.{JField, JObject, JString}
|
|
|
|
import org.json4s.jackson.JsonMethods.parse
|
|
|
|
|
2021-01-29 10:45:07 +01:00
|
|
|
import java.text.SimpleDateFormat
|
2021-01-28 16:34:46 +01:00
|
|
|
import java.time.LocalDate
|
2021-06-21 10:39:46 +02:00
|
|
|
import java.time.chrono.ThaiBuddhistDate
|
2021-01-28 16:34:46 +01:00
|
|
|
import java.time.format.DateTimeFormatter
|
2021-06-04 15:10:20 +02:00
|
|
|
import java.util.{Date, Locale}
|
2021-01-28 16:34:46 +01:00
|
|
|
import scala.collection.JavaConverters._
|
|
|
|
|
|
|
|
object DataciteToOAFTransformation {
|
|
|
|
|
|
|
|
// Jackson mapper exposed as a public member of this object.
// Note: toActionSet declares its own local `mapper`, which shadows this one inside that method.
val mapper = new ObjectMapper()
|
|
|
|
|
2022-01-11 16:57:48 +01:00
|
|
|
/** Tells whether a raw record has to be discarded because its JSON text
  * contains at least one of the entries listed in `datacite_filter`.
  *
  * @param json the raw JSON text of the record
  * @return true if the record should be skipped
  */
def skip_record(json: String): Boolean =
  datacite_filter.exists(blocked => json.contains(blocked))
|
|
|
|
|
2021-11-25 10:54:13 +01:00
|
|
|
/** Wraps an OAF entity into an [[AtomicAction]] and serializes the action to JSON.
  *
  * @param item the entity to wrap; Dataset, Publication, Software,
  *             OtherResearchProduct and Relation are supported
  * @return the pair (canonical name of the runtime class of `item`, JSON of the action),
  *         or null when `item` is of any other type
  */
@deprecated("this method will be removed", "dhp")
def toActionSet(item: Oaf): (String, String) = {
  val mapper = new ObjectMapper()

  // Builds the action for one payload and returns it together with the payload class name.
  def pack[T <: Oaf](payload: T, clazz: Class[T]): (String, String) = {
    val action: AtomicAction[T] = new AtomicAction[T]
    action.setClazz(clazz)
    action.setPayload(payload)
    (payload.getClass.getCanonicalName, mapper.writeValueAsString(action))
  }

  item match {
    case dataset: OafDataset       => pack(dataset, classOf[OafDataset])
    case publication: Publication  => pack(publication, classOf[Publication])
    case software: Software        => pack(software, classOf[Software])
    case orp: OtherResearchProduct => pack(orp, classOf[OtherResearchProduct])
    case relation: Relation        => pack(relation, classOf[Relation])
    case _                         => null
  }
}
|
|
|
|
|
|
|
|
/** Checks whether an embargo has already expired.
  *
  * @param embargo_end_date the embargo end date, parsed with the pattern `[yyyy-MM-dd]`
  * @return true when today's date is strictly after the embargo end date
  */
def embargo_end(embargo_end_date: String): Boolean = {
  val dayFormat = DateTimeFormatter.ofPattern("[yyyy-MM-dd]")
  val embargoEnd = LocalDate.parse(embargo_end_date, dayFormat)
  LocalDate.now().isAfter(embargoEnd)
}
|
|
|
|
|
|
|
|
/** Extracts a date from a free-text value and normalizes it.
  *
  * The patterns in `Date_regex` are tried in order and the first one that finds a
  * match wins. A 4-character match is treated as a bare year and expanded to
  * `01-01-<year>`. The candidate is then parsed with `df_en` and, if that fails,
  * with `df_it`.
  *
  * Only non-fatal failures are treated as "not a date": fatal JVM errors are no
  * longer swallowed as they were by the previous `catch Throwable`.
  *
  * @param input the raw text to inspect
  * @return the parsed date rendered through `LocalDate.toString`, or None when no
  *         pattern matches or neither formatter can parse the matched text
  */
def extract_date(input: String): Option[String] = {
  import scala.util.Try

  // First textual match among the known patterns, evaluated lazily pattern by pattern.
  val firstMatch: Option[String] = Date_regex.iterator
    .map(pattern => pattern.matcher(input))
    .collectFirst { case matcher if matcher.find() => matcher.group(0) }

  firstMatch.flatMap { raw =>
    val candidate = if (raw.length == 4) s"01-01-$raw" else raw
    Try(LocalDate.parse(candidate, df_en))
      .orElse(Try(LocalDate.parse(candidate, df_it)))
      .toOption
      .map(_.toString)
  }
}
|
|
|
|
|
2021-12-06 11:26:36 +01:00
|
|
|
/** Re-reads a date whose year is expressed in the Thai Buddhist calendar and
  * returns the corresponding ISO date.
  *
  * The input is parsed with the given pattern; its year, month and day are then
  * used to build a [[java.time.chrono.ThaiBuddhistDate]], which is converted back
  * to a `LocalDate`.
  *
  * Only non-fatal failures produce the empty-string fallback: fatal JVM errors are
  * no longer swallowed as they were by the previous `catch Throwable`.
  *
  * @param input  the date text
  * @param format the `DateTimeFormatter` pattern used to parse `input`
  * @return the converted date rendered through `LocalDate.toString`, or "" when the
  *         input cannot be parsed or converted
  */
def fix_thai_date(input: String, format: String): String = {
  import scala.util.Try

  Try {
    val parsed = LocalDate.parse(input, DateTimeFormatter.ofPattern(format))
    val thai = ThaiBuddhistDate.of(parsed.getYear, parsed.getMonth.getValue, parsed.getDayOfMonth)
    LocalDate.from(thai).toString
  }.getOrElse("")
}
|
2021-11-25 10:54:13 +01:00
|
|
|
|
2022-01-11 16:57:48 +01:00
|
|
|
/** Resolves the instance type and the result typology of a record.
  *
  * The candidates are looked up as synonyms in the DNET_PUBLICATION_RESOURCE
  * vocabulary in this order: `resourceType`, `schemaOrg`, `resourceTypeGeneral`.
  * Null or empty candidates are skipped, and the lookup stops at the first
  * candidate that yields a non-null qualifier.
  *
  * @param resourceType        value of the record's resourceType
  * @param resourceTypeGeneral value of the record's resourceTypeGeneral
  * @param schemaOrg           value of the record's schemaOrg type
  * @param vocabularies        vocabularies used for the lookups
  * @return the pair (type qualifier, qualifier obtained by looking up its class id in
  *         DNET_RESULT_TYPOLOGIES), or null when no candidate can be resolved
  */
def getTypeQualifier(
  resourceType: String,
  resourceTypeGeneral: String,
  schemaOrg: String,
  vocabularies: VocabularyGroup
): (Qualifier, Qualifier) =
  // An Iterator keeps the lookups lazy: later candidates are only queried when needed.
  Iterator(resourceType, schemaOrg, resourceTypeGeneral)
    .filter(candidate => candidate != null && candidate.nonEmpty)
    .map(candidate =>
      vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, candidate)
    )
    .find(qualifier => qualifier != null)
    .map(qualifier =>
      (
        qualifier,
        vocabularies.getSynonymAsQualifier(
          ModelConstants.DNET_RESULT_TYPOLOGIES,
          qualifier.getClassid
        )
      )
    )
    .orNull
|
|
|
|
|
2022-01-11 16:57:48 +01:00
|
|
|
/** Creates the empty result entity matching the type of a record.
  *
  * The type is resolved through [[getTypeQualifier]]; the class name of the
  * typology qualifier selects the concrete entity ("dataset", "publication",
  * "software" or "other"). The created entity carries a single Instance whose
  * instance type is the resolved type qualifier.
  *
  * @param resourceType        value of the record's resourceType
  * @param resourceTypeGeneral value of the record's resourceTypeGeneral
  * @param schemaOrg           value of the record's schemaOrg type
  * @param vocabularies        vocabularies used for the lookups
  * @return the new entity, or null when the type cannot be resolved, the typology
  *         qualifier is missing, or its class name is not one of the four known values
  */
def getResult(
  resourceType: String,
  resourceTypeGeneral: String,
  schemaOrg: String,
  vocabularies: VocabularyGroup
): Result = {
  val typeQualifiers: (Qualifier, Qualifier) =
    getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)

  // The typology lookup may itself yield null; treat that like an unresolved type.
  if (typeQualifiers == null || typeQualifiers._2 == null)
    null
  else {
    val entity: Result = typeQualifiers._2.getClassname match {
      case "dataset"     => new OafDataset
      case "publication" => new Publication
      case "software"    => new Software
      case "other"       => new OtherResearchProduct
      // Unknown (or null) class name: report "unmapped" instead of throwing a MatchError.
      case _ => null
    }
    if (entity != null) {
      val i = new Instance
      i.setInstancetype(typeQualifiers._1)
      entity.setInstance(List(i).asJava)
    }
    entity
  }
}
|
|
|
|
|
|
|
|
/** Checks whether a record declares a date of type "available".
  *
  * Every `dates` node found anywhere in the JSON is inspected and its string
  * `dateType` fields are compared, ignoring case, with "available".
  *
  * @param input the raw JSON text of the record
  * @return true when at least one such dateType is found
  */
def available_date(input: String): Boolean = {
  val declaredTypes: List[String] = for {
    JObject(dateFields) <- parse(input) \\ "dates"
    JField("dateType", JString(dateType)) <- dateFields
  } yield dateType

  declaredTypes.exists(dateType => dateType.equalsIgnoreCase("available"))
}
|
|
|
|
|
2022-01-11 16:57:48 +01:00
|
|
|
/** As described in ticket #6377:
  * when the result comes from figshare, its subjects are removed
  * and the access right of every instance is set to OPEN.
  *
  * A result is considered to come from figshare when at least one of its
  * instances has a hostedby whose value equals "figshare" (case-insensitive).
  * Results without an instance list are left untouched.
  *
  * @param r the result to fix in place
  */
def fix_figshare(r: Result): Unit =
  Option(r.getInstance()).map(_.asScala).foreach { instances =>
    val fromFigshare = instances.exists { inst =>
      val host = inst.getHostedby
      host != null && "figshare".equalsIgnoreCase(host.getValue)
    }
    if (fromFigshare) {
      for (inst <- instances) inst.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT())
      r.setSubject(List.empty[StructuredProperty].asJava)
    }
  }
|
|
|
|
|
2021-06-04 10:14:22 +02:00
|
|
|
/** Builds an identifier of the form `<prefix part>::<md5 of the lower-cased pid>`.
  *
  * The prefix part is `idPrefix|<lower-cased pidType>`, right-padded with '_'
  * up to 15 characters.
  *
  * @param pid      the persistent identifier value
  * @param pidType  the persistent identifier type
  * @param idPrefix the leading prefix of the identifier
  * @return the composed identifier
  */
def createDNetTargetIdentifier(pid: String, pidType: String, idPrefix: String): String = {
  val paddedPrefix = s"$idPrefix|${pidType.toLowerCase}".padTo(15, '_')
  val pidHash = IdentifierFactory.md5(pid.toLowerCase)
  s"$paddedPrefix::$pidHash"
}
|
|
|
|
|
2021-01-28 16:34:46 +01:00
|
|
|
/** Wraps a date string and its qualifier into a StructuredProperty, with no data info.
  *
  * @param dt the date value
  * @param q  the qualifier describing the kind of date
  * @return the structured property built by `OafMapperUtils.structuredProperty`
  */
def generateOAFDate(dt: String, q: Qualifier): StructuredProperty =
  OafMapperUtils.structuredProperty(dt, q, null)
|
|
|
|
|
2022-01-11 16:57:48 +01:00
|
|
|
/** Creates a relation of type RESULT_PROJECT / OUTCOME between two entities.
  *
  * @param sourceId identifier of the source entity
  * @param targetId identifier of the target entity
  * @param relClass relation class to set on the relation
  * @param cf       the single collectedfrom entry of the relation
  * @param di       data info attached to the relation
  * @return the populated relation
  */
def generateRelation(
  sourceId: String,
  targetId: String,
  relClass: String,
  cf: KeyValue,
  di: DataInfo
): Relation = {
  val relation = new Relation

  // endpoints
  relation.setSource(sourceId)
  relation.setTarget(targetId)

  // semantics
  relation.setRelType(ModelConstants.RESULT_PROJECT)
  relation.setSubRelType(ModelConstants.OUTCOME)
  relation.setRelClass(relClass)

  // provenance
  relation.setCollectedfrom(List(cf).asJava)
  relation.setDataInfo(di)

  relation
}
|
|
|
|
|
2021-04-20 09:44:44 +02:00
|
|
|
/** Builds the "isProducedBy" relation towards the project referenced by an award URI.
  *
  * The first entry of `funder_regex` whose pattern is found in the URI is used:
  * the grant id is obtained by replacing every match of the pattern with its
  * capture group 2, and the target id is the entry's prefix followed by the MD5
  * of the grant id.
  *
  * @param awardUri the award URI taken from the funding reference
  * @param sourceId identifier of the result that is the source of the relation
  * @return a single-element list with the relation, or an empty list when no
  *         funder pattern is found in the URI
  */
def get_projectRelation(awardUri: String, sourceId: String): List[Relation] =
  funder_regex
    .find(entry => entry._1.matcher(awardUri).find())
    .map { entry =>
      val grantId = entry._1.matcher(awardUri).replaceAll("$2")
      val targetId = s"${entry._2}${DHPUtils.md5(grantId)}"
      generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo)
    }
    .toList
|
|
|
|
|
2022-01-11 16:57:48 +01:00
|
|
|
/** Transforms one raw JSON record into a list of OAF entities.
  *
  * The record is dropped (empty list) when it is filtered out by `skip_record`,
  * has an empty DOI, has a type that `getResult` cannot map, has no authors, or
  * ends up without an identifier.
  *
  * @param input            the raw JSON text of the record
  * @param ts               not referenced anywhere in this method body
  * @param dateOfCollection collection timestamp; it is multiplied by 1000 before
  *                         building a `java.util.Date` (presumably epoch seconds — TODO confirm)
  * @param vocabularies     vocabularies used to resolve types, pid schemes, dates,
  *                         languages and access rights
  * @param exportLinks      when true, relations derived from `relatedIdentifiers` are produced too
  * @return the result entity followed by its relations (if any), or an empty list
  *         when the record is dropped
  */
def generateOAF(
  input: String,
  ts: Long,
  dateOfCollection: Long,
  vocabularies: VocabularyGroup,
  exportLinks: Boolean
): List[Oaf] = {
  // Records containing a blacklisted text are discarded before any parsing.
  if (skip_record(input))
    return List()

  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  lazy val json = parse(input)

  // Type hints of the record; each one is null when absent.
  val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
  val resourceTypeGeneral =
    (json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)
  val schemaOrg = (json \ "attributes" \ "types" \ "schemaOrg").extractOrElse[String](null)

  // `extract` (unlike `extractOrElse`) throws when the DOI attribute cannot be extracted.
  val doi = (json \ "attributes" \ "doi").extract[String]
  if (doi.isEmpty)
    return List()

  //Mapping type based on vocabularies dnet:publication_resource and dnet:result_typologies
  val result = getResult(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
  if (result == null)
    return List()

  // The DOI becomes the pid, the original id and the seed of a first identifier.
  val doi_q = OafMapperUtils.qualifier(
    "doi",
    "doi",
    ModelConstants.DNET_PID_TYPES,
    ModelConstants.DNET_PID_TYPES
  )
  val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
  result.setPid(List(pid).asJava)
  // First id assignment; it is overwritten further down by IdentifierFactory.createIdentifier.
  result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
  result.setOriginalId(List(doi).asJava)

  val d = new Date(dateOfCollection * 1000)
  val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)

  // Both the collection and the transformation date are derived from dateOfCollection.
  result.setDateofcollection(ISO8601FORMAT.format(d))
  result.setDateoftransformation(ISO8601FORMAT.format(d))
  result.setDataInfo(dataInfo)

  // `\\` searches the whole document, not only the attributes node.
  val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List())

  // One Author per creator; the rank is the 1-based position in the creators list.
  val authors = creators.zipWithIndex.map { case (c, idx) =>
    val a = new Author
    a.setFullname(c.name.orNull)
    a.setName(c.givenName.orNull)
    a.setSurname(c.familyName.orNull)
    if (
      c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null
    ) {
      a.setPid(
        c.nameIdentifiers.get
          .map(ni => {
            // Qualifier of the identifier scheme, or null when the scheme is not declared.
            val q =
              if (ni.nameIdentifierScheme.isDefined)
                vocabularies.getTermAsQualifier(
                  ModelConstants.DNET_PID_TYPES,
                  ni.nameIdentifierScheme.get.toLowerCase()
                )
              else null
            // NOTE(review): identifiers without a value are mapped to null and the null
            // entries are not filtered out before being stored in the pid list.
            if (ni.nameIdentifier != null && ni.nameIdentifier.isDefined) {
              OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo)
            } else
              null

          })
          .asJava
      )
    }
    if (c.affiliation.isDefined)
      a.setAffiliation(
        c.affiliation.get
          .filter(af => af.nonEmpty)
          .map(af => OafMapperUtils.field(af, dataInfo))
          .asJava
      )
    a.setRank(idx + 1)
    a
  }

  val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())

  // Titles without a titleType are main titles; the others keep their declared type.
  result.setTitle(
    titles
      .filter(t => t.title.nonEmpty)
      .map(t => {
        if (t.titleType.isEmpty) {
          OafMapperUtils
            .structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER, null)
        } else {
          OafMapperUtils.structuredProperty(
            t.title.get,
            t.titleType.get,
            t.titleType.get,
            ModelConstants.DNET_DATACITE_TITLE,
            ModelConstants.DNET_DATACITE_TITLE,
            null
          )
        }
      })
      .asJava
  )

  // Records without any author are dropped.
  if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
    return List()
  result.setAuthor(authors.asJava)

  // `extract` throws when the dates node cannot be extracted as a list of DateType.
  val dates = (json \\ "dates").extract[List[DateType]]
  val publication_year = (json \\ "publicationYear").extractOrElse[String](null)

  // i_date: normalized value of the first "issued" date (Option of Option: found / parsable).
  val i_date = dates
    .filter(d => d.date.isDefined && d.dateType.isDefined)
    .find(d => d.dateType.get.equalsIgnoreCase("issued"))
    .map(d => extract_date(d.date.get))
  // a_date: first "available" date that can be normalized.
  val a_date: Option[String] = dates
    .filter(d =>
      d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available")
    )
    .map(d => extract_date(d.date.get))
    .find(d => d != null && d.isDefined)
    .map(d => d.get)

  // The "available" date is stored as embargo end date. DOIs starting with 10.14457
  // go through fix_thai_date before being stored.
  if (a_date.isDefined) {
    if (doi.startsWith("10.14457"))
      result.setEmbargoenddate(
        OafMapperUtils.field(fix_thai_date(a_date.get, "[yyyy-MM-dd]"), null)
      )
    else
      result.setEmbargoenddate(OafMapperUtils.field(a_date.get, null))
  }
  // Date of acceptance: the "issued" date when available, otherwise January 1st of the
  // publication year. It is written both on the result and on its first instance.
  if (i_date.isDefined && i_date.get.isDefined) {
    if (doi.startsWith("10.14457")) {
      result.setDateofacceptance(
        OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null)
      )
      result
        .getInstance()
        .get(0)
        .setDateofacceptance(
          OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null)
        )
    } else {
      result.setDateofacceptance(OafMapperUtils.field(i_date.get.get, null))
      result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(i_date.get.get, null))
    }
  } else if (publication_year != null) {
    if (doi.startsWith("10.14457")) {
      result.setDateofacceptance(
        OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null)
      )
      result
        .getInstance()
        .get(0)
        .setDateofacceptance(
          OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null)
        )

    } else {
      // NOTE(review): here the value is stored as "01-01-<year>" without normalization,
      // unlike the other branches, which store the output of extract_date / fix_thai_date.
      result.setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null))
      result
        .getInstance()
        .get(0)
        .setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null))
    }
  }

  // Relevant dates: every date that can be normalized and whose lower-cased type
  // resolves to a term of DNET_DATACITE_DATE.
  result.setRelevantdate(
    dates
      .filter(d => d.date.isDefined && d.dateType.isDefined)
      .map(d => (extract_date(d.date.get), d.dateType.get))
      .filter(d => d._1.isDefined)
      .map(d =>
        (
          d._1.get,
          vocabularies.getTermAsQualifier(ModelConstants.DNET_DATACITE_DATE, d._2.toLowerCase())
        )
      )
      .filter(d => d._2 != null)
      .map(d => generateOAFDate(d._1, d._2))
      .asJava
  )

  val subjects = (json \\ "subjects").extract[List[SubjectType]]

  // All subjects are stored with the same SUBJ_CLASS qualifier.
  result.setSubject(
    subjects
      .filter(s => s.subject.nonEmpty)
      .map(s =>
        OafMapperUtils.structuredProperty(
          s.subject.get,
          SUBJ_CLASS,
          SUBJ_CLASS,
          ModelConstants.DNET_SUBJECT_TYPOLOGIES,
          ModelConstants.DNET_SUBJECT_TYPOLOGIES,
          null
        )
      )
      .asJava
  )

  result.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)

  val descriptions = (json \\ "descriptions").extract[List[DescriptionType]]

  result.setDescription(
    descriptions
      .filter(d => d.description.isDefined)
      .map(d => OafMapperUtils.field(d.description.get, null))
      .filter(s => s != null)
      .asJava
  )

  val publisher = (json \\ "publisher").extractOrElse[String](null)
  if (publisher != null)
    result.setPublisher(OafMapperUtils.field(publisher, null))

  val language: String = (json \\ "language").extractOrElse[String](null)

  if (language != null)
    result.setLanguage(
      vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, language)
    )

  // The single instance created by getResult.
  val instance = result.getInstance().get(0)

  val client = (json \ "relationships" \ "client" \\ "id").extractOpt[String]

  // Every string rightsUri found under any rightsList node.
  val accessRights: List[String] = for {
    JObject(rightsList) <- json \\ "rightsList"
    JField("rightsUri", JString(rightsUri)) <- rightsList
  } yield rightsUri

  // First rights URI that is a known synonym in DNET_ACCESS_MODES, copied into an AccessRight.
  val aRights: Option[AccessRight] = accessRights
    .map(r => {
      vocabularies.getSynonymAsQualifier(ModelConstants.DNET_ACCESS_MODES, r)
    })
    .find(q => q != null)
    .map(q => {
      val a = new AccessRight
      a.setClassid(q.getClassid)
      a.setClassname(q.getClassname)
      a.setSchemeid(q.getSchemeid)
      a.setSchemename(q.getSchemename)
      a
    })

  // Falls back to UNKNOWN / NOT_AVAILABLE when no rights URI can be resolved.
  val access_rights_qualifier =
    if (aRights.isDefined) aRights.get
    else
      OafMapperUtils.accessRight(
        ModelConstants.UNKNOWN,
        ModelConstants.NOT_AVAILABLE,
        ModelConstants.DNET_ACCESS_MODES,
        ModelConstants.DNET_ACCESS_MODES
      )

  // Hosting, url, access right, pid and license of the instance are only filled in
  // when the record declares a client id; the client id value itself is not used.
  if (client.isDefined) {

    instance.setHostedby(
      OafMapperUtils.keyValue(
        generateDSId(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID),
        ModelConstants.UNKNOWN_REPOSITORY.getValue
      )
    )
    instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
    instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
    instance.setAccessright(access_rights_qualifier)
    instance.setPid(result.getPid)
    // The license is the first http rights URI that matches one of the known license paths.
    val license = accessRights
      .find(r =>
        r.startsWith("http") && r.matches(
          ".*(/licenses|/publicdomain|unlicense\\.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*"
        )
      )
    if (license.isDefined)
      instance.setLicense(OafMapperUtils.field(license.get, null))
  }

  // Every string awardUri found under any fundingReferences node.
  val awardUris: List[String] = for {
    JObject(fundingReferences) <- json \\ "fundingReferences"
    JField("awardUri", JString(awardUri)) <- fundingReferences
  } yield awardUri

  // Final identifier, computed from the populated result; it replaces the id set earlier.
  result.setId(IdentifierFactory.createIdentifier(result))
  // `var` because link relations may be appended below when exportLinks is set.
  var relations: List[Relation] =
    awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null)

  fix_figshare(result)

  // Note: this check runs after the project relations above were already built with the id.
  if (result.getId == null)
    return List()

  if (exportLinks) {
    // Only related identifiers carrying all three string fields are considered.
    val rels: List[RelatedIdentifierType] = for {
      JObject(relIdentifier) <- json \\ "relatedIdentifiers"
      JField("relationType", JString(relationType)) <- relIdentifier
      JField("relatedIdentifierType", JString(relatedIdentifierType)) <- relIdentifier
      JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier
    } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType)

    // The issued date (or null when missing) is passed along as the relation date.
    relations = relations ::: generateRelations(
      rels,
      result.getId,
      if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null
    )
  }
  if (relations != null && relations.nonEmpty) {
    List(result) ::: relations
  } else
    List(result)
}
|
|
|
|
|
2022-01-11 16:57:48 +01:00
|
|
|
/** Builds the relations derived from the related identifiers of a record.
  *
  * Only related identifiers whose relation type is a key of `subRelTypeMapping`
  * and whose identifier type is doi, pmid or arxiv (case-insensitive) are kept.
  * The target of each relation is the unresolved identifier generated from the
  * related identifier and its type.
  *
  * @param rels the related identifiers of the record
  * @param id   identifier of the source result
  * @param date value stored in the relation properties under DATE_RELATION_KEY;
  *             it is stored as received, null included
  * @return one relation per accepted related identifier
  */
private def generateRelations(
  rels: List[RelatedIdentifierType],
  id: String,
  date: String
): List[Relation] = {
  val supportedPidTypes = List("doi", "pmid", "arxiv")

  rels
    .filter(r =>
      subRelTypeMapping.contains(r.relationType) &&
        supportedPidTypes.exists(pidType => r.relatedIdentifierType.equalsIgnoreCase(pidType))
    )
    .map(r => {
      val rel = new Relation
      // Provenance is set once; the original set it twice and then evaluated a
      // discarded expression over the collectedfrom values.
      rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
      rel.setDataInfo(dataInfo)

      val subRelType = subRelTypeMapping(r.relationType).relType
      rel.setRelType(REL_TYPE_VALUE)
      rel.setSubRelType(subRelType)
      rel.setRelClass(r.relationType)

      val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
      rel.setProperties(List(dateProps).asJava)

      rel.setSource(id)
      rel.setTarget(
        DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
      )
      rel
    })
}
|
|
|
|
|
2021-01-28 16:34:46 +01:00
|
|
|
/** Builds a datasource identifier from an original id of the form `<prefix>::<local id>`.
  *
  * @param input the original identifier
  * @return `10|<text before the first "::">::<md5 of the text after the first "::">`
  */
def generateDSId(input: String): String = {
  val separator = "::"
  val nsPrefix = StringUtils.substringBefore(input, separator)
  val localId = StringUtils.substringAfter(input, separator)
  s"10|$nsPrefix::${DHPUtils.md5(localId)}"
}
|
|
|
|
|
2021-12-06 11:26:36 +01:00
|
|
|
}
|