Fix resolved URL format; add alternate identifier from original PID

This commit is contained in:
Enrico Ottonello 2022-03-22 16:39:21 +01:00
parent afe84c4244
commit f11dfc51f7
2 changed files with 55 additions and 45 deletions

View File

@ -12,12 +12,17 @@ import java.util.regex.Pattern
The following classes are utility classes used for the mapping from The following classes are utility classes used for the mapping from
bioschema json datacite to OAF Schema bioschema json datacite to OAF Schema
*/ */
case class RelatedIdentifierType( case class RelatedIdentifierType(
relationType: String, relationType: String,
relatedIdentifier: String, relatedIdentifier: String,
relatedIdentifierType: String relatedIdentifierType: String
) {} ) {}
/** Wrapper around a single alternate identifier value.
  *
  * @param alternateIdentifier the raw identifier string exactly as it was read from the input record
  */
case class AlternateIdentifierType(alternateIdentifier: String)
case class IdentifierType( case class IdentifierType(
identifier: String, identifier: String,
identifierType: String identifierType: String
@ -41,10 +46,10 @@ case class CreatorType(
case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {} case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {}
case class SubjectType( case class SubjectType(
schemeURI: Option[String], schemeURI: Option[String],
value: Option[String], value: Option[String],
subjectScheme: Option[String] subjectScheme: Option[String]
) {} ) {}
case class DescriptionType(descriptionType: Option[String], description: Option[String]) {} case class DescriptionType(descriptionType: Option[String], description: Option[String]) {}
@ -70,9 +75,10 @@ object BioschemaModelConstants {
val PED_PREFIX: String = "ped_________" val PED_PREFIX: String = "ped_________"
val resolvedURL: Map[String, String] = Map( val resolvedURLPattern: Map[String, String] = Map(
"uniprot" -> "https://www.uniprot.org/uniprot/", "https://identifiers.org/pubmed:" -> "pubmed",
"pubmed" -> "https://pubmed.ncbi.nlm.nih.gov/" "http://purl.uniprot.org/uniprot/" -> "uniprot",
"https://identifiers.org/uniprot:" -> "uniprot"
) )
val collectedFromMap: Map[String, KeyValue] = { val collectedFromMap: Map[String, KeyValue] = {

View File

@ -11,9 +11,7 @@ import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.parse import org.json4s.jackson.JsonMethods.parse
import java.text.SimpleDateFormat
import java.time.LocalDate import java.time.LocalDate
import java.util.{Date, Locale}
import scala.collection.JavaConverters._ import scala.collection.JavaConverters._
object BioschemaToOAFTransformation { object BioschemaToOAFTransformation {
@ -200,32 +198,42 @@ object BioschemaToOAFTransformation {
if (result.getId == null) if (result.getId == null)
return List() return List()
val alternativeIdentifierUrls: List[String] = for { val alternativeIdentifierUrls: List[AlternateIdentifierType] = for {
JObject(alternateIdentifiers) <- json \\ "alternateIdentifiers" JObject(alternateIdentifiers) <- json \\ "alternateIdentifiers"
JField("alternateIdentifier", JString(alternateIdentifier)) <- alternateIdentifiers JField("alternateIdentifier", JString(alternateIdentifier)) <- alternateIdentifiers
foundResolvedURLId = resolvedURL } yield AlternateIdentifierType(alternateIdentifier)
.map(k => {
if (alternateIdentifier.contains(s"${k._1}:"))
k._1
else
null
})
.find(s => s != null)
alternativeIdentifierUrl = StringUtils.substringAfter(alternateIdentifier, s"${foundResolvedURLId.get}:")
} yield alternativeIdentifierUrl
alternativeIdentifierUrls.map(id => { val alternativeIdentifierUrl: AlternateIdentifierType = alternativeIdentifierUrls.asJava.get(0)
var alternateIdentifier: StructuredProperty = null
alternateIdentifier = OafMapperUtils.structuredProperty( val alternativeIdentifiers = resolvedURLPattern
id, .map(pattern => {
"uniprot", if (alternativeIdentifierUrl.alternateIdentifier.startsWith(s"${pattern._1}")) {
"uniprot", val relatedId = StringUtils.substringAfter(alternativeIdentifierUrl.alternateIdentifier, s"${pattern._1}")
ModelConstants.DNET_PID_TYPES, OafMapperUtils.structuredProperty(
ModelConstants.DNET_PID_TYPES, relatedId,
dataInfo pattern._2,
) pattern._2,
instance.setAlternateIdentifier(List(alternateIdentifier).asJava) ModelConstants.DNET_PID_TYPES,
}) ModelConstants.DNET_PID_TYPES,
dataInfo
)
} else
null
})
.find(s => s != null)
.get
val defaultAlternatedIdentifer: StructuredProperty = OafMapperUtils.structuredProperty(
pid,
datasourceKey,
datasourceKey,
ModelConstants.DNET_PID_TYPES,
ModelConstants.DNET_PID_TYPES,
dataInfo
)
var finalAlternativeIdentifiers: List[StructuredProperty] = List()
finalAlternativeIdentifiers = List(alternativeIdentifiers) ::: List(defaultAlternatedIdentifer)
instance.setAlternateIdentifier(finalAlternativeIdentifiers.asJava)
if (exportLinks) { if (exportLinks) {
val rels: List[RelatedIdentifierType] = for { val rels: List[RelatedIdentifierType] = for {
@ -272,21 +280,17 @@ object BioschemaToOAFTransformation {
rel.setProperties(List(dateProps).asJava) rel.setProperties(List(dateProps).asJava)
val foundResolvedURLId = resolvedURL resolvedURLPattern
.map(k => { .map(p => {
if (r.relatedIdentifier.contains(s"${k._1}:")) if (r.relatedIdentifier.startsWith(s"${p._1}")) {
k._1 val relatedId = StringUtils.substringAfter(r.relatedIdentifier, s"${p._1}")
else rel.setTarget(
DHPUtils.generateUnresolvedIdentifier(relatedId, p._2)
)
} else
null null
}) })
.find(s => s != null); .find(s => s != null)
if (foundResolvedURLId.nonEmpty) {
val relatedId = StringUtils.substringAfter(r.relatedIdentifier, s"${foundResolvedURLId.get}:")
rel.setTarget(s"${resolvedURL(foundResolvedURLId.get)}${relatedId}")
} else
rel.setTarget(
DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
)
rel.setSource(id) rel.setSource(id)
rel.setCollectedfrom(List(collectedFromMap(datasourceKey)).asJava) rel.setCollectedfrom(List(collectedFromMap(datasourceKey)).asJava)
rel.getCollectedfrom.asScala.map(c => c.getValue).toList rel.getCollectedfrom.asScala.map(c => c.getValue).toList