fix resolved url format, added alternate identifier from original pid

This commit is contained in:
Enrico Ottonello 2022-03-22 16:39:21 +01:00
parent afe84c4244
commit f11dfc51f7
2 changed files with 55 additions and 45 deletions

View File

@ -12,12 +12,17 @@ import java.util.regex.Pattern
The following classes are utility classes used for the mapping from
Bioschema DataCite JSON to the OAF schema
*/
/** A related-identifier entry used while mapping a Bioschema DataCite JSON record to OAF.
  *
  * @param relationType          the relation type, carried as a plain string
  * @param relatedIdentifier     the identifier value of the related entity
  * @param relatedIdentifierType the type label given for `relatedIdentifier`
  */
case class RelatedIdentifierType(
  relationType: String,
  relatedIdentifier: String,
  relatedIdentifierType: String
)
/** Wrapper around a single `alternateIdentifier` string taken from a record's
  * `alternateIdentifiers` JSON section.
  *
  * @param alternateIdentifier the identifier value exactly as it appears in the JSON
  */
case class AlternateIdentifierType(alternateIdentifier: String)
case class IdentifierType(
identifier: String,
identifierType: String
@ -41,10 +46,10 @@ case class CreatorType(
/** A title entry; every attribute is optional.
  *
  * @param title     the title text, if present
  * @param titleType the title type, if present
  * @param lang      the language of the title, if present
  */
case class TitleType(
  title: Option[String],
  titleType: Option[String],
  lang: Option[String]
)
/** A subject entry; every attribute is optional.
  *
  * @param schemeURI     URI of the subject scheme, if present
  * @param value         the subject value, if present
  * @param subjectScheme name of the subject scheme, if present
  */
case class SubjectType(
  schemeURI: Option[String],
  value: Option[String],
  subjectScheme: Option[String]
)
/** A description entry; both attributes are optional.
  *
  * @param descriptionType the description type, if present
  * @param description     the description text, if present
  */
case class DescriptionType(
  descriptionType: Option[String],
  description: Option[String]
)
@ -70,9 +75,10 @@ object BioschemaModelConstants {
val PED_PREFIX: String = "ped_________"
// Short identifier key -> base URL that is prepended to an identifier value of that kind.
val resolvedURL: Map[String, String] = Map(
  "uniprot" -> "https://www.uniprot.org/uniprot/",
  "pubmed" -> "https://pubmed.ncbi.nlm.nih.gov/"
)

// Known identifier URL prefix -> identifier type name. An identifier that starts with one of
// these prefixes is split into the type name (the map value) and the remainder after the prefix.
val resolvedURLPattern: Map[String, String] = Map(
  "https://identifiers.org/pubmed:" -> "pubmed",
  "http://purl.uniprot.org/uniprot/" -> "uniprot",
  "https://identifiers.org/uniprot:" -> "uniprot"
)
val collectedFromMap: Map[String, KeyValue] = {

View File

@ -11,9 +11,7 @@ import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.parse
import java.text.SimpleDateFormat
import java.time.LocalDate
import java.util.{Date, Locale}
import scala.collection.JavaConverters._
object BioschemaToOAFTransformation {
@ -200,32 +198,42 @@ object BioschemaToOAFTransformation {
if (result.getId == null)
return List()
// Collect every `alternateIdentifier` string found under `alternateIdentifiers` in the record JSON.
val alternativeIdentifierUrls: List[AlternateIdentifierType] = for {
  JObject(alternateIdentifiers)                               <- json \\ "alternateIdentifiers"
  JField("alternateIdentifier", JString(alternateIdentifier)) <- alternateIdentifiers
} yield AlternateIdentifierType(alternateIdentifier)

// Only the first alternate identifier is considered. When it starts with one of the known URL
// prefixes, the text after the prefix becomes the pid value and the mapped name its pid type.
// headOption / collectFirst keep this total: a record without alternate identifiers, or with an
// unrecognised prefix, yields None instead of an IndexOutOfBoundsException / NoSuchElementException.
val resolvedAlternateIdentifier: Option[StructuredProperty] =
  alternativeIdentifierUrls.headOption
    .flatMap { first =>
      resolvedURLPattern.collectFirst {
        case (prefix, pidType) if first.alternateIdentifier.startsWith(prefix) =>
          OafMapperUtils.structuredProperty(
            StringUtils.substringAfter(first.alternateIdentifier, prefix),
            pidType,
            pidType,
            ModelConstants.DNET_PID_TYPES,
            ModelConstants.DNET_PID_TYPES,
            dataInfo
          )
      }
    }
    // Defensive: drop a null coming back from the Java helper rather than storing it in the list.
    .flatMap(Option(_))

// The original pid is always exposed as an alternate identifier, typed with the datasource key.
val defaultAlternatedIdentifer: StructuredProperty = OafMapperUtils.structuredProperty(
  pid,
  datasourceKey,
  datasourceKey,
  ModelConstants.DNET_PID_TYPES,
  ModelConstants.DNET_PID_TYPES,
  dataInfo
)

// Resolved identifier first (when there is one), then the default one built from the pid.
val finalAlternativeIdentifiers: List[StructuredProperty] =
  resolvedAlternateIdentifier.toList ::: List(defaultAlternatedIdentifer)
instance.setAlternateIdentifier(finalAlternativeIdentifiers.asJava)
if (exportLinks) {
val rels: List[RelatedIdentifierType] = for {
@ -272,21 +280,17 @@ object BioschemaToOAFTransformation {
rel.setProperties(List(dateProps).asJava)
// Derive the relation target from the related identifier: when it starts with a known URL
// prefix, the text after the prefix is the identifier value and the mapped name is its type.
// NOTE(review): when no known prefix matches, no target is set here — confirm that such
// relations are handled (or dropped) by the code that follows.
resolvedURLPattern
  .find { case (prefix, _) => r.relatedIdentifier.startsWith(prefix) }
  .foreach { case (prefix, pidType) =>
    val relatedId = StringUtils.substringAfter(r.relatedIdentifier, prefix)
    rel.setTarget(DHPUtils.generateUnresolvedIdentifier(relatedId, pidType))
  }
rel.setSource(id)
rel.setCollectedfrom(List(collectedFromMap(datasourceKey)).asJava)
rel.getCollectedfrom.asScala.map(c => c.getValue).toList