forked from D-Net/dnet-hadoop
Fix resolved URL format; add alternate identifier from the original PID
This commit is contained in:
parent
afe84c4244
commit
f11dfc51f7
|
@ -12,12 +12,17 @@ import java.util.regex.Pattern
|
||||||
The following classes are utility classes used for the mapping from
|
The following classes are utility classes used for the mapping from
|
||||||
bioschema json datacite to OAF Schema
|
bioschema json datacite to OAF Schema
|
||||||
*/
|
*/
|
||||||
|
|
||||||
case class RelatedIdentifierType(
|
case class RelatedIdentifierType(
|
||||||
relationType: String,
|
relationType: String,
|
||||||
relatedIdentifier: String,
|
relatedIdentifier: String,
|
||||||
relatedIdentifierType: String
|
relatedIdentifierType: String
|
||||||
) {}
|
) {}
|
||||||
|
|
||||||
|
case class AlternateIdentifierType(
|
||||||
|
alternateIdentifier: String
|
||||||
|
) {}
|
||||||
|
|
||||||
case class IdentifierType(
|
case class IdentifierType(
|
||||||
identifier: String,
|
identifier: String,
|
||||||
identifierType: String
|
identifierType: String
|
||||||
|
@ -41,10 +46,10 @@ case class CreatorType(
|
||||||
case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {}
|
case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {}
|
||||||
|
|
||||||
case class SubjectType(
|
case class SubjectType(
|
||||||
schemeURI: Option[String],
|
schemeURI: Option[String],
|
||||||
value: Option[String],
|
value: Option[String],
|
||||||
subjectScheme: Option[String]
|
subjectScheme: Option[String]
|
||||||
) {}
|
) {}
|
||||||
|
|
||||||
case class DescriptionType(descriptionType: Option[String], description: Option[String]) {}
|
case class DescriptionType(descriptionType: Option[String], description: Option[String]) {}
|
||||||
|
|
||||||
|
@ -70,9 +75,10 @@ object BioschemaModelConstants {
|
||||||
|
|
||||||
val PED_PREFIX: String = "ped_________"
|
val PED_PREFIX: String = "ped_________"
|
||||||
|
|
||||||
val resolvedURL: Map[String, String] = Map(
|
val resolvedURLPattern: Map[String, String] = Map(
|
||||||
"uniprot" -> "https://www.uniprot.org/uniprot/",
|
"https://identifiers.org/pubmed:" -> "pubmed",
|
||||||
"pubmed" -> "https://pubmed.ncbi.nlm.nih.gov/"
|
"http://purl.uniprot.org/uniprot/" -> "uniprot",
|
||||||
|
"https://identifiers.org/uniprot:" -> "uniprot"
|
||||||
)
|
)
|
||||||
|
|
||||||
val collectedFromMap: Map[String, KeyValue] = {
|
val collectedFromMap: Map[String, KeyValue] = {
|
||||||
|
|
|
@ -11,9 +11,7 @@ import org.json4s.DefaultFormats
|
||||||
import org.json4s.JsonAST.{JField, JObject, JString}
|
import org.json4s.JsonAST.{JField, JObject, JString}
|
||||||
import org.json4s.jackson.JsonMethods.parse
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
|
|
||||||
import java.text.SimpleDateFormat
|
|
||||||
import java.time.LocalDate
|
import java.time.LocalDate
|
||||||
import java.util.{Date, Locale}
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
object BioschemaToOAFTransformation {
|
object BioschemaToOAFTransformation {
|
||||||
|
@ -200,32 +198,42 @@ object BioschemaToOAFTransformation {
|
||||||
if (result.getId == null)
|
if (result.getId == null)
|
||||||
return List()
|
return List()
|
||||||
|
|
||||||
val alternativeIdentifierUrls: List[String] = for {
|
val alternativeIdentifierUrls: List[AlternateIdentifierType] = for {
|
||||||
JObject(alternateIdentifiers) <- json \\ "alternateIdentifiers"
|
JObject(alternateIdentifiers) <- json \\ "alternateIdentifiers"
|
||||||
JField("alternateIdentifier", JString(alternateIdentifier)) <- alternateIdentifiers
|
JField("alternateIdentifier", JString(alternateIdentifier)) <- alternateIdentifiers
|
||||||
foundResolvedURLId = resolvedURL
|
} yield AlternateIdentifierType(alternateIdentifier)
|
||||||
.map(k => {
|
|
||||||
if (alternateIdentifier.contains(s"${k._1}:"))
|
|
||||||
k._1
|
|
||||||
else
|
|
||||||
null
|
|
||||||
})
|
|
||||||
.find(s => s != null)
|
|
||||||
alternativeIdentifierUrl = StringUtils.substringAfter(alternateIdentifier, s"${foundResolvedURLId.get}:")
|
|
||||||
} yield alternativeIdentifierUrl
|
|
||||||
|
|
||||||
alternativeIdentifierUrls.map(id => {
|
val alternativeIdentifierUrl: AlternateIdentifierType = alternativeIdentifierUrls.asJava.get(0)
|
||||||
var alternateIdentifier: StructuredProperty = null
|
|
||||||
alternateIdentifier = OafMapperUtils.structuredProperty(
|
val alternativeIdentifiers = resolvedURLPattern
|
||||||
id,
|
.map(pattern => {
|
||||||
"uniprot",
|
if (alternativeIdentifierUrl.alternateIdentifier.startsWith(s"${pattern._1}")) {
|
||||||
"uniprot",
|
val relatedId = StringUtils.substringAfter(alternativeIdentifierUrl.alternateIdentifier, s"${pattern._1}")
|
||||||
ModelConstants.DNET_PID_TYPES,
|
OafMapperUtils.structuredProperty(
|
||||||
ModelConstants.DNET_PID_TYPES,
|
relatedId,
|
||||||
dataInfo
|
pattern._2,
|
||||||
)
|
pattern._2,
|
||||||
instance.setAlternateIdentifier(List(alternateIdentifier).asJava)
|
ModelConstants.DNET_PID_TYPES,
|
||||||
})
|
ModelConstants.DNET_PID_TYPES,
|
||||||
|
dataInfo
|
||||||
|
)
|
||||||
|
} else
|
||||||
|
null
|
||||||
|
})
|
||||||
|
.find(s => s != null)
|
||||||
|
.get
|
||||||
|
|
||||||
|
val defaultAlternatedIdentifer: StructuredProperty = OafMapperUtils.structuredProperty(
|
||||||
|
pid,
|
||||||
|
datasourceKey,
|
||||||
|
datasourceKey,
|
||||||
|
ModelConstants.DNET_PID_TYPES,
|
||||||
|
ModelConstants.DNET_PID_TYPES,
|
||||||
|
dataInfo
|
||||||
|
)
|
||||||
|
var finalAlternativeIdentifiers: List[StructuredProperty] = List()
|
||||||
|
finalAlternativeIdentifiers = List(alternativeIdentifiers) ::: List(defaultAlternatedIdentifer)
|
||||||
|
instance.setAlternateIdentifier(finalAlternativeIdentifiers.asJava)
|
||||||
|
|
||||||
if (exportLinks) {
|
if (exportLinks) {
|
||||||
val rels: List[RelatedIdentifierType] = for {
|
val rels: List[RelatedIdentifierType] = for {
|
||||||
|
@ -272,21 +280,17 @@ object BioschemaToOAFTransformation {
|
||||||
|
|
||||||
rel.setProperties(List(dateProps).asJava)
|
rel.setProperties(List(dateProps).asJava)
|
||||||
|
|
||||||
val foundResolvedURLId = resolvedURL
|
resolvedURLPattern
|
||||||
.map(k => {
|
.map(p => {
|
||||||
if (r.relatedIdentifier.contains(s"${k._1}:"))
|
if (r.relatedIdentifier.startsWith(s"${p._1}")) {
|
||||||
k._1
|
val relatedId = StringUtils.substringAfter(r.relatedIdentifier, s"${p._1}")
|
||||||
else
|
rel.setTarget(
|
||||||
|
DHPUtils.generateUnresolvedIdentifier(relatedId, p._2)
|
||||||
|
)
|
||||||
|
} else
|
||||||
null
|
null
|
||||||
})
|
})
|
||||||
.find(s => s != null);
|
.find(s => s != null)
|
||||||
if (foundResolvedURLId.nonEmpty) {
|
|
||||||
val relatedId = StringUtils.substringAfter(r.relatedIdentifier, s"${foundResolvedURLId.get}:")
|
|
||||||
rel.setTarget(s"${resolvedURL(foundResolvedURLId.get)}${relatedId}")
|
|
||||||
} else
|
|
||||||
rel.setTarget(
|
|
||||||
DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
|
|
||||||
)
|
|
||||||
rel.setSource(id)
|
rel.setSource(id)
|
||||||
rel.setCollectedfrom(List(collectedFromMap(datasourceKey)).asJava)
|
rel.setCollectedfrom(List(collectedFromMap(datasourceKey)).asJava)
|
||||||
rel.getCollectedfrom.asScala.map(c => c.getValue).toList
|
rel.getCollectedfrom.asScala.map(c => c.getValue).toList
|
||||||
|
|
Loading…
Reference in New Issue