forked from D-Net/dnet-hadoop
fix resolved url format, added alternate identifier from original pid
This commit is contained in:
parent
afe84c4244
commit
f11dfc51f7
|
@ -12,12 +12,17 @@ import java.util.regex.Pattern
|
|||
The following classes are utility classes used for the mapping from
|
||||
bioschema json datacite to OAF Schema
|
||||
*/
|
||||
|
||||
/** One related-identifier entry, held as three plain strings:
  * the relation type, the related identifier itself, and the type of that identifier.
  */
case class RelatedIdentifierType(
|
||||
relationType: String,
|
||||
relatedIdentifier: String,
|
||||
relatedIdentifierType: String
|
||||
) {}
|
||||
|
||||
/** Wrapper around a single alternate identifier string.
  * The transformation builds one instance per `alternateIdentifier` string field
  * found under `alternateIdentifiers` in the parsed JSON.
  */
case class AlternateIdentifierType(
|
||||
alternateIdentifier: String
|
||||
) {}
|
||||
|
||||
case class IdentifierType(
|
||||
identifier: String,
|
||||
identifierType: String
|
||||
|
@ -41,10 +46,10 @@ case class CreatorType(
|
|||
/** Title entry used while mapping a record; every field is optional.
  *
  * @param title     the title text, when present
  * @param titleType the type of the title, when present
  * @param lang      presumably the language of the title (confirm against the input schema)
  */
case class TitleType(
  title: Option[String],
  titleType: Option[String],
  lang: Option[String]
)
|
||||
|
||||
case class SubjectType(
|
||||
schemeURI: Option[String],
|
||||
value: Option[String],
|
||||
subjectScheme: Option[String]
|
||||
) {}
|
||||
schemeURI: Option[String],
|
||||
value: Option[String],
|
||||
subjectScheme: Option[String]
|
||||
) {}
|
||||
|
||||
/** Description entry used while mapping a record; both fields are optional.
  *
  * @param descriptionType the type of the description, when present
  * @param description     the description text, when present
  */
case class DescriptionType(
  descriptionType: Option[String],
  description: Option[String]
)
|
||||
|
||||
|
@ -70,9 +75,10 @@ object BioschemaModelConstants {
|
|||
|
||||
val PED_PREFIX: String = "ped_________"
|
||||
|
||||
val resolvedURL: Map[String, String] = Map(
|
||||
"uniprot" -> "https://www.uniprot.org/uniprot/",
|
||||
"pubmed" -> "https://pubmed.ncbi.nlm.nih.gov/"
|
||||
val resolvedURLPattern: Map[String, String] = Map(
|
||||
"https://identifiers.org/pubmed:" -> "pubmed",
|
||||
"http://purl.uniprot.org/uniprot/" -> "uniprot",
|
||||
"https://identifiers.org/uniprot:" -> "uniprot"
|
||||
)
|
||||
|
||||
val collectedFromMap: Map[String, KeyValue] = {
|
||||
|
|
|
@ -11,9 +11,7 @@ import org.json4s.DefaultFormats
|
|||
import org.json4s.JsonAST.{JField, JObject, JString}
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
|
||||
import java.text.SimpleDateFormat
|
||||
import java.time.LocalDate
|
||||
import java.util.{Date, Locale}
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
object BioschemaToOAFTransformation {
|
||||
|
@ -200,32 +198,42 @@ object BioschemaToOAFTransformation {
|
|||
if (result.getId == null)
|
||||
return List()
|
||||
|
||||
val alternativeIdentifierUrls: List[String] = for {
|
||||
val alternativeIdentifierUrls: List[AlternateIdentifierType] = for {
|
||||
JObject(alternateIdentifiers) <- json \\ "alternateIdentifiers"
|
||||
JField("alternateIdentifier", JString(alternateIdentifier)) <- alternateIdentifiers
|
||||
foundResolvedURLId = resolvedURL
|
||||
.map(k => {
|
||||
if (alternateIdentifier.contains(s"${k._1}:"))
|
||||
k._1
|
||||
else
|
||||
null
|
||||
})
|
||||
.find(s => s != null)
|
||||
alternativeIdentifierUrl = StringUtils.substringAfter(alternateIdentifier, s"${foundResolvedURLId.get}:")
|
||||
} yield alternativeIdentifierUrl
|
||||
} yield AlternateIdentifierType(alternateIdentifier)
|
||||
|
||||
alternativeIdentifierUrls.map(id => {
|
||||
var alternateIdentifier: StructuredProperty = null
|
||||
alternateIdentifier = OafMapperUtils.structuredProperty(
|
||||
id,
|
||||
"uniprot",
|
||||
"uniprot",
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
dataInfo
|
||||
)
|
||||
instance.setAlternateIdentifier(List(alternateIdentifier).asJava)
|
||||
})
|
||||
val alternativeIdentifierUrl: AlternateIdentifierType = alternativeIdentifierUrls.asJava.get(0)
|
||||
|
||||
val alternativeIdentifiers = resolvedURLPattern
|
||||
.map(pattern => {
|
||||
if (alternativeIdentifierUrl.alternateIdentifier.startsWith(s"${pattern._1}")) {
|
||||
val relatedId = StringUtils.substringAfter(alternativeIdentifierUrl.alternateIdentifier, s"${pattern._1}")
|
||||
OafMapperUtils.structuredProperty(
|
||||
relatedId,
|
||||
pattern._2,
|
||||
pattern._2,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
dataInfo
|
||||
)
|
||||
} else
|
||||
null
|
||||
})
|
||||
.find(s => s != null)
|
||||
.get
|
||||
|
||||
val defaultAlternatedIdentifer: StructuredProperty = OafMapperUtils.structuredProperty(
|
||||
pid,
|
||||
datasourceKey,
|
||||
datasourceKey,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
dataInfo
|
||||
)
|
||||
var finalAlternativeIdentifiers: List[StructuredProperty] = List()
|
||||
finalAlternativeIdentifiers = List(alternativeIdentifiers) ::: List(defaultAlternatedIdentifer)
|
||||
instance.setAlternateIdentifier(finalAlternativeIdentifiers.asJava)
|
||||
|
||||
if (exportLinks) {
|
||||
val rels: List[RelatedIdentifierType] = for {
|
||||
|
@ -272,21 +280,17 @@ object BioschemaToOAFTransformation {
|
|||
|
||||
rel.setProperties(List(dateProps).asJava)
|
||||
|
||||
val foundResolvedURLId = resolvedURL
|
||||
.map(k => {
|
||||
if (r.relatedIdentifier.contains(s"${k._1}:"))
|
||||
k._1
|
||||
else
|
||||
resolvedURLPattern
|
||||
.map(p => {
|
||||
if (r.relatedIdentifier.startsWith(s"${p._1}")) {
|
||||
val relatedId = StringUtils.substringAfter(r.relatedIdentifier, s"${p._1}")
|
||||
rel.setTarget(
|
||||
DHPUtils.generateUnresolvedIdentifier(relatedId, p._2)
|
||||
)
|
||||
} else
|
||||
null
|
||||
})
|
||||
.find(s => s != null);
|
||||
if (foundResolvedURLId.nonEmpty) {
|
||||
val relatedId = StringUtils.substringAfter(r.relatedIdentifier, s"${foundResolvedURLId.get}:")
|
||||
rel.setTarget(s"${resolvedURL(foundResolvedURLId.get)}${relatedId}")
|
||||
} else
|
||||
rel.setTarget(
|
||||
DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
|
||||
)
|
||||
.find(s => s != null)
|
||||
rel.setSource(id)
|
||||
rel.setCollectedfrom(List(collectedFromMap(datasourceKey)).asJava)
|
||||
rel.getCollectedfrom.asScala.map(c => c.getValue).toList
|
||||
|
|
Loading…
Reference in New Issue