From 7f9636ef0084dd82a08604d76082b808a02f886a Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Fri, 25 Feb 2022 14:42:08 +0100 Subject: [PATCH] added alternateIdentifiers to oaf --- .../bioschema/BioschemaModelConstants.scala | 5 + .../BioschemaToOAFTransformation.scala | 94 +++++++++++-------- 2 files changed, 61 insertions(+), 38 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala index 48009b936..b2ecb1a24 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala @@ -28,6 +28,11 @@ case class RelatedIdentifierType( relatedIdentifierType: String ) {} +case class IdentifierType( + identifier: String, + identifierType: String +) {} + case class NameIdentifiersType( nameIdentifierScheme: Option[String], schemeUri: Option[String], diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaToOAFTransformation.scala index ac8bf10ec..4d4ab98ce 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaToOAFTransformation.scala @@ -29,15 +29,15 @@ object BioschemaToOAFTransformation { ) val resolvedURL: Map[String, String] = Map( - "uniprot" -> "https://www.uniprot.org/uniprot/", - "pubmed" -> "https://pubmed.ncbi.nlm.nih.gov/" + "uniprot" -> "https://www.uniprot.org/uniprot/", + "pubmed" -> "https://pubmed.ncbi.nlm.nih.gov/" ) val collectedFromMap: Map[String, KeyValue] = { val PEDCollectedFrom: KeyValue = OafMapperUtils.keyValue( //TODO create pedDatasourceId and update this value "10|ped_________::pedDatasourceId", - "PED" + "Protein Ensemble Database" ) PEDCollectedFrom.setDataInfo(DATA_INFO) @@ -254,19 +254,8 @@ object BioschemaToOAFTransformation { titles .filter(t => t.title.nonEmpty) .map(t => { - if (t.titleType.isEmpty) { - OafMapperUtils - .structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER, null) - } else { - OafMapperUtils.structuredProperty( - t.title.get, - t.titleType.get, - t.titleType.get, - ModelConstants.DNET_DATACITE_TITLE, - ModelConstants.DNET_DATACITE_TITLE, - null - ) - } + OafMapperUtils + .structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER, dataInfo) }) .asJava ) @@ -339,8 +328,20 @@ object BioschemaToOAFTransformation { val instance = result.getInstance().get(0) + val ids: List[IdentifierType] = for { + JObject(identifiers) <- json \\ "identifiers" + JField("identifier", JString(identifier)) <- identifiers + JField("identifierType", JString(identifierType)) <- identifiers + } yield IdentifierType(identifier, identifierType) + + instance.setUrl( + ids + .map(id => { + id.identifier + }) + .asJava + ) instance.setCollectedfrom(collectedFromMap("ped")) - instance.setUrl(List(s"https://proteinensemble.org/$pid").asJava) instance.setPid(result.getPid) result.setId(IdentifierFactory.createIdentifier(result)) @@ -349,12 +350,39 @@ object BioschemaToOAFTransformation { if (result.getId == null) return List() + val alternativeIdentifierUrls: List[String] = for { + JObject(alternateIdentifiers) <- json \\ "alternateIdentifiers" + JField("alternateIdentifier", JString(alternateIdentifier)) <- alternateIdentifiers + foundResolvedURLId = resolvedURL + .map(k => { + if (alternateIdentifier.contains(s"${k._1}:")) + k._1 + else + null + }) + .find(s => s != null) + alternativeIdentifierUrl = StringUtils.substringAfter(alternateIdentifier, s"${foundResolvedURLId.get}:") + } yield alternativeIdentifierUrl + + alternativeIdentifierUrls.map(id => { + var alternateIdentifier: StructuredProperty = null + alternateIdentifier = OafMapperUtils.structuredProperty( + id, + "uniprot", + "uniprot", + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, + dataInfo + ) + instance.setAlternateIdentifier(List(alternateIdentifier).asJava) + }) + if (exportLinks) { val rels: List[RelatedIdentifierType] = for { - JObject(relIdentifier) <- json \\ "relatedIdentifiers" - JField("relationType", JString(relationType)) <- relIdentifier + JObject(relIdentifier) <- json \\ "relatedIdentifiers" + JField("relationType", JString(relationType)) <- relIdentifier JField("relatedIdentifierType", JString(relatedIdentifierType)) <- relIdentifier - JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier + JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType) relations = relations ::: generateRelations( @@ -364,18 +392,6 @@ object BioschemaToOAFTransformation { pid ) - val identifiers: List[RelatedIdentifierType] = for { - JObject(alternateIdentifier) <- json \\ "alternateIdentifiers" - JField("alternateIdentifier", JString(alternateIdentifierValue)) <- alternateIdentifier - } yield RelatedIdentifierType("IsIdenticalTo", alternateIdentifierValue, "URL") - - relations = relations ::: generateRelations( - identifiers, - result.getId, - if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null, - pid - ) - } if (relations != null && relations.nonEmpty) { List(result) ::: relations @@ -404,12 +420,14 @@ object BioschemaToOAFTransformation { rel.setProperties(List(dateProps).asJava) - val foundResolvedURLId = resolvedURL.map(k => { - if (r.relatedIdentifier.contains(s"${k._1}:")) - k._1 - else - null - }).find(s => s != null); + val foundResolvedURLId = resolvedURL + .map(k => { + if (r.relatedIdentifier.contains(s"${k._1}:")) + k._1 + else + null + }) + .find(s => s != null); if (foundResolvedURLId.nonEmpty) { val relatedId = StringUtils.substringAfter(r.relatedIdentifier, s"${foundResolvedURLId.get}:") rel.setTarget(s"${resolvedURL(foundResolvedURLId.get)}${relatedId}")