From af1c2634b3fc61c208dfd8e57fff1b11459dba5f Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 29 Nov 2023 12:45:30 +0100 Subject: [PATCH] added instanceTypeMapping original field in the mapping of - DOIBoost - Datacite - PubMed - Scholexplorer Datasource --- .../DataciteToOAFTransformation.scala | 15 ++++++++----- .../eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala | 21 +++++++++++++++++-- .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 18 ++++++++++++---- .../doiboost/crossref/Crossref2Oaf.scala | 8 +++++-- 4 files changed, 49 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index 45f5f9729..17f1b90b0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -166,7 +166,7 @@ object DataciteToOAFTransformation { resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup - ): (Qualifier, Qualifier) = { + ): (Qualifier, Qualifier, String) = { if (resourceType != null && resourceType.nonEmpty) { val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) @@ -176,7 +176,7 @@ object DataciteToOAFTransformation { vocabularies.getSynonymAsQualifier( ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid - ) + ), resourceType ) } if (schemaOrg != null && schemaOrg.nonEmpty) { @@ -188,7 +188,7 @@ object DataciteToOAFTransformation { vocabularies.getSynonymAsQualifier( ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid - ) + ), schemaOrg ) } @@ -203,7 +203,7 @@ object DataciteToOAFTransformation { vocabularies.getSynonymAsQualifier( ModelConstants.DNET_RESULT_TYPOLOGIES, typeQualifier.getClassid - ) + ), resourceTypeGeneral ) } @@ -216,12 +216,17 @@ object DataciteToOAFTransformation { schemaOrg: String, vocabularies: VocabularyGroup ): Result = { - val typeQualifiers: (Qualifier, Qualifier) = + val typeQualifiers: (Qualifier, Qualifier, String) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) if (typeQualifiers == null) return null val i = new Instance i.setInstancetype(typeQualifiers._1) + // ADD ORIGINAL TYPE + val itm = new InstanceTypeMapping + itm.setOriginalType(typeQualifiers._3) + i.setInstanceTypeMapping(List(itm).asJava) + typeQualifiers._2.getClassname match { case "dataset" => val r = new OafDataset diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index c079f7537..3624c5369 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -176,7 +176,7 @@ object BioDBToOAF { i.setUrl(List(s"${resolvedURL(input.pidType)}${input.pid}").asJava) } - if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) + if (input.pidType.equalsIgnoreCase("clinicaltrials.gov")) { i.setInstancetype( OafMapperUtils.qualifier( "0037", @@ -185,7 +185,10 @@ object BioDBToOAF { ModelConstants.DNET_PUBLICATION_RESOURCE ) ) - else + val itm = new InstanceTypeMapping + itm.setOriginalType(input.pidType) + i.setInstanceTypeMapping(List(itm).asJava) + } else { i.setInstancetype( OafMapperUtils.qualifier( "0046", @@ -194,6 +197,10 @@ object BioDBToOAF { ModelConstants.DNET_PUBLICATION_RESOURCE ) ) + val itm = new InstanceTypeMapping + itm.setOriginalType("Bioentity") + i.setInstanceTypeMapping(List(itm).asJava) + } if (input.datasource == null || input.datasource.isEmpty) return null @@ -265,6 +272,9 @@ object BioDBToOAF { ModelConstants.DNET_PUBLICATION_RESOURCE ) ) + val itm = new InstanceTypeMapping + itm.setOriginalType("Bioentity") + i.setInstanceTypeMapping(List(itm).asJava) i.setCollectedfrom(collectedFromMap("uniprot")) d.setInstance(List(i).asJava) @@ -471,6 +481,9 @@ object BioDBToOAF { ModelConstants.DNET_PUBLICATION_RESOURCE ) ) + val itm = new InstanceTypeMapping + itm.setOriginalType("Bioentity") + i.setInstanceTypeMapping(List(itm).asJava) i.setCollectedfrom(collectedFromMap("pdb")) d.setInstance(List(i).asJava) @@ -571,6 +584,10 @@ object BioDBToOAF { ModelConstants.DNET_PUBLICATION_RESOURCE ) ) + val itm = new InstanceTypeMapping + itm.setOriginalType("Bioentity") + i.setInstanceTypeMapping(List(itm).asJava) + i.setCollectedfrom(collectedFromMap("ebi")) d.setInstance(List(i).asJava) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index 410686f97..40ac48a7a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -188,12 +188,22 @@ object PubMedToOaf { val cojbCategory = getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) pubmedInstance.setInstancetype(cojbCategory) + // ADD ORIGINAL TYPE to the publication + val itm = new InstanceTypeMapping + itm.setOriginalType(ja.get.getValue) + pubmedInstance.setInstanceTypeMapping(List(itm).asJava) } else { val i_type = article.getPublicationTypes.asScala - .map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)) - .find(q => q != null) - if (i_type.isDefined) - pubmedInstance.setInstancetype(i_type.get) + .map(s => (s.getValue,getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue))) + .find(q => q._2 != null) + + if (i_type.isDefined) { + pubmedInstance.setInstancetype(i_type.get._2) + // ADD ORIGINAL TYPE to the publication + val itm = new InstanceTypeMapping + itm.setOriginalType(i_type.get._1) + pubmedInstance.setInstanceTypeMapping(List(itm).asJava) + } else return null } diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 565d34e62..f45c303f0 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -107,7 +107,7 @@ case object Crossref2Oaf { .map(f => f.id) } - def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { + def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType:String): Result = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats //MAPPING Crossref DOI into PID @@ -283,6 +283,10 @@ case object Crossref2Oaf { ModelConstants.DNET_PUBLICATION_RESOURCE ) ) + //ADD ORIGINAL TYPE to the mapping + val itm = new InstanceTypeMapping + itm.setOriginalType(originalType) + instance.setInstanceTypeMapping(List(itm).asJava) result.setResourcetype( OafMapperUtils.qualifier( cobjCategory.substring(0, 4), @@ -367,7 +371,7 @@ case object Crossref2Oaf { objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type") ) - mappingResult(result, json, cOBJCategory) + mappingResult(result, json, cOBJCategory, originalType) if (result == null || result.getId == null) return List()