From 0cdb7ccdaab3f6e9a866723e5d1631fb3cf72013 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 4 Jun 2021 15:10:20 +0200 Subject: [PATCH] added inverse relations to datacite mapping --- .../DataciteToOAFTransformation.scala | 120 +++++++++++------- .../datacite/generate_dataset_params.json | 7 + .../datacite/oozie_app/workflow.xml | 3 +- 3 files changed, 80 insertions(+), 50 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala index aab2488a3..6005c7e95 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala @@ -3,9 +3,8 @@ package eu.dnetlib.dhp.actionmanager.datacite import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.common.{ModelConstants, ModelSupport} import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils, PidType} +import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset} import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.lang3.StringUtils @@ -17,8 +16,9 @@ import java.nio.charset.CodingErrorAction import java.text.SimpleDateFormat import java.time.LocalDate import java.time.format.DateTimeFormatter -import java.util.{Date, Locale} +import java.util import java.util.regex.Pattern +import java.util.{Date, Locale} import scala.collection.JavaConverters._ import scala.io.{Codec, Source} @@ -46,32 +46,32 @@ object DataciteToOAFTransformation { val REL_TYPE_VALUE:String = "resultResult" - val subRelTypeMapping: Map[String,String] = Map( - "References" ->"relationship", - "IsSupplementTo" ->"supplement", - "IsPartOf" ->"part", - "HasPart" ->"part", - "IsVersionOf" ->"version", - "HasVersion" ->"version", - "IsIdenticalTo" ->"relationship", - "IsPreviousVersionOf" ->"version", - "IsContinuedBy" ->"relationship", - "Continues" ->"relationship", - "IsNewVersionOf" ->"version", - "IsSupplementedBy" ->"supplement", - "IsDocumentedBy" ->"relationship", - "IsSourceOf" ->"relationship", - "Cites" ->"citation", - "IsCitedBy" ->"citation", - "IsDerivedFrom" ->"relationship", - "IsVariantFormOf" ->"version", - "IsReferencedBy" ->"relationship", - "IsObsoletedBy" ->"version", - "Reviews" ->"review", - "Documents" ->"relationship", - "IsCompiledBy" ->"relationship", - "Compiles" ->"relationship", - "IsReviewedBy" ->"review" + val subRelTypeMapping: Map[String,(String,String)] = Map( + "References" ->("IsReferencedBy","relationship"), + "IsSupplementTo" ->("IsSupplementedBy","supplement"), + "IsPartOf" ->("HasPart","part"), + "HasPart" ->("IsPartOf","part"), + "IsVersionOf" ->("HasVersion","version"), + "HasVersion" ->("IsVersionOf","version"), + "IsIdenticalTo" ->("IsIdenticalTo","relationship"), + "IsPreviousVersionOf" ->("IsNewVersionOf","version"), + "IsContinuedBy" ->("Continues","relationship"), + "Continues" ->("IsContinuedBy","relationship"), + "IsNewVersionOf" ->("IsPreviousVersionOf","version"), + "IsSupplementedBy" ->("IsSupplementTo","supplement"), + "IsDocumentedBy" ->("Documents","relationship"), + "IsSourceOf" ->("IsDerivedFrom","relationship"), + "Cites" ->("IsCitedBy","citation"), + "IsCitedBy" ->("Cites","citation"), + "IsDerivedFrom" ->("IsSourceOf","relationship"), + "IsVariantFormOf" ->("IsDerivedFrom","version"), + "IsReferencedBy" ->("References","relationship"), + "IsObsoletedBy" ->("IsNewVersionOf","version"), + "Reviews" ->("IsReviewedBy","review"), + "Documents" ->("IsDocumentedBy","relationship"), + "IsCompiledBy" ->("Compiles","relationship"), + "Compiles" ->("IsCompiledBy","relationship"), + "IsReviewedBy" ->("Reviews","review") ) implicit val codec: Codec = Codec("UTF-8") @@ -523,26 +523,7 @@ val REL_TYPE_VALUE:String = "resultResult" } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType) - relations = relations ::: rels - .filter(r => - subRelTypeMapping.contains(r.relationType) && ( - r.relatedIdentifierType.equalsIgnoreCase("doi") || - r.relatedIdentifierType.equalsIgnoreCase("pmid") || - r.relatedIdentifierType.equalsIgnoreCase("arxiv") ) - ) - .map(r => { - val rel = new Relation - - val subRelType = subRelTypeMapping.get(r.relationType) - rel.setRelType(REL_TYPE_VALUE) - rel.setSubRelType(subRelType.get) - rel.setRelClass(r.relationType) - rel.setSource(result.getId) - rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) - rel.setDataInfo(dataInfo) - rel.setTarget(createDNetTargetIdentifier(r.relatedIdentifier, r.relatedIdentifierType, "50|")) - rel - }) + relations = relations ::: generateRelations(rels,result.getId) } if (relations != null && relations.nonEmpty) { List(result) ::: relations @@ -551,6 +532,47 @@ val REL_TYPE_VALUE:String = "resultResult" List(result) } + private def generateRelations(rels: List[RelatedIdentifierType], id:String):List[Relation] = { + rels + .filter(r => + subRelTypeMapping.contains(r.relationType) && ( + r.relatedIdentifierType.equalsIgnoreCase("doi") || + r.relatedIdentifierType.equalsIgnoreCase("pmid") || + r.relatedIdentifierType.equalsIgnoreCase("arxiv")) + ) + .flatMap(r => { + val rel = new Relation + val inverseRel = new Relation + rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) + rel.setDataInfo(dataInfo) + + inverseRel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) + inverseRel.setDataInfo(dataInfo) + + val subRelType = subRelTypeMapping(r.relationType)._2 + val inverseRelSemantic = subRelTypeMapping(r.relationType)._1 + val inversesubRelType = subRelTypeMapping(inverseRelSemantic)._2 + + + rel.setRelType(REL_TYPE_VALUE) + rel.setSubRelType(subRelType) + rel.setRelClass(r.relationType) + + + inverseRel.setRelType(REL_TYPE_VALUE) + inverseRel.setSubRelType(inversesubRelType) + inverseRel.setRelClass(inverseRelSemantic) + + rel.setSource(id) + rel.setTarget(createDNetTargetIdentifier(r.relatedIdentifier, r.relatedIdentifierType, "50|")) + + inverseRel.setTarget(id) + inverseRel.setSource(createDNetTargetIdentifier(r.relatedIdentifier, r.relatedIdentifierType, "50|")) + + List(rel, inverseRel) + }) + } + def generateDataInfo(trust: String): DataInfo = { val di = new DataInfo di.setDeletedbyinference(false) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json index dea037fd4..67e7f37dc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json @@ -23,5 +23,12 @@ "paramLongName": "isLookupUrl", "paramDescription": "the isLookup URL", "paramRequired": true + }, + { + "paramName": "l", + "paramLongName": "exportLinks", + "paramDescription": "should export also links", + "paramRequired": false } + ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml index 30a7448a8..036178b37 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml @@ -16,7 +16,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -70,6 +70,7 @@ --sourcePath${mainPath}/datacite_dump --targetPath${mainPath}/datacite_oaf --isLookupUrl${isLookupUrl} + --exportLinkstrue --masteryarn-cluster