added inverse relations to datacite mapping

This commit is contained in:
Sandro La Bruzzo 2021-06-04 15:10:20 +02:00
parent 5b724d9972
commit 0cdb7ccdaa
3 changed files with 80 additions and 50 deletions

View File

@ -3,9 +3,8 @@ package eu.dnetlib.dhp.actionmanager.datacite
import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
import eu.dnetlib.dhp.schema.action.AtomicAction import eu.dnetlib.dhp.schema.action.AtomicAction
import eu.dnetlib.dhp.schema.common.{ModelConstants, ModelSupport}
import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset} import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
import eu.dnetlib.dhp.utils.DHPUtils import eu.dnetlib.dhp.utils.DHPUtils
import org.apache.commons.lang3.StringUtils import org.apache.commons.lang3.StringUtils
@ -17,8 +16,9 @@ import java.nio.charset.CodingErrorAction
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.time.LocalDate import java.time.LocalDate
import java.time.format.DateTimeFormatter import java.time.format.DateTimeFormatter
import java.util.{Date, Locale} import java.util
import java.util.regex.Pattern import java.util.regex.Pattern
import java.util.{Date, Locale}
import scala.collection.JavaConverters._ import scala.collection.JavaConverters._
import scala.io.{Codec, Source} import scala.io.{Codec, Source}
@ -46,32 +46,32 @@ object DataciteToOAFTransformation {
val REL_TYPE_VALUE:String = "resultResult" val REL_TYPE_VALUE:String = "resultResult"
val subRelTypeMapping: Map[String,String] = Map( val subRelTypeMapping: Map[String,(String,String)] = Map(
"References" ->"relationship", "References" ->("IsReferencedBy","relationship"),
"IsSupplementTo" ->"supplement", "IsSupplementTo" ->("IsSupplementedBy","supplement"),
"IsPartOf" ->"part", "IsPartOf" ->("HasPart","part"),
"HasPart" ->"part", "HasPart" ->("IsPartOf","part"),
"IsVersionOf" ->"version", "IsVersionOf" ->("HasVersion","version"),
"HasVersion" ->"version", "HasVersion" ->("IsVersionOf","version"),
"IsIdenticalTo" ->"relationship", "IsIdenticalTo" ->("IsIdenticalTo","relationship"),
"IsPreviousVersionOf" ->"version", "IsPreviousVersionOf" ->("IsNewVersionOf","version"),
"IsContinuedBy" ->"relationship", "IsContinuedBy" ->("Continues","relationship"),
"Continues" ->"relationship", "Continues" ->("IsContinuedBy","relationship"),
"IsNewVersionOf" ->"version", "IsNewVersionOf" ->("IsPreviousVersionOf","version"),
"IsSupplementedBy" ->"supplement", "IsSupplementedBy" ->("IsSupplementTo","supplement"),
"IsDocumentedBy" ->"relationship", "IsDocumentedBy" ->("Documents","relationship"),
"IsSourceOf" ->"relationship", "IsSourceOf" ->("IsDerivedFrom","relationship"),
"Cites" ->"citation", "Cites" ->("IsCitedBy","citation"),
"IsCitedBy" ->"citation", "IsCitedBy" ->("Cites","citation"),
"IsDerivedFrom" ->"relationship", "IsDerivedFrom" ->("IsSourceOf","relationship"),
"IsVariantFormOf" ->"version", "IsVariantFormOf" ->("IsDerivedFrom","version"),
"IsReferencedBy" ->"relationship", "IsReferencedBy" ->("References","relationship"),
"IsObsoletedBy" ->"version", "IsObsoletedBy" ->("IsNewVersionOf","version"),
"Reviews" ->"review", "Reviews" ->("IsReviewedBy","review"),
"Documents" ->"relationship", "Documents" ->("IsDocumentedBy","relationship"),
"IsCompiledBy" ->"relationship", "IsCompiledBy" ->("Compiles","relationship"),
"Compiles" ->"relationship", "Compiles" ->("IsCompiledBy","relationship"),
"IsReviewedBy" ->"review" "IsReviewedBy" ->("Reviews","review")
) )
implicit val codec: Codec = Codec("UTF-8") implicit val codec: Codec = Codec("UTF-8")
@ -523,26 +523,7 @@ val REL_TYPE_VALUE:String = "resultResult"
} yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType) } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType)
relations = relations ::: rels relations = relations ::: generateRelations(rels,result.getId)
.filter(r =>
subRelTypeMapping.contains(r.relationType) && (
r.relatedIdentifierType.equalsIgnoreCase("doi") ||
r.relatedIdentifierType.equalsIgnoreCase("pmid") ||
r.relatedIdentifierType.equalsIgnoreCase("arxiv") )
)
.map(r => {
val rel = new Relation
val subRelType = subRelTypeMapping.get(r.relationType)
rel.setRelType(REL_TYPE_VALUE)
rel.setSubRelType(subRelType.get)
rel.setRelClass(r.relationType)
rel.setSource(result.getId)
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
rel.setDataInfo(dataInfo)
rel.setTarget(createDNetTargetIdentifier(r.relatedIdentifier, r.relatedIdentifierType, "50|"))
rel
})
} }
if (relations != null && relations.nonEmpty) { if (relations != null && relations.nonEmpty) {
List(result) ::: relations List(result) ::: relations
@ -551,6 +532,47 @@ val REL_TYPE_VALUE:String = "resultResult"
List(result) List(result)
} }
/**
 * Builds the relations of a record from its related identifiers.
 *
 * Only related identifiers whose relation type is a key of `subRelTypeMapping`
 * and whose identifier type is doi, pmid or arxiv (case-insensitive) are kept.
 * Each kept identifier yields two relations: the direct one (`id` -> related
 * identifier) and its inverse (related identifier -> `id`), in that order.
 *
 * @param rels the related identifiers extracted from the record
 * @param id   the identifier of the result the related identifiers belong to
 * @return the direct and inverse relations, empty when nothing is kept
 */
private def generateRelations(rels: List[RelatedIdentifierType], id:String):List[Relation] = {
  // Identifier schemes accepted as relation targets.
  val supportedPidTypes = List("doi", "pmid", "arxiv")

  // Creates a relation with the fields common to both directions already set.
  def newRelation(source: String, target: String, relClass: String, subRelType: String): Relation = {
    val relation = new Relation
    relation.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
    relation.setDataInfo(dataInfo)
    relation.setRelType(REL_TYPE_VALUE)
    relation.setSubRelType(subRelType)
    relation.setRelClass(relClass)
    relation.setSource(source)
    relation.setTarget(target)
    relation
  }

  rels.flatMap { r =>
    subRelTypeMapping.get(r.relationType) match {
      // equalsIgnoreCase(null) is false, so a missing identifier type is skipped instead of failing
      case Some((inverseRelClass, subRelType)) if supportedPidTypes.exists(_.equalsIgnoreCase(r.relatedIdentifierType)) =>
        // Computed once and used as target of the direct relation and source of the inverse one.
        val relatedId = createDNetTargetIdentifier(r.relatedIdentifier, r.relatedIdentifierType, "50|")
        // The inverse relation takes the sub-type registered for the inverse class;
        // fall back to the direct sub-type if the inverse class has no entry of its own.
        val inverseSubRelType = subRelTypeMapping.get(inverseRelClass).map(_._2).getOrElse(subRelType)
        List(
          newRelation(id, relatedId, r.relationType, subRelType),
          newRelation(relatedId, id, inverseRelClass, inverseSubRelType)
        )
      case _ => Nil
    }
  }
}
def generateDataInfo(trust: String): DataInfo = { def generateDataInfo(trust: String): DataInfo = {
val di = new DataInfo val di = new DataInfo
di.setDeletedbyinference(false) di.setDeletedbyinference(false)

View File

@ -23,5 +23,12 @@
"paramLongName": "isLookupUrl", "paramLongName": "isLookupUrl",
"paramDescription": "the isLookup URL", "paramDescription": "the isLookup URL",
"paramRequired": true "paramRequired": true
},
{
"paramName": "l",
"paramLongName": "exportLinks",
"paramDescription": "should export also links",
"paramRequired": false
} }
] ]

View File

@ -16,7 +16,7 @@
</parameters> </parameters>
<start to="ImportDatacite"/> <start to="TransformJob"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
@ -70,6 +70,7 @@
<arg>--sourcePath</arg><arg>${mainPath}/datacite_dump</arg> <arg>--sourcePath</arg><arg>${mainPath}/datacite_dump</arg>
<arg>--targetPath</arg><arg>${mainPath}/datacite_oaf</arg> <arg>--targetPath</arg><arg>${mainPath}/datacite_oaf</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--exportLinks</arg><arg>true</arg>
<arg>--master</arg><arg>yarn-cluster</arg> <arg>--master</arg><arg>yarn-cluster</arg>
</spark> </spark>
<ok to="End"/> <ok to="End"/>