forked from antonis.lempesis/dnet-hadoop
added inverse relations to datacite mapping
This commit is contained in:
parent
5b724d9972
commit
0cdb7ccdaa
|
@ -3,9 +3,8 @@ package eu.dnetlib.dhp.actionmanager.datacite
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction
|
import eu.dnetlib.dhp.schema.action.AtomicAction
|
||||||
import eu.dnetlib.dhp.schema.common.{ModelConstants, ModelSupport}
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils, PidType}
|
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
|
||||||
import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
|
import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils
|
import eu.dnetlib.dhp.utils.DHPUtils
|
||||||
import org.apache.commons.lang3.StringUtils
|
import org.apache.commons.lang3.StringUtils
|
||||||
|
@ -17,8 +16,9 @@ import java.nio.charset.CodingErrorAction
|
||||||
import java.text.SimpleDateFormat
|
import java.text.SimpleDateFormat
|
||||||
import java.time.LocalDate
|
import java.time.LocalDate
|
||||||
import java.time.format.DateTimeFormatter
|
import java.time.format.DateTimeFormatter
|
||||||
import java.util.{Date, Locale}
|
import java.util
|
||||||
import java.util.regex.Pattern
|
import java.util.regex.Pattern
|
||||||
|
import java.util.{Date, Locale}
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.io.{Codec, Source}
|
import scala.io.{Codec, Source}
|
||||||
|
|
||||||
|
@ -46,32 +46,32 @@ object DataciteToOAFTransformation {
|
||||||
|
|
||||||
val REL_TYPE_VALUE:String = "resultResult"
|
val REL_TYPE_VALUE:String = "resultResult"
|
||||||
|
|
||||||
val subRelTypeMapping: Map[String,String] = Map(
|
val subRelTypeMapping: Map[String,(String,String)] = Map(
|
||||||
"References" ->"relationship",
|
"References" ->("IsReferencedBy","relationship"),
|
||||||
"IsSupplementTo" ->"supplement",
|
"IsSupplementTo" ->("IsSupplementedBy","supplement"),
|
||||||
"IsPartOf" ->"part",
|
"IsPartOf" ->("HasPart","part"),
|
||||||
"HasPart" ->"part",
|
"HasPart" ->("IsPartOf","part"),
|
||||||
"IsVersionOf" ->"version",
|
"IsVersionOf" ->("HasVersion","version"),
|
||||||
"HasVersion" ->"version",
|
"HasVersion" ->("IsVersionOf","version"),
|
||||||
"IsIdenticalTo" ->"relationship",
|
"IsIdenticalTo" ->("IsIdenticalTo","relationship"),
|
||||||
"IsPreviousVersionOf" ->"version",
|
"IsPreviousVersionOf" ->("IsNewVersionOf","version"),
|
||||||
"IsContinuedBy" ->"relationship",
|
"IsContinuedBy" ->("Continues","relationship"),
|
||||||
"Continues" ->"relationship",
|
"Continues" ->("IsContinuedBy","relationship"),
|
||||||
"IsNewVersionOf" ->"version",
|
"IsNewVersionOf" ->("IsPreviousVersionOf","version"),
|
||||||
"IsSupplementedBy" ->"supplement",
|
"IsSupplementedBy" ->("IsSupplementTo","supplement"),
|
||||||
"IsDocumentedBy" ->"relationship",
|
"IsDocumentedBy" ->("Documents","relationship"),
|
||||||
"IsSourceOf" ->"relationship",
|
"IsSourceOf" ->("IsDerivedFrom","relationship"),
|
||||||
"Cites" ->"citation",
|
"Cites" ->("IsCitedBy","citation"),
|
||||||
"IsCitedBy" ->"citation",
|
"IsCitedBy" ->("Cites","citation"),
|
||||||
"IsDerivedFrom" ->"relationship",
|
"IsDerivedFrom" ->("IsSourceOf","relationship"),
|
||||||
"IsVariantFormOf" ->"version",
|
"IsVariantFormOf" ->("IsDerivedFrom","version"),
|
||||||
"IsReferencedBy" ->"relationship",
|
"IsReferencedBy" ->("References","relationship"),
|
||||||
"IsObsoletedBy" ->"version",
|
"IsObsoletedBy" ->("IsNewVersionOf","version"),
|
||||||
"Reviews" ->"review",
|
"Reviews" ->("IsReviewedBy","review"),
|
||||||
"Documents" ->"relationship",
|
"Documents" ->("IsDocumentedBy","relationship"),
|
||||||
"IsCompiledBy" ->"relationship",
|
"IsCompiledBy" ->("Compiles","relationship"),
|
||||||
"Compiles" ->"relationship",
|
"Compiles" ->("IsCompiledBy","relationship"),
|
||||||
"IsReviewedBy" ->"review"
|
"IsReviewedBy" ->("Reviews","review")
|
||||||
)
|
)
|
||||||
|
|
||||||
implicit val codec: Codec = Codec("UTF-8")
|
implicit val codec: Codec = Codec("UTF-8")
|
||||||
|
@ -523,26 +523,7 @@ val REL_TYPE_VALUE:String = "resultResult"
|
||||||
} yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType)
|
} yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType)
|
||||||
|
|
||||||
|
|
||||||
relations = relations ::: rels
|
relations = relations ::: generateRelations(rels,result.getId)
|
||||||
.filter(r =>
|
|
||||||
subRelTypeMapping.contains(r.relationType) && (
|
|
||||||
r.relatedIdentifierType.equalsIgnoreCase("doi") ||
|
|
||||||
r.relatedIdentifierType.equalsIgnoreCase("pmid") ||
|
|
||||||
r.relatedIdentifierType.equalsIgnoreCase("arxiv") )
|
|
||||||
)
|
|
||||||
.map(r => {
|
|
||||||
val rel = new Relation
|
|
||||||
|
|
||||||
val subRelType = subRelTypeMapping.get(r.relationType)
|
|
||||||
rel.setRelType(REL_TYPE_VALUE)
|
|
||||||
rel.setSubRelType(subRelType.get)
|
|
||||||
rel.setRelClass(r.relationType)
|
|
||||||
rel.setSource(result.getId)
|
|
||||||
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
|
|
||||||
rel.setDataInfo(dataInfo)
|
|
||||||
rel.setTarget(createDNetTargetIdentifier(r.relatedIdentifier, r.relatedIdentifierType, "50|"))
|
|
||||||
rel
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
if (relations != null && relations.nonEmpty) {
|
if (relations != null && relations.nonEmpty) {
|
||||||
List(result) ::: relations
|
List(result) ::: relations
|
||||||
|
@ -551,6 +532,47 @@ val REL_TYPE_VALUE:String = "resultResult"
|
||||||
List(result)
|
List(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the OAF relations for the related identifiers of a Datacite record.
 *
 * A related identifier is kept only when its relation type is a key of
 * `subRelTypeMapping` and its identifier type is doi, pmid or arxiv
 * (compared case-insensitively). For every kept identifier two relations
 * are emitted, in this order:
 *  - the direct one, from `id` to the related identifier, with the relation
 *    type declared by Datacite as relClass;
 *  - the inverse one, from the related identifier to `id`, with the inverse
 *    relation name taken from `subRelTypeMapping` as relClass.
 *
 * @param rels the related identifiers extracted from the Datacite record
 * @param id   the identifier used as source of the direct relations
 * @return the direct and inverse relations of every accepted related identifier
 */
private def generateRelations(rels: List[RelatedIdentifierType], id:String):List[Relation] = {
  val supportedPidTypes = List("doi", "pmid", "arxiv")

  // Single place where a Relation is populated, so that the direct and the
  // inverse relation cannot drift apart in the fields they set.
  def newRelation(source: String, target: String, relClass: String, subRelType: String): Relation = {
    val relation = new Relation
    relation.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
    relation.setDataInfo(dataInfo)
    relation.setRelType(REL_TYPE_VALUE)
    relation.setSubRelType(subRelType)
    relation.setRelClass(relClass)
    relation.setSource(source)
    relation.setTarget(target)
    relation
  }

  rels.flatMap { r =>
    // Calling equalsIgnoreCase on the literal keeps the check null-safe:
    // a missing identifier type is simply not supported.
    val supportedPid = supportedPidTypes.exists(_.equalsIgnoreCase(r.relatedIdentifierType))

    subRelTypeMapping.get(r.relationType) match {
      case Some((inverseRelClass, subRelType)) if supportedPid =>
        // The inverse relation carries the sub relation type registered for its
        // own semantics; fall back to the direct one if the inverse name is not
        // a key of the mapping instead of failing with NoSuchElementException.
        val inverseSubRelType = subRelTypeMapping.get(inverseRelClass).map(_._2).getOrElse(subRelType)

        // Same arguments for both relations: resolve the identifier only once.
        val relatedId = createDNetTargetIdentifier(r.relatedIdentifier, r.relatedIdentifierType, "50|")

        List(
          newRelation(id, relatedId, r.relationType, subRelType),
          newRelation(relatedId, id, inverseRelClass, inverseSubRelType)
        )
      case _ => Nil
    }
  }
}
|
||||||
|
|
||||||
def generateDataInfo(trust: String): DataInfo = {
|
def generateDataInfo(trust: String): DataInfo = {
|
||||||
val di = new DataInfo
|
val di = new DataInfo
|
||||||
di.setDeletedbyinference(false)
|
di.setDeletedbyinference(false)
|
||||||
|
|
|
@ -23,5 +23,12 @@
|
||||||
"paramLongName": "isLookupUrl",
|
"paramLongName": "isLookupUrl",
|
||||||
"paramDescription": "the isLookup URL",
|
"paramDescription": "the isLookup URL",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "l",
|
||||||
|
"paramLongName": "exportLinks",
|
||||||
|
"paramDescription": "should export also links",
|
||||||
|
"paramRequired": false
|
||||||
}
|
}
|
||||||
|
|
||||||
]
|
]
|
|
@ -16,7 +16,7 @@
|
||||||
|
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
<start to="ImportDatacite"/>
|
<start to="TransformJob"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
@ -70,6 +70,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${mainPath}/datacite_dump</arg>
|
<arg>--sourcePath</arg><arg>${mainPath}/datacite_dump</arg>
|
||||||
<arg>--targetPath</arg><arg>${mainPath}/datacite_oaf</arg>
|
<arg>--targetPath</arg><arg>${mainPath}/datacite_oaf</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
<arg>--exportLinks</arg><arg>true</arg>
|
||||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
|
|
Loading…
Reference in New Issue