forked from D-Net/dnet-hadoop
added relations to datacite mapping
This commit is contained in:
parent
e57294ac99
commit
5b724d9972
|
@ -3,10 +3,9 @@ package eu.dnetlib.dhp.actionmanager.datacite
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction
|
import eu.dnetlib.dhp.schema.action.AtomicAction
|
||||||
|
import eu.dnetlib.dhp.schema.common.{ModelConstants, ModelSupport}
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
|
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils, PidType}
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
|
import eu.dnetlib.dhp.schema.oaf.{AccessRight, Author, DataInfo, Instance, KeyValue, Oaf, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils
|
import eu.dnetlib.dhp.utils.DHPUtils
|
||||||
import org.apache.commons.lang3.StringUtils
|
import org.apache.commons.lang3.StringUtils
|
||||||
|
@ -25,6 +24,8 @@ import scala.io.{Codec, Source}
|
||||||
|
|
||||||
case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {}
|
case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {}
|
||||||
|
|
||||||
|
/**
 * One related-identifier entry, built from the `relationType`, `relatedIdentifier` and
 * `relatedIdentifierType` string fields of a record's `relatedIdentifiers` JSON section.
 *
 * @param relationType          the relation name as found in the record (e.g. "Cites")
 * @param relatedIdentifier     the identifier value of the related object
 * @param relatedIdentifierType the scheme of the identifier (e.g. "doi")
 */
case class RelatedIdentifierType(
  relationType: String,
  relatedIdentifier: String,
  relatedIdentifierType: String
)
|
||||||
|
|
||||||
case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {}
|
case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {}
|
||||||
|
|
||||||
case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {}
|
case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {}
|
||||||
|
@ -43,6 +44,36 @@ case class HostedByMapType(openaire_id: String, datacite_name: String, official_
|
||||||
|
|
||||||
object DataciteToOAFTransformation {
|
object DataciteToOAFTransformation {
|
||||||
|
|
||||||
|
// relType value set on every Relation created from a record's related identifiers
val REL_TYPE_VALUE:String = "resultResult"
|
||||||
|
|
||||||
|
/**
 * Maps a relation type found in a record's related identifiers to the subRelType
 * assigned to the generated Relation. Relation types that are not listed here are
 * not exported as relations.
 *
 * The table is written grouped by subRelType and then inverted, so that each
 * relation type (key) points to its subRelType (value).
 */
val subRelTypeMapping: Map[String, String] = Seq(
  "relationship" -> Seq(
    "References", "IsIdenticalTo", "IsContinuedBy", "Continues", "IsDocumentedBy",
    "IsSourceOf", "IsDerivedFrom", "IsReferencedBy", "Documents", "IsCompiledBy", "Compiles"
  ),
  "supplement" -> Seq("IsSupplementTo", "IsSupplementedBy"),
  "part"       -> Seq("IsPartOf", "HasPart"),
  "version"    -> Seq(
    "IsVersionOf", "HasVersion", "IsPreviousVersionOf", "IsNewVersionOf",
    "IsVariantFormOf", "IsObsoletedBy"
  ),
  "citation"   -> Seq("Cites", "IsCitedBy"),
  "review"     -> Seq("Reviews", "IsReviewedBy")
).flatMap { case (subRelType, relationTypes) =>
  relationTypes.map(relationType => relationType -> subRelType)
}.toMap
|
||||||
|
|
||||||
implicit val codec: Codec = Codec("UTF-8")
|
implicit val codec: Codec = Codec("UTF-8")
|
||||||
codec.onMalformedInput(CodingErrorAction.REPLACE)
|
codec.onMalformedInput(CodingErrorAction.REPLACE)
|
||||||
codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
||||||
|
@ -232,6 +263,7 @@ object DataciteToOAFTransformation {
|
||||||
* As described in ticket #6377
|
* As described in ticket #6377
|
||||||
* when the result comes from figshare we need to remove the subject
|
* when the result comes from figshare we need to remove the subject
|
||||||
* and set Access rights OPEN.
|
* and set Access rights OPEN.
|
||||||
|
*
|
||||||
* @param r
|
* @param r
|
||||||
*/
|
*/
|
||||||
def fix_figshare(r: Result): Unit = {
|
def fix_figshare(r: Result): Unit = {
|
||||||
|
@ -248,6 +280,12 @@ object DataciteToOAFTransformation {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Builds the identifier of a relation target from a persistent identifier.
 *
 * The result is `<idPrefix>|<pidType>` (pidType lower-cased), right-padded with '_'
 * up to 15 characters, followed by `::` and `IdentifierFactory.md5` of the
 * lower-cased pid.
 *
 * @param pid      the persistent identifier value; lower-cased before hashing
 * @param pidType  the identifier scheme (e.g. "doi"); lower-cased in the output
 * @param idPrefix the entity prefix, accepted with or without a trailing '|'
 *                 ("50" and "50|" yield the same identifier)
 * @return the generated target identifier
 */
def createDNetTargetIdentifier(pid: String, pidType: String, idPrefix: String): String = {
  // The '|' separator is inserted below; callers that already pass it (e.g. "50|")
  // would otherwise produce a doubled separator ("50||doi________::...").
  val normalizedPrefix = idPrefix.stripSuffix("|")
  val f_part = s"$normalizedPrefix|${pidType.toLowerCase}".padTo(15, '_')
  s"$f_part::${IdentifierFactory.md5(pid.toLowerCase)}"
}
|
||||||
|
|
||||||
def generateOAFDate(dt: String, q: Qualifier): StructuredProperty = {
|
def generateOAFDate(dt: String, q: Qualifier): StructuredProperty = {
|
||||||
OafMapperUtils.structuredProperty(dt, q, null)
|
OafMapperUtils.structuredProperty(dt, q, null)
|
||||||
}
|
}
|
||||||
|
@ -286,7 +324,7 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def generateOAF(input: String, ts: Long, dateOfCollection: Long, vocabularies: VocabularyGroup): List[Oaf] = {
|
def generateOAF(input: String, ts: Long, dateOfCollection: Long, vocabularies: VocabularyGroup, exportLinks: Boolean): List[Oaf] = {
|
||||||
if (filter_json(input))
|
if (filter_json(input))
|
||||||
return List()
|
return List()
|
||||||
|
|
||||||
|
@ -468,11 +506,44 @@ object DataciteToOAFTransformation {
|
||||||
JField("awardUri", JString(awardUri)) <- fundingReferences
|
JField("awardUri", JString(awardUri)) <- fundingReferences
|
||||||
} yield awardUri
|
} yield awardUri
|
||||||
|
|
||||||
val relations: List[Relation] = awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null)
|
var relations: List[Relation] = awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null)
|
||||||
|
|
||||||
|
|
||||||
fix_figshare(result)
|
fix_figshare(result)
|
||||||
result.setId(IdentifierFactory.createIdentifier(result))
|
result.setId(IdentifierFactory.createIdentifier(result))
|
||||||
if (result.getId == null)
|
if (result.getId == null)
|
||||||
return List()
|
return List()
|
||||||
|
|
||||||
|
if (exportLinks) {
|
||||||
|
val rels: List[RelatedIdentifierType] = for {
|
||||||
|
JObject(relIdentifier) <- json \\ "relatedIdentifiers"
|
||||||
|
JField("relationType", JString(relationType)) <- relIdentifier
|
||||||
|
JField("relatedIdentifierType", JString(relatedIdentifierType)) <- relIdentifier
|
||||||
|
JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier
|
||||||
|
} yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType)
|
||||||
|
|
||||||
|
|
||||||
|
relations = relations ::: rels
|
||||||
|
.filter(r =>
|
||||||
|
subRelTypeMapping.contains(r.relationType) && (
|
||||||
|
r.relatedIdentifierType.equalsIgnoreCase("doi") ||
|
||||||
|
r.relatedIdentifierType.equalsIgnoreCase("pmid") ||
|
||||||
|
r.relatedIdentifierType.equalsIgnoreCase("arxiv") )
|
||||||
|
)
|
||||||
|
.map(r => {
|
||||||
|
val rel = new Relation
|
||||||
|
|
||||||
|
val subRelType = subRelTypeMapping.get(r.relationType)
|
||||||
|
rel.setRelType(REL_TYPE_VALUE)
|
||||||
|
rel.setSubRelType(subRelType.get)
|
||||||
|
rel.setRelClass(r.relationType)
|
||||||
|
rel.setSource(result.getId)
|
||||||
|
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
|
||||||
|
rel.setDataInfo(dataInfo)
|
||||||
|
rel.setTarget(createDNetTargetIdentifier(r.relatedIdentifier, r.relatedIdentifierType, "50|"))
|
||||||
|
rel
|
||||||
|
})
|
||||||
|
}
|
||||||
if (relations != null && relations.nonEmpty) {
|
if (relations != null && relations.nonEmpty) {
|
||||||
List(result) ::: relations
|
List(result) ::: relations
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ object GenerateDataciteDatasetSpark {
|
||||||
val master = parser.get("master")
|
val master = parser.get("master")
|
||||||
val sourcePath = parser.get("sourcePath")
|
val sourcePath = parser.get("sourcePath")
|
||||||
val targetPath = parser.get("targetPath")
|
val targetPath = parser.get("targetPath")
|
||||||
|
val exportLinks = "true".equalsIgnoreCase(parser.get("exportLinks"))
|
||||||
val isLookupUrl: String = parser.get("isLookupUrl")
|
val isLookupUrl: String = parser.get("isLookupUrl")
|
||||||
log.info("isLookupUrl: {}", isLookupUrl)
|
log.info("isLookupUrl: {}", isLookupUrl)
|
||||||
|
|
||||||
|
@ -40,7 +41,7 @@ object GenerateDataciteDatasetSpark {
|
||||||
|
|
||||||
spark.read.load(sourcePath).as[DataciteType]
|
spark.read.load(sourcePath).as[DataciteType]
|
||||||
.filter(d => d.isActive)
|
.filter(d => d.isActive)
|
||||||
.flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies))
|
.flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks))
|
||||||
.filter(d => d != null)
|
.filter(d => d != null)
|
||||||
.write.mode(SaveMode.Overwrite).save(targetPath)
|
.write.mode(SaveMode.Overwrite).save(targetPath)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,12 +1,15 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.datacite
|
package eu.dnetlib.dhp.actionmanager.datacite
|
||||||
|
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
|
import com.fasterxml.jackson.databind.SerializationFeature
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf
|
import eu.dnetlib.dhp.schema.oaf.Oaf
|
||||||
import org.junit.jupiter.api.extension.ExtendWith
|
import org.junit.jupiter.api.extension.ExtendWith
|
||||||
import org.junit.jupiter.api.{BeforeEach, Test}
|
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||||
import org.mockito.junit.jupiter.MockitoExtension
|
import org.mockito.junit.jupiter.MockitoExtension
|
||||||
import org.codehaus.jackson.map.ObjectMapper
|
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
|
|
||||||
@ExtendWith(Array(classOf[MockitoExtension]))
|
@ExtendWith(Array(classOf[MockitoExtension]))
|
||||||
|
@ -25,9 +28,15 @@ class DataciteToOAFTest extends AbstractVocabularyTest{
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val mapper = new ObjectMapper()
|
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
||||||
val res:List[Oaf] =DataciteToOAFTransformation.generateOAF(record, 0L,0L, vocabularies )
|
val res:List[Oaf] =DataciteToOAFTransformation.generateOAF(record, 0L,0L, vocabularies, true )
|
||||||
println (mapper.defaultPrettyPrintingWriter().writeValueAsString(res.head))
|
|
||||||
|
res.foreach(r => {
|
||||||
|
println (mapper.writeValueAsString(r))
|
||||||
|
println("----------------------------")
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
package eu.dnetlib.dhp.oa.sx.graphimport
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||||
|
import org.apache.commons.io.IOUtils
|
||||||
|
import org.apache.spark.SparkConf
|
||||||
|
import org.apache.spark.sql.SparkSession
|
||||||
|
|
||||||
|
/**
 * Spark application entry point.
 *
 * At present the job only parses its command-line arguments (described by the
 * bundled `datacite_to_df_params.json` resource), creates the SparkSession and
 * reads the `inputPath` argument; no data is processed yet.
 */
object SparkDataciteToOAF {

  /**
   * @param args command-line arguments, parsed by [[ArgumentApplicationParser]];
   *             `master` and `inputPath` are read
   */
  def main(args: Array[String]): Unit = {
    import java.nio.charset.StandardCharsets

    val conf: SparkConf = new SparkConf()

    // Decode the parameter description explicitly as UTF-8 instead of relying on the
    // platform default charset, and always release the classpath stream.
    val paramsStream = getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/ebi/datacite_to_df_params.json")
    val paramsJson: String =
      try IOUtils.toString(paramsStream, StandardCharsets.UTF_8)
      finally if (paramsStream != null) paramsStream.close()

    val parser = new ArgumentApplicationParser(paramsJson)
    parser.parseArgument(args)

    val spark: SparkSession =
      SparkSession
        .builder()
        .config(conf)
        .appName(getClass.getSimpleName)
        .master(parser.get("master"))
        .getOrCreate()
    import spark.implicits._

    // Scaffolding for the not-yet-implemented transformation.
    val sc = spark.sparkContext

    val inputPath = parser.get("inputPath")
  }

}
|
Loading…
Reference in New Issue