From 032bcc8279849cfa498bc8227f8a96c4e1a48525 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 20 May 2024 09:24:15 +0200 Subject: [PATCH] since last beta workflow we decide to introduce in the graph only MAG item with DOI and set them invisible ( this should be the same behaviour of the previous DOIBoost mapping). This commit apply this type of mapping --- .../dhp/collection/mag/MagUtility.scala | 41 +++++-------------- .../dhp/collection/mag/SparkMAGtoOAF.scala | 3 ++ .../dhp/collection/mag/MAGMappingTest.scala | 12 ++++-- 3 files changed, 22 insertions(+), 34 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/MagUtility.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/MagUtility.scala index df22a6b84..c415dd9a4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/MagUtility.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/MagUtility.scala @@ -79,23 +79,6 @@ object MagUtility extends Serializable { private val MAGCollectedFrom = keyValue(ModelConstants.MAG_ID, ModelConstants.MAG_NAME) private val MAGDataInfo: DataInfo = { - val di = new DataInfo - di.setDeletedbyinference(false) - di.setInferred(false) - di.setInvisible(false) - di.setTrust("0.9") - di.setProvenanceaction( - OafMapperUtils.qualifier( - ModelConstants.SYSIMPORT_ACTIONSET, - ModelConstants.SYSIMPORT_ACTIONSET, - ModelConstants.DNET_PROVENANCE_ACTIONS, - ModelConstants.DNET_PROVENANCE_ACTIONS - ) - ) - di - } - - private val MAGDataInfoInvisible: DataInfo = { val di = new DataInfo di.setDeletedbyinference(false) di.setInferred(false) @@ -111,8 +94,7 @@ object MagUtility extends Serializable { ) di } - - val datatypedict = Map( +val datatypedict = Map( "bool" -> BooleanType, "int" -> IntegerType, "uint" -> IntegerType, @@ -453,7 +435,6 @@ object MagUtility extends Serializable { case "repository" => result = new Publication() - result.setDataInfo(MAGDataInfoInvisible) qualifier( "0038", "Other literature type", @@ -488,8 +469,7 @@ object MagUtility extends Serializable { } if (result != null) { - if (result.getDataInfo == null) - result.setDataInfo(MAGDataInfo) + result.setDataInfo(MAGDataInfo) val i = new Instance i.setInstancetype(tp) i.setInstanceTypeMapping( @@ -512,7 +492,7 @@ object MagUtility extends Serializable { return null result.setCollectedfrom(List(MAGCollectedFrom).asJava) - val pidList = List( + var pidList = List( structuredProperty( paper.paperId.get.toString, qualifier( @@ -525,7 +505,7 @@ object MagUtility extends Serializable { ) ) - result.setPid(pidList.asJava) + result.setOriginalId(pidList.map(s => s.getValue).asJava) @@ -618,10 +598,9 @@ object MagUtility extends Serializable { } val instance = result.getInstance().get(0) - instance.setPid(pidList.asJava) - if (paper.doi.orNull != null) - instance.setAlternateIdentifier( - List( + + if (paper.doi.orNull != null) { + pidList = pidList ::: List( structuredProperty( paper.doi.get, qualifier( @@ -632,8 +611,10 @@ object MagUtility extends Serializable { ), null ) - ).asJava - ) + ) + } + instance.setPid(pidList.asJava) + result.setPid(pidList.asJava) instance.setUrl(paper.urls.get.asJava) instance.setHostedby(ModelConstants.UNKNOWN_REPOSITORY) instance.setCollectedfrom(MAGCollectedFrom) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/SparkMAGtoOAF.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/SparkMAGtoOAF.scala index 5dd38970d..123d8e0f8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/SparkMAGtoOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/mag/SparkMAGtoOAF.scala @@ -35,9 +35,12 @@ class SparkMAGtoOAF(propertyPath: String, args: Array[String], log: Logger) def convertMAG(spark: SparkSession, magBasePath: String, mdStorePath: String): Unit = { import spark.implicits._ + + spark.read .load(s"$magBasePath/mag_denormalized") .as[MAGPaper] + .filter(col("doi").isNotNull) .map(s => MagUtility.convertMAGtoOAF(s)) .filter(s => s != null) .write diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala index 59b91d66b..3ae25decb 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/collection/mag/MAGMappingTest.scala @@ -3,13 +3,17 @@ package eu.dnetlib.dhp.collection.mag import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.oaf.{Dataset, Publication, Result} import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions.col import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test + + class MAGMappingTest { val mapper = new ObjectMapper() + def mappingTest(): Unit = { val spark = SparkSession @@ -18,12 +22,12 @@ class MAGMappingTest { .master("local[*]") .getOrCreate() - val s = new SparkMagOrganizationAS(null, null, null) - - s.generateAS(spark, "/home/sandro/Downloads/mag_test", "/home/sandro/Downloads/mag_AS") - + val s = new SparkMAGtoOAF(null, null, null) + s.convertMAG(spark, "/Users/sandro/Downloads/", "/Users/sandro/Downloads/mag_OAF") } + + @Test def mappingMagType(): Unit = {