From a87f9ea64317dff7afac5045a4c64bb9c8a26954 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 17 May 2024 14:16:43 +0200 Subject: [PATCH] fixed scholexplorer bug --- .../eu/dnetlib/dhp/sx/graph/ScholexplorerUtils.scala | 11 +++++++++-- .../dhp/sx/graph/SparkCreateScholexplorerDump.scala | 6 +----- .../dhp/sx/graph/scholix/ScholixGenerationTest.scala | 2 -- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/ScholexplorerUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/ScholexplorerUtils.scala index f62f271e3..d171d96d9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/ScholexplorerUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/ScholexplorerUtils.scala @@ -2,7 +2,14 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.oaf.{KeyValue, Result, StructuredProperty} -import eu.dnetlib.dhp.schema.sx.scholix.{Scholix, ScholixCollectedFrom, ScholixEntityId, ScholixIdentifier, ScholixRelationship, ScholixResource} +import eu.dnetlib.dhp.schema.sx.scholix.{ + Scholix, + ScholixCollectedFrom, + ScholixEntityId, + ScholixIdentifier, + ScholixRelationship, + ScholixResource +} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse @@ -22,7 +29,7 @@ case class RelKeyValue(key: String, value: String) {} object ScholexplorerUtils { val OPENAIRE_IDENTIFIER_SCHEMA: String = "OpenAIRE Identifier" - val mapper= new ObjectMapper() + val mapper = new ObjectMapper() case class RelationVocabulary(original: String, inverse: String) {} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholexplorerDump.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholexplorerDump.scala index 32aa68665..dd420ab95 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholexplorerDump.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholexplorerDump.scala @@ -109,19 +109,15 @@ class SparkCreateScholexplorerDump(propertyPath: String, args: Array[String], lo val relations = spark.read.load(s"$outputPath/relation").as[RelationInfo] val resource = spark.read.load(s"$outputPath/resource").as[ScholixResource] - - val scholix_one_verse = relations .joinWith(resource, relations("source") === resource("dnetIdentifier"), "inner") .map(res => ScholexplorerUtils.generateScholix(res._1, res._2)) - .map(s=> (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[Scholix]))) - + .map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[Scholix]))) val resourceTarget = relations .joinWith(resource, relations("target") === resource("dnetIdentifier"), "inner") .map(res => (res._1.id, res._2))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[ScholixResource]))) - scholix_one_verse .joinWith(resourceTarget, scholix_one_verse("_1") === resourceTarget("_1"), "inner") .map(k => ScholexplorerUtils.updateTarget(k._1._2, k._2._2)) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala index 67d40dcf1..204fe9794 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala @@ -22,7 +22,5 @@ class ScholixGenerationTest { // ) app.generateScholix("/home/sandro/Downloads/scholix/", spark) - - } }