fixed scholexplorer bug

This commit is contained in:
Sandro La Bruzzo 2024-05-17 14:16:43 +02:00
parent 6efab4d88e
commit a87f9ea643
3 changed files with 10 additions and 9 deletions

View File

@@ -2,7 +2,14 @@ package eu.dnetlib.dhp.sx.graph
import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.schema.oaf.{KeyValue, Result, StructuredProperty} import eu.dnetlib.dhp.schema.oaf.{KeyValue, Result, StructuredProperty}
import eu.dnetlib.dhp.schema.sx.scholix.{Scholix, ScholixCollectedFrom, ScholixEntityId, ScholixIdentifier, ScholixRelationship, ScholixResource} import eu.dnetlib.dhp.schema.sx.scholix.{
Scholix,
ScholixCollectedFrom,
ScholixEntityId,
ScholixIdentifier,
ScholixRelationship,
ScholixResource
}
import org.json4s import org.json4s
import org.json4s.DefaultFormats import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse import org.json4s.jackson.JsonMethods.parse
@@ -22,7 +29,7 @@ case class RelKeyValue(key: String, value: String) {}
object ScholexplorerUtils { object ScholexplorerUtils {
val OPENAIRE_IDENTIFIER_SCHEMA: String = "OpenAIRE Identifier" val OPENAIRE_IDENTIFIER_SCHEMA: String = "OpenAIRE Identifier"
val mapper= new ObjectMapper() val mapper = new ObjectMapper()
case class RelationVocabulary(original: String, inverse: String) {} case class RelationVocabulary(original: String, inverse: String) {}

View File

@@ -109,19 +109,15 @@ class SparkCreateScholexplorerDump(propertyPath: String, args: Array[String], lo
val relations = spark.read.load(s"$outputPath/relation").as[RelationInfo] val relations = spark.read.load(s"$outputPath/relation").as[RelationInfo]
val resource = spark.read.load(s"$outputPath/resource").as[ScholixResource] val resource = spark.read.load(s"$outputPath/resource").as[ScholixResource]
val scholix_one_verse = relations val scholix_one_verse = relations
.joinWith(resource, relations("source") === resource("dnetIdentifier"), "inner") .joinWith(resource, relations("source") === resource("dnetIdentifier"), "inner")
.map(res => ScholexplorerUtils.generateScholix(res._1, res._2)) .map(res => ScholexplorerUtils.generateScholix(res._1, res._2))
.map(s=> (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[Scholix]))) .map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[Scholix])))
val resourceTarget = relations val resourceTarget = relations
.joinWith(resource, relations("target") === resource("dnetIdentifier"), "inner") .joinWith(resource, relations("target") === resource("dnetIdentifier"), "inner")
.map(res => (res._1.id, res._2))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[ScholixResource]))) .map(res => (res._1.id, res._2))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[ScholixResource])))
scholix_one_verse scholix_one_verse
.joinWith(resourceTarget, scholix_one_verse("_1") === resourceTarget("_1"), "inner") .joinWith(resourceTarget, scholix_one_verse("_1") === resourceTarget("_1"), "inner")
.map(k => ScholexplorerUtils.updateTarget(k._1._2, k._2._2)) .map(k => ScholexplorerUtils.updateTarget(k._1._2, k._2._2))

View File

@@ -22,7 +22,5 @@ class ScholixGenerationTest {
// ) // )
app.generateScholix("/home/sandro/Downloads/scholix/", spark) app.generateScholix("/home/sandro/Downloads/scholix/", spark)
} }
} }