forked from D-Net/dnet-hadoop
fixed scholexplorer bug
This commit is contained in:
parent
6efab4d88e
commit
a87f9ea643
|
@ -2,7 +2,14 @@ package eu.dnetlib.dhp.sx.graph
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
import eu.dnetlib.dhp.schema.oaf.{KeyValue, Result, StructuredProperty}
|
import eu.dnetlib.dhp.schema.oaf.{KeyValue, Result, StructuredProperty}
|
||||||
import eu.dnetlib.dhp.schema.sx.scholix.{Scholix, ScholixCollectedFrom, ScholixEntityId, ScholixIdentifier, ScholixRelationship, ScholixResource}
|
import eu.dnetlib.dhp.schema.sx.scholix.{
|
||||||
|
Scholix,
|
||||||
|
ScholixCollectedFrom,
|
||||||
|
ScholixEntityId,
|
||||||
|
ScholixIdentifier,
|
||||||
|
ScholixRelationship,
|
||||||
|
ScholixResource
|
||||||
|
}
|
||||||
import org.json4s
|
import org.json4s
|
||||||
import org.json4s.DefaultFormats
|
import org.json4s.DefaultFormats
|
||||||
import org.json4s.jackson.JsonMethods.parse
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
|
@ -22,7 +29,7 @@ case class RelKeyValue(key: String, value: String) {}
|
||||||
object ScholexplorerUtils {
|
object ScholexplorerUtils {
|
||||||
|
|
||||||
val OPENAIRE_IDENTIFIER_SCHEMA: String = "OpenAIRE Identifier"
|
val OPENAIRE_IDENTIFIER_SCHEMA: String = "OpenAIRE Identifier"
|
||||||
val mapper= new ObjectMapper()
|
val mapper = new ObjectMapper()
|
||||||
|
|
||||||
case class RelationVocabulary(original: String, inverse: String) {}
|
case class RelationVocabulary(original: String, inverse: String) {}
|
||||||
|
|
||||||
|
|
|
@ -109,19 +109,15 @@ class SparkCreateScholexplorerDump(propertyPath: String, args: Array[String], lo
|
||||||
val relations = spark.read.load(s"$outputPath/relation").as[RelationInfo]
|
val relations = spark.read.load(s"$outputPath/relation").as[RelationInfo]
|
||||||
val resource = spark.read.load(s"$outputPath/resource").as[ScholixResource]
|
val resource = spark.read.load(s"$outputPath/resource").as[ScholixResource]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val scholix_one_verse = relations
|
val scholix_one_verse = relations
|
||||||
.joinWith(resource, relations("source") === resource("dnetIdentifier"), "inner")
|
.joinWith(resource, relations("source") === resource("dnetIdentifier"), "inner")
|
||||||
.map(res => ScholexplorerUtils.generateScholix(res._1, res._2))
|
.map(res => ScholexplorerUtils.generateScholix(res._1, res._2))
|
||||||
.map(s=> (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[Scholix])))
|
.map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[Scholix])))
|
||||||
|
|
||||||
|
|
||||||
val resourceTarget = relations
|
val resourceTarget = relations
|
||||||
.joinWith(resource, relations("target") === resource("dnetIdentifier"), "inner")
|
.joinWith(resource, relations("target") === resource("dnetIdentifier"), "inner")
|
||||||
.map(res => (res._1.id, res._2))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[ScholixResource])))
|
.map(res => (res._1.id, res._2))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[ScholixResource])))
|
||||||
|
|
||||||
|
|
||||||
scholix_one_verse
|
scholix_one_verse
|
||||||
.joinWith(resourceTarget, scholix_one_verse("_1") === resourceTarget("_1"), "inner")
|
.joinWith(resourceTarget, scholix_one_verse("_1") === resourceTarget("_1"), "inner")
|
||||||
.map(k => ScholexplorerUtils.updateTarget(k._1._2, k._2._2))
|
.map(k => ScholexplorerUtils.updateTarget(k._1._2, k._2._2))
|
||||||
|
|
|
@ -22,7 +22,5 @@ class ScholixGenerationTest {
|
||||||
// )
|
// )
|
||||||
app.generateScholix("/home/sandro/Downloads/scholix/", spark)
|
app.generateScholix("/home/sandro/Downloads/scholix/", spark)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue