1
0
Fork 0

implement filter of openCitation

This commit is contained in:
Sandro La Bruzzo 2021-11-09 11:27:12 +01:00
parent 7bd224f051
commit 6477a40670
1 changed files with 6 additions and 1 deletions

View File

@ -59,7 +59,12 @@ object SparkConvertRDDtoDataset {
log.info("Converting Relation") log.info("Converting Relation")
val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation").map(s => mapper.readValue(s, classOf[Relation])).filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50")) val relationSemanticFilter = List("cites", "iscitedby","merges", "ismergedin")
val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation")
.map(s => mapper.readValue(s, classOf[Relation]))
.filter(r=> r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
.filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")