Hosted By Map - refactoring and application of the new aggregator

This commit is contained in:
Miriam Baglioni 2021-08-04 10:14:20 +02:00
parent a7bf314fd2
commit 8f7623e77a
1 changed file with 5 additions and 7 deletions

View File

@ -45,7 +45,7 @@ object SparkApplyHostedByMapToDatasource {
val graphPath = parser.get("graphPath")
val outputPath = parser.get("outputPath")
val workingPath = parser.get("workingPath")
val preparedInfoPath = parser.get("preparedInfoPath")
implicit val formats = DefaultFormats
@ -55,17 +55,15 @@ object SparkApplyHostedByMapToDatasource {
implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo])
val mapper = new ObjectMapper()
val dats : Dataset[Datasource] = spark.read.textFile("$graphPath/datasource")
val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource")
.map(r => mapper.readValue(r, classOf[Datasource]))
val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo")
.map(ei => mapper.readValue(ei, classOf[EntityInfo]))
val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath)
.map(ei => mapper.readValue(ei, classOf[EntityInfo])))
//c. join the dataset with the result of the previous step (a) by datasource id, group by ds id and change the compatibility if necessary
applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(s"$graphPath/datasource")
applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath)
}