From 8f7623e77afe2edce3681a81a0283f007f733c9d Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Wed, 4 Aug 2021 10:14:20 +0200
Subject: [PATCH] Hosted By Map - refactoring and application of the new
 aggregator

---
 .../SparkApplyHostedByMapToDatasource.scala        | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala
index 4ecea63f3..fad313f1c 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala
@@ -45,7 +45,7 @@ object SparkApplyHostedByMapToDatasource {
 
     val graphPath = parser.get("graphPath")
     val outputPath = parser.get("outputPath")
-    val workingPath = parser.get("workingPath")
+    val preparedInfoPath = parser.get("preparedInfoPath")
 
     implicit val formats = DefaultFormats
 
@@ -55,17 +55,15 @@
     implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo])
     val mapper = new ObjectMapper()
 
-    val dats : Dataset[Datasource] = spark.read.textFile("$graphPath/datasource")
+    val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource")
       .map(r => mapper.readValue(r, classOf[Datasource]))
 
-    val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo")
-      .map(ei => mapper.readValue(ei, classOf[EntityInfo]))
-
-
+    val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath)
+      .map(ei => mapper.readValue(ei, classOf[EntityInfo])))
 
     //c. join the dataset with the result of step a by datasource id, group by datasource id and change the compatibility if necessary
-    applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(s"$graphPath/datasource")
+    applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath)
 
 
 
   }
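
Note for readers: the patch replaces the direct read of the prepared info with a call to
Aggregators.datasourceToSingleId, whose implementation lives elsewhere in the module and is
not part of this diff. Purely as an illustrative sketch of the idea (collapsing the prepared
EntityInfo records to a single row per datasource id with the standard Spark typed API), one
could write something like the snippet below. The object name DatasourceToSingleIdSketch, the
import path of EntityInfo and the getHostedById accessor are assumptions, not taken from this
patch.

import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo  // path assumed from the module layout
import org.apache.spark.sql.{Dataset, Encoder, Encoders}

object DatasourceToSingleIdSketch {

  // Bean encoder, mirroring the mapEncoderEinfo used in SparkApplyHostedByMapToDatasource
  implicit val entityInfoEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo])

  // Sketch only: keep one EntityInfo per datasource id; the real aggregator may pick the
  // representative row differently (e.g. by merging compatibility information).
  def datasourceToSingleId(infos: Dataset[EntityInfo]): Dataset[EntityInfo] =
    infos
      .groupByKey(_.getHostedById)(Encoders.STRING)  // one group per datasource id (accessor assumed)
      .reduceGroups((first, _) => first)             // retain a single representative row per group
      .map(_._2)                                     // drop the grouping key
}

The downstream applyHBtoDats join then sees at most one EntityInfo per datasource, which is
what makes writing the result straight to outputPath safe.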