From b1c6140ebf01608af7792d4d528c83ea62760616 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 16:23:33 +0200 Subject: [PATCH] removed all comments in Italian --- .../hostedbymap/SparkApplyHostedByMapToDatasource.scala | 2 -- .../hostedbymap/SparkApplyHostedByMapToResult.scala | 5 ----- .../hostedbymap/SparkPrepareHostedByInfoToApply.scala | 9 +++++---- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index fad313f1c..ae1454b47 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -61,8 +61,6 @@ object SparkApplyHostedByMapToDatasource { val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) - //c. 
dataset join risultato del passo prima di a per datasource id, gruppo per ds id e cambio compatibilita' se necessario - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 4c3b98f3b..533e439b7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -14,9 +14,6 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ -//a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance -// nel result => salto)con l'hosted by anche access right della instance se openaccess e' true - object SparkApplyHostedByMapToResult { @@ -76,8 +73,6 @@ object SparkApplyHostedByMapToResult { val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - //a. 
publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance - // nel result => salto)con l'hosted by anche access right della instance se openaccess e' true applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index d342d57b0..6db0ad33d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -106,14 +106,15 @@ object SparkPrepareHostedByInfoToApply { import spark.implicits._ - //STEP1: leggere la hostedbymap e trasformarla in entity info + //STEP1: read the hostedbymap and transform it in EntityInfo val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP2: creare la mappa publication id issn, eissn, lissn esplosa + //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") - //STEP3: join resultInfo con hostedByInfo sul journal_id dal result con left - // e riduzione di tutti i result con lo stesso id in una unica entry con aggiunto l'id della datasource + //STEP3: left join resultInfo with hostedByInfo on journal_id. 
Reduction of all the results with the same id in just +    //one entry (one result could be associated with issn and eissn and so possibly matching more than once against the map) +    //to this entry we add the id of the datasource for the next step joinResHBM(resultInfoDataset, hostedByInfo) .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath)