removed all comments in Italian

2021-08-11 16:23:33 +02:00 · 2021-08-11 16:23:33 +02:00 · b1c6140ebf
parent 52c18c2697
commit b1c6140ebf
3 changed files with 5 additions and 11 deletions
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala
@ -61,8 +61,6 @@ object SparkApplyHostedByMapToDatasource {
    val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath)
      .map(ei => mapper.readValue(ei, classOf[EntityInfo])))

-    //c. dataset join risultato del passo prima di a per datasource id, gruppo per ds id e cambio compatibilita' se necessario
-
    applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath)
  }

--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala
@ -14,9 +14,6 @@ import org.slf4j.{Logger, LoggerFactory}

 import scala.collection.JavaConverters._

-//a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance
-// nel result => salto)con l'hosted by anche access right della instance se openaccess e' true
-

 object SparkApplyHostedByMapToResult {

@ -76,8 +73,6 @@ object SparkApplyHostedByMapToResult {
    val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath)
        .map(ei => mapper.readValue(ei, classOf[EntityInfo]))

-    //a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance
-    // nel result => salto)con l'hosted by anche access right della instance se openaccess e' true
    applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath)


--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala
@ -106,14 +106,15 @@ object SparkPrepareHostedByInfoToApply {
    import spark.implicits._


-    //STEP1: leggere la hostedbymap e trasformarla in entity info
+    //STEP1: read the hostedbymap and transform it into EntityInfo
    val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo)

-   //STEP2: creare la mappa publication id issn, eissn, lissn esplosa
+   //STEP2: create the associations (publication, issn), (publication, eissn), (publication, lissn)
    val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication")

-    //STEP3: join resultInfo con hostedByInfo sul journal_id dal result con left
-    // e riduzione di tutti i result con lo stesso id in una unica entry con aggiunto l'id della datasource
+    //STEP3: left join resultInfo with hostedByInfo on journal_id. All the results with the same id are reduced to just
+    //one entry (one result could be associated with both issn and eissn, and so possibly match more than once against
+    //the map); to this entry we add the id of the datasource for the next step
    joinResHBM(resultInfoDataset, hostedByInfo)
      .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath)