removed all comments in Italian

Miriam Baglioni 2021-08-11 16:23:33 +02:00
parent 52c18c2697
commit b1c6140ebf
3 changed files with 5 additions and 11 deletions

@@ -61,8 +61,6 @@ object SparkApplyHostedByMapToDatasource {
   val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath)
     .map(ei => mapper.readValue(ei, classOf[EntityInfo])))
-  //c. join the dataset with the result of the previous step (a) on datasource id, group by ds id and change the compatibility if necessary
   applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath)
 }
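
For context, the step the removed comment described (left join the datasources with the prepared EntityInfo on the datasource id and change the compatibility of the matched entries) could look roughly like the sketch below. The simplified types and the compatibility value are hypothetical; this is not the project's applyHBtoDats implementation, which this hunk does not show.

import org.apache.spark.sql.{Dataset, SparkSession}

// Hypothetical minimal stand-ins for the real EntityInfo and Datasource classes
case class EInfo(hostedById: String)
case class Ds(id: String, var compatibility: String)

def applyHBtoDatsSketch(pinfo: Dataset[EInfo], dats: Dataset[Ds])(implicit spark: SparkSession): Dataset[Ds] = {
  import spark.implicits._
  // pinfo is assumed to hold a single entry per datasource id
  // (cf. Aggregators.datasourceToSingleId above), so each datasource matches at most once
  dats.joinWith(pinfo, dats("id") === pinfo("hostedById"), "left")
    .map { case (ds, ei) =>
      if (ei != null) ds.compatibility = "hostedby" // placeholder value: change only matched datasources
      ds
    }
}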

@@ -14,9 +14,6 @@ import org.slf4j.{Logger, LoggerFactory}
 import scala.collection.JavaConverters._
-//a. left join publications with the result of the previous step on result id; set the instance (if more than one instance
-// in the result => skip) with the hosted by, and also the access right of the instance if openaccess is true
 object SparkApplyHostedByMapToResult {
@@ -76,8 +73,6 @@ object SparkApplyHostedByMapToResult {
   val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath)
     .map(ei => mapper.readValue(ei, classOf[EntityInfo]))
-  //a. left join publications with the result of the previous step on result id; set the instance (if more than one instance
-  // in the result => skip) with the hosted by, and also the access right of the instance if openaccess is true
   applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath)
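
The removed comment encodes a rule worth keeping visible: publications are left-joined with the EntityInfo of the previous step on the result id, and a publication is updated only when it has exactly one instance. A minimal sketch of that rule, with hypothetical simplified types (the real Publication and Instance classes are richer), not the project's applyHBtoPubs:

import org.apache.spark.sql.{Dataset, SparkSession}

// Hypothetical minimal stand-ins for the real model classes
case class Inst(var hostedById: String, var accessRight: String)
case class Pub(id: String, instance: Seq[Inst])
case class EInfo(id: String, hostedById: String, openAccess: Boolean)

def applyHBtoPubsSketch(pinfo: Dataset[EInfo], pubs: Dataset[Pub])(implicit spark: SparkSession): Dataset[Pub] = {
  import spark.implicits._
  pubs.joinWith(pinfo, pubs("id") === pinfo("id"), "left")
    .map { case (p, ei) =>
      if (ei != null && p.instance.size == 1) { // more than one instance in the result => skip
        val i = p.instance.head
        i.hostedById = ei.hostedById              // set the hosted by of the single instance
        if (ei.openAccess) i.accessRight = "OPEN" // and its access right when openaccess is true
      }
      p
    }
}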

@@ -106,14 +106,15 @@ object SparkPrepareHostedByInfoToApply {
   import spark.implicits._
-  //STEP1: read the hostedbymap and transform it into entity info
+  //STEP1: read the hostedbymap and transform it into EntityInfo
   val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo)
-  //STEP2: create the exploded map publication id issn, eissn, lissn
+  //STEP2: create the associations (publication, issn), (publication, eissn), (publication, lissn)
   val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication")
-  //STEP3: left join resultInfo with hostedByInfo on the journal_id of the result
-  // and reduce all the results with the same id into a single entry with the datasource id added
+  //STEP3: left join resultInfo with hostedByInfo on journal_id. Reduce all the results with the same id to just
+  // one entry (one result could be associated with both issn and eissn and so possibly match more than once against the map);
+  // to this entry we add the id of the datasource for the next step
   joinResHBM(resultInfoDataset, hostedByInfo)
     .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath)
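
The STEP3 comment describes a left join followed by a reduction, since a result that matched the map on both its issn and its eissn would otherwise appear twice. A self-contained sketch of that join-then-reduce pattern, with a hypothetical simplified EntityInfo (unmatched results are assumed to carry an empty hostedById), not the project's joinResHBM:

import org.apache.spark.sql.{Dataset, SparkSession}

// Hypothetical minimal stand-in for the real EntityInfo
case class EInfo(id: String, journalId: String, hostedById: String)

def joinResHBMSketch(results: Dataset[EInfo], hostedBy: Dataset[EInfo])(implicit spark: SparkSession): Dataset[EInfo] = {
  import spark.implicits._
  results.joinWith(hostedBy, results("journalId") === hostedBy("journalId"), "left")
    .map { case (r, h) => if (h == null) r else r.copy(hostedById = h.hostedById) }
    // collapse the duplicates produced by multiple journal-id matches,
    // preferring an entry that found a hosting datasource
    .groupByKey(_.id)
    .reduceGroups((a, b) => if (a.hostedById.nonEmpty) a else b)
    .map(_._2)
}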