From df7ee77c7afd7af06f90ebaac447decc665f2428 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 4 Nov 2021 16:24:07 +0100 Subject: [PATCH] [DOIBoost Mapping] removed not needed comments --- .../doiboost/mag/SparkProcessMAG.scala | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index fa3be973d..016279787 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -114,25 +114,6 @@ object SparkProcessMAG { .save(s"$workingPath/merge_step_3") - //no more needed to add the instance to mag records -// magPubs= spark.read.load(s"$workingPath/merge_step_2_conference").as[Publication] -// .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] -// -// val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl] -// -// -// -// logger.info("Phase 5) enrich publication with URL and Instances") -// magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left") -// .map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) } -// .write.mode(SaveMode.Overwrite) -// .save(s"$workingPath/merge_step_3") - - -// logger.info("Phase 6) Enrich Publication with description") -// val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract] -// pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract") - val paperAbstract = spark.read.load((s"$workingPath/PaperAbstract")).as[MagPaperAbstract] @@ -171,11 +152,7 @@ object SparkProcessMAG { .map(_._2) .write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") -// val s:RDD[Publication] = spark.read.load(s"$workingPath/mag_publication").as[Publication] -// .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) -// .map(_._2) -// -// spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") + } }