From e15a1969a57726228f487e8b06b467ad5cdbef33 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 14 Oct 2021 14:33:56 +0200 Subject: [PATCH] applying fix on the DOIBoost construction process that somehow wasn't part of the merge done in 83c90c71809c947adcfc1e70eb9d9fad195d87aa --- .../eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index e501b4823..c08e09f53 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -206,9 +206,9 @@ object SparkGenerateDoiBoost { (r.getSource, r) else if (r.getTarget.startsWith("unresolved")) (r.getTarget,r) - else + else ("resolved", r) - }) + })(Encoders.tuple(Encoders.STRING, mapEncoderRel)) val openaireOrganization:Dataset[(String,String)] = spark.read.text(openaireOrganizationPath).as[String].flatMap(s => extractIdGRID(s)).groupByKey(_._2).reduceGroups((x,y) => if (x != null) x else y ).map(_._2) @@ -221,8 +221,8 @@ object SparkGenerateDoiBoost { currentRels.setSource(currentOrgs._1) else currentRels.setTarget(currentOrgs._1) - currentRels - }.write.save(s"$workingDirPath/doiBoostPublicationAffiliation") + currentRels + }.filter(r=> !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation") magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).map( item => { val affiliation = item._2 @@ -242,6 +242,6 @@ object SparkGenerateDoiBoost { else null }).filter(o=> o!=null).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostOrganization") - } + } -} +} \ No newline at end of file