diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index 64be5e79a..a359eb3c6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -15,6 +15,12 @@ import org.slf4j.{Logger, LoggerFactory} object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) + def fixORCIDItem(item :ORCIDItem):ORCIDItem = { + new ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) + + } + + def run(spark:SparkSession,sourcePath:String,workingPath:String, targetPath:String):Unit = { import spark.implicits._ implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] @@ -34,10 +40,11 @@ object SparkConvertORCIDToOAF { works.joinWith(authors, authors("oid").equalTo(works("oid"))) .map(i =>{ val doi = i._1.doi - val author = i._2 + var author = i._2 (doi, author) }).groupBy(col("_1").alias("doi")) - .agg(collect_list(col("_2")).alias("authors")) + .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem] + .map(s => fixORCIDItem(s)) .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index e90e43a20..436a53cbe 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -108,7 +108,7 @@ public class SparkOrcidToResultFromSemRelJob { return value -> { R ret = value._1(); Optional rol = Optional.ofNullable(value._2()); - if (rol.isPresent()) { + if (rol.isPresent() && Optional.ofNullable(ret.getAuthor()).isPresent()) { List toenrich_author = ret.getAuthor(); List autoritativeAuthors = rol.get().getAuthorList(); for (Author author : toenrich_author) {