2023-12-01 15:05:45 +01:00
5 changed files with 268 additions and 300 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
@@ -4,38 +4,13 @@ package eu.dnetlib.dhp.oa.merge;
 import java.text.Normalizer;
 import java.util.*;
 import java.util.stream.Collectors;
-
 import org.apache.commons.lang3.StringUtils;
-import org.jetbrains.annotations.NotNull;
-
 import com.wcohen.ss.JaroWinkler;
-
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.pace.model.Person;
 import scala.Tuple2;

-class SimilarityCellInfo implements Comparable<SimilarityCellInfo> {
-
-	public int authorPosition = 0;
-	public int orcidPosition = 0;
-
-	public double maxColumnSimilarity = 0.0;
-
-	public SimilarityCellInfo() {
-	}
-
-	public void setValues(final int authPos, final int orcidPos, final double similarity) {
-		this.authorPosition = authPos;
-		this.orcidPosition = orcidPos;
-		this.maxColumnSimilarity = similarity;
-	}
-
-	@Override
-	public int compareTo(@NotNull SimilarityCellInfo o) {
-		return Double.compare(maxColumnSimilarity, o.maxColumnSimilarity);
-	}
-}

 public class AuthorMerger {

@@ -183,6 +158,7 @@ public class AuthorMerger {
    /**
     * This method tries to figure out when two author are the same in the contest
     * of ORCID enrichment
+     *
     * @param left  Author in the OAF entity
     * @param right Author ORCID
     * @return based on a heuristic on the names of the authors if they are the same.
@@ -238,6 +214,7 @@ public class AuthorMerger {

    /**
     * Method to enrich ORCID information in one list of authors based on another list
+     *
     * @param baseAuthor  the Author List in the OAF Entity
     * @param orcidAuthor The list of ORCID Author intersected
     * @return The Author List of the OAF Entity enriched with the orcid Author
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
@@ -92,7 +92,6 @@ object SparkGenerateDoiBoost {
      .mode(SaveMode.Overwrite)
      .save(s"$workingDirPath/firstJoin")

-
    logger.info("Phase 2) Join Result with MAG")
    val sj: Dataset[(String, Publication)] =
      spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
--- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/EnrichOrcidTest.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/EnrichOrcidTest.scala
@@ -6,13 +6,10 @@ import org.junit.jupiter.api.Test
 import org.slf4j.{Logger, LoggerFactory}
 import org.apache.spark.sql.functions._

-
 class EnrichOrcidTest {

  val log: Logger = LoggerFactory.getLogger(getClass)

-
-
  def test() = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
 //    spark.sparkContext.setLogLevel("ERROR")
@@ -63,7 +60,6 @@ class EnrichOrcidTest {
 //      }).filter(author => author != null)
 //    })

-
    Encoders
    import spark.implicits._

@@ -76,10 +72,6 @@ class EnrichOrcidTest {
 //
 //      .show()

-
-
-
-
  }

 }