3 changed files with 21 additions and 84 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
@ -1,17 +1,11 @@
 package eu.dnetlib.dhp.oa.merge;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.text.Normalizer;
 import java.util.*;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.tuple.MutablePair;
 import org.apache.commons.lang3.tuple.Pair;
 import org.jetbrains.annotations.NotNull;
 import com.wcohen.ss.JaroWinkler;
@ -161,42 +155,8 @@ public class AuthorMerger {
 			.replaceAll("(\\n)+", " ")
 			.trim();
 //        return Arrays.stream(fullname.split("[\\s | , | ;]+")).map(String::toLowerCase).sorted().collect(Collectors.joining());
 	}
 //
 //    public static List<Author> enrichOrcid2(List<Author> baseAuthor, List<Author> orcidAuthor) {
 //        if (baseAuthor == null || baseAuthor.isEmpty())
 //            return orcidAuthor;
 //
 //        if (orcidAuthor == null || orcidAuthor.isEmpty())
 //            return baseAuthor;
 //
 //        if (baseAuthor.size() == 1 && orcidAuthor.size() > 10)
 //            return baseAuthor;
 //
 //
 //        Map<String, List<Author>> pubClusters = baseAuthor.stream().collect(Collectors.toMap(AuthorMerger::generateAuthorkey, Arrays::asList, (a, b) -> {
 //            a.addAll(b);
 //            return a;
 //        }));
 //
 //        Map<String, List<Author>> orcidClusters = baseAuthor.stream().collect(Collectors.toMap(AuthorMerger::generateAuthorkey, Arrays::asList, (a, b) -> {
 //            a.addAll(b);
 //            return a;
 //        }));
 //
 //        System.out.println(pubClusters.keySet().size());
 //        System.out.println(orcidClusters.keySet().size());
 //
 //
 //
 //
 //       return null;
 //
 //
 //    }
 	static int hammingDist(String str1, String str2) {
 		if (str1.length() != str2.length())
 			return Math.max(str1.length(), str2.length());
@ -220,7 +180,14 @@ public class AuthorMerger {
 		return null;
 	}
-	public static boolean checkSimilarity2(final Author left, final Author right) {
+	/**
 	 * This method tries to figure out when two authors are the same in the context
 	 * of ORCID enrichment
 	 * @param left Author in the OAF entity
 	 * @param right Author ORCID
 	 * @return true if, based on a heuristic on the names of the authors, they are considered the same.
 	 */
 	public static boolean checkORCIDSimilarity(final Author left, final Author right) {
 		final Person pl = parse(left);
 		final Person pr = parse(right);
@ -267,8 +234,16 @@ public class AuthorMerger {
 		else
 			return false;
 	}
 	//
-	public static List<Author> enrichOrcid2(List<Author> baseAuthor, List<Author> orcidAuthor) {
+
 	/**
 	 * Method to enrich ORCID information in one list of authors based on another list
 	 * @param baseAuthor the Author List in the OAF Entity
 	 * @param orcidAuthor The list of ORCID Author intersected
 	 * @return The Author List of the OAF Entity enriched with the orcid Author
 	 */
 	public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
 		if (baseAuthor == null || baseAuthor.isEmpty())
 			return orcidAuthor;
@ -283,7 +258,7 @@ public class AuthorMerger {
 		oAuthor.addAll(orcidAuthor);
 		baseAuthor.forEach(ba -> {
-			Optional<Author> aMatch = oAuthor.stream().filter(oa -> checkSimilarity2(ba, oa)).findFirst();
+			Optional<Author> aMatch = oAuthor.stream().filter(oa -> checkORCIDSimilarity(ba, oa)).findFirst();
 			if (aMatch.isPresent()) {
 				final Author sameAuthor = aMatch.get();
 				addPid(ba, sameAuthor.getPid());
@ -293,40 +268,6 @@ public class AuthorMerger {
 		return baseAuthor;
 	}
 	/**
 	 * Enriches a list of authors with ORCID pids using a pairwise similarity matrix.
 	 * Every (base author, ORCID author) pair is scored with {@code sim}; for each ORCID author
 	 * the row holding the highest score seen is recorded, and when that score is greater than
 	 * 0.85 the ORCID author's pids are handed to {@code addPid} for the recorded base author.
 	 *
 	 * @param baseAuthor the authors to enrich; the same list instance is returned after enrichment
 	 * @param orcidAuthor the candidate ORCID authors whose pids may be copied
 	 * @return {@code orcidAuthor} when {@code baseAuthor} is null or empty, otherwise {@code baseAuthor}
 	 */
 	public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
 		if (baseAuthor == null || baseAuthor.isEmpty())
 			return orcidAuthor;
 		if (orcidAuthor == null || orcidAuthor.isEmpty())
 			return baseAuthor;
 		// A single base author against more than 10 ORCID authors is returned untouched.
 		if (baseAuthor.size() == 1 && orcidAuthor.size() > 10)
 			return baseAuthor;
 		// Rows are base authors, columns are ORCID authors.
 		final Double similarityMatrix[][] = new Double[baseAuthor.size()][orcidAuthor.size()];
 		// One tracker per column; its initial state comes from the SimilarityCellInfo
 		// constructor, which is defined outside this view.
 		final List<SimilarityCellInfo> maxColums = new ArrayList<>();
 		for (int i = 0; i < orcidAuthor.size(); i++)
 			maxColums.add(new SimilarityCellInfo());
 		for (int i = 0; i < baseAuthor.size(); i++) {
 			for (int j = 0; j < orcidAuthor.size(); j++) {
 				similarityMatrix[i][j] = sim(baseAuthor.get(i), orcidAuthor.get(j));
 				// Strict '<' means a later row with an equal score does not replace the recorded one.
 				if (maxColums.get(j).maxColumnSimilarity < similarityMatrix[i][j])
 					maxColums.get(j).setValues(i, j, similarityMatrix[i][j]);
 			}
 		}
 		// sorted() relies on the natural ordering of SimilarityCellInfo (not shown here);
 		// only columns whose recorded score is above 0.85 contribute pids.
 		maxColums
 			.stream()
 			.sorted()
 			.filter(si -> si.maxColumnSimilarity > 0.85)
 			.forEach(si -> addPid(baseAuthor.get(si.authorPosition), orcidAuthor.get(si.orcidPosition).getPid()));
 		return baseAuthor;
 	}
 	private static void addPid(final Author a, final List<StructuredProperty> pids) {
 		if (a.getPid() == null) {
--- a/dhp-common/src/test/java/eu/dnetlib/oa/merge/AuthorMergerTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/oa/merge/AuthorMergerTest.java
@ -4,13 +4,9 @@ package eu.dnetlib.oa.merge;
 import static org.junit.jupiter.api.Assertions.*;
 import java.io.BufferedReader;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Objects;
 import java.util.stream.Collectors;
 import org.junit.jupiter.api.Test;
 import org.junit.platform.commons.util.StringUtils;
@ -67,7 +63,7 @@ public class AuthorMergerTest {
 				long start = System.currentTimeMillis();
 //                final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
-				final List<Author> enrichedList = AuthorMerger.enrichOrcid2(publicationAuthors, orcidAuthors);
+				final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
 				long enrichedAuthorWithPid = enrichedList
 					.stream()
@ -105,7 +101,7 @@ public class AuthorMergerTest {
 		right.setSurname("Anand");
 		right.setFullname("Rachna, Anand");
 //        System.out.println(AuthorMerger.normalize(right.getFullname()));
-		boolean same = AuthorMerger.checkSimilarity2(left, right);
+		boolean same = AuthorMerger.checkORCIDSimilarity(left, right);
 		assertTrue(same);
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala
@ -89,7 +89,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
          p
        }
        case (p: Publication, r: Row) =>
-          p.setAuthor(AuthorMerger.enrichOrcid2(p.getAuthor, AuthorEnricher.toOAFAuthor(r)))
+          p.setAuthor(AuthorMerger.enrichOrcid(p.getAuthor, AuthorEnricher.toOAFAuthor(r)))
          p
      }
      .write