code formatting

This commit is contained in:
Sandro La Bruzzo 2023-11-30 15:31:42 +01:00
parent 5e22b67b8a
commit cdfb7588dd
5 changed files with 268 additions and 300 deletions

View File

@ -4,352 +4,329 @@ package eu.dnetlib.dhp.oa.merge;
import java.text.Normalizer; import java.text.Normalizer;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import com.wcohen.ss.JaroWinkler; import com.wcohen.ss.JaroWinkler;
import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.model.Person; import eu.dnetlib.pace.model.Person;
import scala.Tuple2; import scala.Tuple2;
class SimilarityCellInfo implements Comparable<SimilarityCellInfo> {

	// position of the author in the base (OAF) author list
	public int authorPosition = 0;
	// position of the candidate author in the ORCID list
	public int orcidPosition = 0;
	// best similarity score observed for this (author, orcid) pairing
	public double maxColumnSimilarity = 0.0;

	public SimilarityCellInfo() {
	}

	/**
	 * Sets both cell coordinates and the similarity score in one call.
	 *
	 * @param authPos    position of the author in the base list
	 * @param orcidPos   position of the author in the ORCID list
	 * @param similarity similarity score of the pair
	 */
	public void setValues(final int authPos, final int orcidPos, final double similarity) {
		this.authorPosition = authPos;
		this.orcidPosition = orcidPos;
		this.maxColumnSimilarity = similarity;
	}

	/**
	 * Orders cells by ascending {@code maxColumnSimilarity}.
	 * NOTE: this ordering is inconsistent with equals (equals/hashCode are not
	 * overridden), so do not use instances as keys of sorted sets/maps.
	 */
	@Override
	public int compareTo(SimilarityCellInfo o) {
		// FIX: replaced the third-party @NotNull annotation with an explicit check;
		// the Comparable contract mandates an NPE on a null argument anyway
		Objects.requireNonNull(o);
		return Double.compare(maxColumnSimilarity, o.maxColumnSimilarity);
	}
}
public class AuthorMerger { public class AuthorMerger {
private static final Double THRESHOLD = 0.95; private static final Double THRESHOLD = 0.95;
private AuthorMerger() { private AuthorMerger() {
} }
public static List<Author> merge(List<List<Author>> authors) { public static List<Author> merge(List<List<Author>> authors) {
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2))); authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
List<Author> author = new ArrayList<>(); List<Author> author = new ArrayList<>();
for (List<Author> a : authors) { for (List<Author> a : authors) {
author = mergeAuthor(author, a); author = mergeAuthor(author, a);
} }
return author; return author;
} }
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) { public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
int pa = countAuthorsPids(a); int pa = countAuthorsPids(a);
int pb = countAuthorsPids(b); int pb = countAuthorsPids(b);
List<Author> base; List<Author> base;
List<Author> enrich; List<Author> enrich;
int sa = authorsSize(a); int sa = authorsSize(a);
int sb = authorsSize(b); int sb = authorsSize(b);
if (sa == sb) { if (sa == sb) {
base = pa > pb ? a : b; base = pa > pb ? a : b;
enrich = pa > pb ? b : a; enrich = pa > pb ? b : a;
} else { } else {
base = sa > sb ? a : b; base = sa > sb ? a : b;
enrich = sa > sb ? b : a; enrich = sa > sb ? b : a;
} }
enrichPidFromList(base, enrich, threshold); enrichPidFromList(base, enrich, threshold);
return base; return base;
} }
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) { public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
return mergeAuthor(a, b, THRESHOLD); return mergeAuthor(a, b, THRESHOLD);
} }
private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) { private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
if (base == null || enrich == null) if (base == null || enrich == null)
return; return;
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list) // <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
final Map<String, Author> basePidAuthorMap = base final Map<String, Author> basePidAuthorMap = base
.stream() .stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap( .flatMap(
a -> a a -> a
.getPid() .getPid()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(p -> new Tuple2<>(pidToComparableString(p), a))) .map(p -> new Tuple2<>(pidToComparableString(p), a)))
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
// <pid, Author> (list of pid that are missing in the other list) // <pid, Author> (list of pid that are missing in the other list)
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
.stream() .stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap( .flatMap(
a -> a a -> a
.getPid() .getPid()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p))) .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
.map(p -> new Tuple2<>(p, a))) .map(p -> new Tuple2<>(p, a)))
.collect(Collectors.toList()); .collect(Collectors.toList());
pidToEnrich pidToEnrich
.forEach( .forEach(
a -> { a -> {
Optional<Tuple2<Double, Author>> simAuthor = base Optional<Tuple2<Double, Author>> simAuthor = base
.stream() .stream()
.map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) .map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
.max(Comparator.comparing(Tuple2::_1)); .max(Comparator.comparing(Tuple2::_1));
if (simAuthor.isPresent()) { if (simAuthor.isPresent()) {
double th = threshold; double th = threshold;
// increase the threshold if the surname is too short // increase the threshold if the surname is too short
if (simAuthor.get()._2().getSurname() != null if (simAuthor.get()._2().getSurname() != null
&& simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0) && simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
th = 0.99; th = 0.99;
if (simAuthor.get()._1() > th) { if (simAuthor.get()._1() > th) {
Author r = simAuthor.get()._2(); Author r = simAuthor.get()._2();
if (r.getPid() == null) { if (r.getPid() == null) {
r.setPid(new ArrayList<>()); r.setPid(new ArrayList<>());
} }
// TERRIBLE HACK but for some reason when we create and Array with Arrays.asList, // TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
// it creates of fixed size, and the add method raise UnsupportedOperationException at // it creates of fixed size, and the add method raise UnsupportedOperationException at
// java.util.AbstractList.add // java.util.AbstractList.add
final List<StructuredProperty> tmp = new ArrayList<>(r.getPid()); final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
tmp.add(a._1()); tmp.add(a._1());
r.setPid(tmp); r.setPid(tmp);
} }
} }
}); });
} }
public static String normalizeFullName(final String fullname) { public static String normalizeFullName(final String fullname) {
return nfd(fullname) return nfd(fullname)
.toLowerCase() .toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError // do not compact the regexes in a single expression, would cause StackOverflowError
// in case // in case
// of large input strings // of large input strings
.replaceAll("(\\W)+", " ") .replaceAll("(\\W)+", " ")
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
.replaceAll("(\\p{Punct})+", " ") .replaceAll("(\\p{Punct})+", " ")
.replaceAll("(\\d)+", " ") .replaceAll("(\\d)+", " ")
.replaceAll("(\\n)+", " ") .replaceAll("(\\n)+", " ")
.trim(); .trim();
} }
static int hammingDist(String str1, String str2) { static int hammingDist(String str1, String str2) {
if (str1.length() != str2.length()) if (str1.length() != str2.length())
return Math.max(str1.length(), str2.length()); return Math.max(str1.length(), str2.length());
int i = 0, count = 0; int i = 0, count = 0;
while (i < str1.length()) { while (i < str1.length()) {
if (str1.charAt(i) != str2.charAt(i)) if (str1.charAt(i) != str2.charAt(i))
count++; count++;
i++; i++;
} }
return count; return count;
} }
private static String authorFieldToBeCompared(Author author) { private static String authorFieldToBeCompared(Author author) {
if (StringUtils.isNotBlank(author.getSurname())) { if (StringUtils.isNotBlank(author.getSurname())) {
return author.getSurname(); return author.getSurname();
} }
if (StringUtils.isNotBlank(author.getFullname())) { if (StringUtils.isNotBlank(author.getFullname())) {
return author.getFullname(); return author.getFullname();
} }
return null; return null;
} }
/** /**
* This method tries to figure out when two author are the same in the contest * This method tries to figure out when two author are the same in the contest
* of ORCID enrichment * of ORCID enrichment
* @param left Author in the OAF entity *
* @param right Author ORCID * @param left Author in the OAF entity
* @return based on a heuristic on the names of the authors if they are the same. * @param right Author ORCID
*/ * @return based on a heuristic on the names of the authors if they are the same.
public static boolean checkORCIDSimilarity(final Author left, final Author right) { */
final Person pl = parse(left); public static boolean checkORCIDSimilarity(final Author left, final Author right) {
final Person pr = parse(right); final Person pl = parse(left);
final Person pr = parse(right);
// If one of them didn't have a surname we verify if they have the fullName not empty // If one of them didn't have a surname we verify if they have the fullName not empty
// and verify if the normalized version is equal // and verify if the normalized version is equal
if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) && if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) &&
pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) { pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) {
if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null
&& !pr.getFullname().isEmpty()) { && !pr.getFullname().isEmpty()) {
return pl return pl
.getFullname() .getFullname()
.stream() .stream()
.anyMatch( .anyMatch(
fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr)))); fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
} else { } else {
return false; return false;
} }
} }
// The Authors have one surname in common // The Authors have one surname in common
if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) { if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
// If one of them has only a surname and is the same we can say that they are the same author // If one of them has only a surname and is the same we can say that they are the same author
if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) || if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) ||
(pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank))) (pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank)))
return true; return true;
// The authors have the same initials of Name in common // The authors have the same initials of Name in common
if (pl if (pl
.getName() .getName()
.stream() .stream()
.anyMatch( .anyMatch(
nl -> pr nl -> pr
.getName() .getName()
.stream() .stream()
.anyMatch(nr -> nr.equalsIgnoreCase(nl)))) .anyMatch(nr -> nr.equalsIgnoreCase(nl))))
return true; return true;
} }
// Sometimes we noticed that publication have author wrote in inverse order Surname, Name // Sometimes we noticed that publication have author wrote in inverse order Surname, Name
// We verify if we have an exact match between name and surname // We verify if we have an exact match between name and surname
if (pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl))) && if (pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl))) &&
pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl)))) pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl))))
return true; return true;
else else
return false; return false;
} }
// //
/** /**
* Method to enrich ORCID information in one list of authors based on another list * Method to enrich ORCID information in one list of authors based on another list
* @param baseAuthor the Author List in the OAF Entity *
* @param orcidAuthor The list of ORCID Author intersected * @param baseAuthor the Author List in the OAF Entity
* @return The Author List of the OAF Entity enriched with the orcid Author * @param orcidAuthor The list of ORCID Author intersected
*/ * @return The Author List of the OAF Entity enriched with the orcid Author
public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) { */
public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
if (baseAuthor == null || baseAuthor.isEmpty()) if (baseAuthor == null || baseAuthor.isEmpty())
return orcidAuthor; return orcidAuthor;
if (orcidAuthor == null || orcidAuthor.isEmpty()) if (orcidAuthor == null || orcidAuthor.isEmpty())
return baseAuthor; return baseAuthor;
if (baseAuthor.size() == 1 && orcidAuthor.size() > 10) if (baseAuthor.size() == 1 && orcidAuthor.size() > 10)
return baseAuthor; return baseAuthor;
final List<Author> oAuthor = new ArrayList<>(); final List<Author> oAuthor = new ArrayList<>();
oAuthor.addAll(orcidAuthor); oAuthor.addAll(orcidAuthor);
baseAuthor.forEach(ba -> { baseAuthor.forEach(ba -> {
Optional<Author> aMatch = oAuthor.stream().filter(oa -> checkORCIDSimilarity(ba, oa)).findFirst(); Optional<Author> aMatch = oAuthor.stream().filter(oa -> checkORCIDSimilarity(ba, oa)).findFirst();
if (aMatch.isPresent()) { if (aMatch.isPresent()) {
final Author sameAuthor = aMatch.get(); final Author sameAuthor = aMatch.get();
addPid(ba, sameAuthor.getPid()); addPid(ba, sameAuthor.getPid());
oAuthor.remove(sameAuthor); oAuthor.remove(sameAuthor);
} }
}); });
return baseAuthor; return baseAuthor;
} }
private static void addPid(final Author a, final List<StructuredProperty> pids) { private static void addPid(final Author a, final List<StructuredProperty> pids) {
if (a.getPid() == null) { if (a.getPid() == null) {
a.setPid(new ArrayList<>()); a.setPid(new ArrayList<>());
} }
a.getPid().addAll(pids); a.getPid().addAll(pids);
} }
public static String pidToComparableString(StructuredProperty pid) { public static String pidToComparableString(StructuredProperty pid) {
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
: ""; : "";
return (pid.getQualifier() != null ? classid : "") return (pid.getQualifier() != null ? classid : "")
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); + (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
} }
public static int countAuthorsPids(List<Author> authors) { public static int countAuthorsPids(List<Author> authors) {
if (authors == null) if (authors == null)
return 0; return 0;
return (int) authors.stream().filter(AuthorMerger::hasPid).count(); return (int) authors.stream().filter(AuthorMerger::hasPid).count();
} }
private static int authorsSize(List<Author> authors) { private static int authorsSize(List<Author> authors) {
if (authors == null) if (authors == null)
return 0; return 0;
return authors.size(); return authors.size();
} }
private static Double sim(Author a, Author b) { private static Double sim(Author a, Author b) {
final Person pa = parse(a); final Person pa = parse(a);
final Person pb = parse(b); final Person pb = parse(b);
// if both are accurate (e.g. they have name and surname) // if both are accurate (e.g. they have name and surname)
if (pa.isAccurate() & pb.isAccurate()) { if (pa.isAccurate() & pb.isAccurate()) {
return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5 return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5
+ new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5; + new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5;
} else { } else {
return new JaroWinkler() return new JaroWinkler()
.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
} }
} }
private static boolean hasPid(Author a) { private static boolean hasPid(Author a) {
if (a == null || a.getPid() == null || a.getPid().isEmpty()) if (a == null || a.getPid() == null || a.getPid().isEmpty())
return false; return false;
return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
} }
private static Person parse(Author author) { private static Person parse(Author author) {
if (StringUtils.isNotBlank(author.getSurname())) { if (StringUtils.isNotBlank(author.getSurname())) {
return new Person(author.getSurname() + ", " + author.getName(), false); return new Person(author.getSurname() + ", " + author.getName(), false);
} else { } else {
if (StringUtils.isNotBlank(author.getFullname())) if (StringUtils.isNotBlank(author.getFullname()))
return new Person(author.getFullname(), false); return new Person(author.getFullname(), false);
else else
return new Person("", false); return new Person("", false);
} }
} }
public static String normalize(final String s) { public static String normalize(final String s) {
String[] normalized = nfd(s) String[] normalized = nfd(s)
.toLowerCase() .toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError // do not compact the regexes in a single expression, would cause StackOverflowError
// in case // in case
// of large input strings // of large input strings
.replaceAll("(\\W)+", " ") .replaceAll("(\\W)+", " ")
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
.replaceAll("(\\p{Punct})+", " ") .replaceAll("(\\p{Punct})+", " ")
.replaceAll("(\\d)+", " ") .replaceAll("(\\d)+", " ")
.replaceAll("(\\n)+", " ") .replaceAll("(\\n)+", " ")
.trim() .trim()
.split(" "); .split(" ");
Arrays.sort(normalized); Arrays.sort(normalized);
return String.join(" ", normalized); return String.join(" ", normalized);
} }
private static String nfd(final String s) { private static String nfd(final String s) {
return Normalizer.normalize(s, Normalizer.Form.NFD); return Normalizer.normalize(s, Normalizer.Form.NFD);
} }
} }

View File

@ -79,8 +79,8 @@ public class PrepareAffiliationRelationsTest {
.getPath(); .getPath();
String pubmedAffiliationRelationsPath = getClass() String pubmedAffiliationRelationsPath = getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json") .getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json")
.getPath(); .getPath();
String outputPath = workingDir.toString() + "/actionSet"; String outputPath = workingDir.toString() + "/actionSet";

View File

@ -92,7 +92,6 @@ object SparkGenerateDoiBoost {
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.save(s"$workingDirPath/firstJoin") .save(s"$workingDirPath/firstJoin")
logger.info("Phase 2) Join Result with MAG") logger.info("Phase 2) Join Result with MAG")
val sj: Dataset[(String, Publication)] = val sj: Dataset[(String, Publication)] =
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p)) spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))

View File

@ -73,7 +73,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
col("id").alias("dnet_id") col("id").alias("dnet_id")
) )
val orcidDnet = orcidPublication val orcidDnet = orcidPublication
.join( .join(
entities, entities,
lower(col("schema")).equalTo(lower(col("pid_schema"))) && lower(col("schema")).equalTo(lower(col("pid_schema"))) &&

View File

@ -6,13 +6,10 @@ import org.junit.jupiter.api.Test
import org.slf4j.{Logger, LoggerFactory} import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.sql.functions._ import org.apache.spark.sql.functions._
class EnrichOrcidTest { class EnrichOrcidTest {
val log: Logger = LoggerFactory.getLogger(getClass) val log: Logger = LoggerFactory.getLogger(getClass)
def test() = { def test() = {
val spark = SparkSession.builder().master("local[*]").getOrCreate() val spark = SparkSession.builder().master("local[*]").getOrCreate()
// spark.sparkContext.setLogLevel("ERROR") // spark.sparkContext.setLogLevel("ERROR")
@ -63,8 +60,7 @@ class EnrichOrcidTest {
// }).filter(author => author != null) // }).filter(author => author != null)
// }) // })
Encoders
Encoders
import spark.implicits._ import spark.implicits._
// val enriched = spark.read // val enriched = spark.read
@ -76,10 +72,6 @@ class EnrichOrcidTest {
// //
// .show() // .show()
} }
} }