diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
index 62e6764b7..d7224af78 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
@@ -4,352 +4,329 @@ package eu.dnetlib.dhp.oa.merge;
 import java.text.Normalizer;
 import java.util.*;
 import java.util.stream.Collectors;
-
 import org.apache.commons.lang3.StringUtils;
-import org.jetbrains.annotations.NotNull;
-
 import com.wcohen.ss.JaroWinkler;
-
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.pace.model.Person;
 import scala.Tuple2;
 
-class SimilarityCellInfo implements Comparable<SimilarityCellInfo> {
-
-	public int authorPosition = 0;
-	public int orcidPosition = 0;
-
-	public double maxColumnSimilarity = 0.0;
-
-	public SimilarityCellInfo() {
-	}
-
-	public void setValues(final int authPos, final int orcidPos, final double similarity) {
-		this.authorPosition = authPos;
-		this.orcidPosition = orcidPos;
-		this.maxColumnSimilarity = similarity;
-	}
-
-	@Override
-	public int compareTo(@NotNull SimilarityCellInfo o) {
-		return Double.compare(maxColumnSimilarity, o.maxColumnSimilarity);
-	}
-}
 
 public class AuthorMerger {
 
 	private static final Double THRESHOLD = 0.95;
 
 	private AuthorMerger() {
 	}
 
 	public static List<Author> merge(List<List<Author>> authors) {
 
 		authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
 
 		List<Author> author = new ArrayList<>();
 
 		for (List<Author> a : authors) {
 			author = mergeAuthor(author, a);
 		}
 
 		return author;
 
 	}
 
 	public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
 		int pa = countAuthorsPids(a);
 		int pb = countAuthorsPids(b);
 		List<Author> base;
 		List<Author> enrich;
 		int sa = authorsSize(a);
 		int sb = authorsSize(b);
 
 		if (sa == sb) {
 			base = pa > pb ? a : b;
 			enrich = pa > pb ? b : a;
 		} else {
 			base = sa > sb ? a : b;
 			enrich = sa > sb ? b : a;
 		}
 		enrichPidFromList(base, enrich, threshold);
 		return base;
 	}
 
 	public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
 		return mergeAuthor(a, b, THRESHOLD);
 	}
 
 	private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
 		if (base == null || enrich == null)
 			return;
 
 		// (if an Author has more than 1 pid, it appears 2 times in the list)
 		final Map<String, Author> basePidAuthorMap = base
 			.stream()
 			.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
 			.flatMap(
 				a -> a
 					.getPid()
 					.stream()
 					.filter(Objects::nonNull)
 					.map(p -> new Tuple2<>(pidToComparableString(p), a)))
 			.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
 
 		// (list of pid that are missing in the other list)
 		final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
 			.stream()
 			.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
 			.flatMap(
 				a -> a
 					.getPid()
 					.stream()
 					.filter(Objects::nonNull)
 					.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
 					.map(p -> new Tuple2<>(p, a)))
 			.collect(Collectors.toList());
 
 		pidToEnrich
 			.forEach(
 				a -> {
 					Optional<Tuple2<Double, Author>> simAuthor = base
 						.stream()
 						.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
 						.max(Comparator.comparing(Tuple2::_1));
 
 					if (simAuthor.isPresent()) {
 						double th = threshold;
 						// increase the threshold if the surname is too short
 						if (simAuthor.get()._2().getSurname() != null
 							&& simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
 							th = 0.99;
 
 						if (simAuthor.get()._1() > th) {
 							Author r = simAuthor.get()._2();
 							if (r.getPid() == null) {
 								r.setPid(new ArrayList<>());
 							}
 
 							// TERRIBLE HACK: when we create an array with Arrays.asList it has a fixed
 							// size, so the add method raises UnsupportedOperationException at
 							// java.util.AbstractList.add
 							final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
 							tmp.add(a._1());
 							r.setPid(tmp);
 						}
 					}
 				});
 	}
 
 	public static String normalizeFullName(final String fullname) {
 		return nfd(fullname)
 			.toLowerCase()
 			// do not compact the regexes in a single expression, it would cause StackOverflowError
 			// in case of large input strings
 			.replaceAll("(\\W)+", " ")
 			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
 			.replaceAll("(\\p{Punct})+", " ")
 			.replaceAll("(\\d)+", " ")
 			.replaceAll("(\\n)+", " ")
-
 			.trim();
 	}
 
 	static int hammingDist(String str1, String str2) {
 		if (str1.length() != str2.length())
 			return Math.max(str1.length(), str2.length());
 		int i = 0, count = 0;
 		while (i < str1.length()) {
 			if (str1.charAt(i) != str2.charAt(i))
 				count++;
 			i++;
 		}
 		return count;
 	}
 
 	private static String authorFieldToBeCompared(Author author) {
 		if (StringUtils.isNotBlank(author.getSurname())) {
 			return author.getSurname();
 		}
 		if (StringUtils.isNotBlank(author.getFullname())) {
 			return author.getFullname();
 		}
 		return null;
 	}
 
-	/**
-	 * This method tries to figure out when two authors are the same in the context
-	 * of ORCID enrichment
-	 * @param left  Author in the OAF entity
-	 * @param right Author ORCID
-	 * @return based on a heuristic on the names of the authors if they are the same.
-	 */
+	/**
+	 * This method tries to figure out when two authors are the same in the context
+	 * of ORCID enrichment
+	 *
+	 * @param left  Author in the OAF entity
+	 * @param right Author ORCID
+	 * @return based on a heuristic on the names of the authors if they are the same.
+	 */
 	public static boolean checkORCIDSimilarity(final Author left, final Author right) {
 		final Person pl = parse(left);
 		final Person pr = parse(right);
 
 		// If one of the two does not have a surname, we check that both have a non-empty
 		// fullname and compare the normalized versions for equality
 		if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) &&
 			pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) {
 
 			if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null
 				&& !pr.getFullname().isEmpty()) {
 				return pl
 					.getFullname()
 					.stream()
 					.anyMatch(
 						fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
 			} else {
 				return false;
 			}
 		}
 		// The authors have one surname in common
 		if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
 
 			// If one of them has only a surname and it matches, we can say they are the same author
 			if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) ||
 				(pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank)))
 				return true;
 			// The authors have the same name (or name initials) in common
 			if (pl
 				.getName()
 				.stream()
 				.anyMatch(
 					nl -> pr
 						.getName()
 						.stream()
 						.anyMatch(nr -> nr.equalsIgnoreCase(nl))))
 				return true;
 		}
 
 		// Sometimes we noticed that publications have the authors written in inverse order (Surname, Name),
 		// so we check for an exact match between name and surname
 		if (pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl))) &&
 			pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl))))
 			return true;
 		else
 			return false;
 	}
 	//
 
-	/**
-	 * Method to enrich ORCID information in one list of authors based on another list
-	 * @param baseAuthor  the Author List in the OAF Entity
-	 * @param orcidAuthor The list of ORCID Author intersected
-	 * @return The Author List of the OAF Entity enriched with the orcid Author
-	 */
+	/**
+	 * Method to enrich ORCID information in one list of authors based on another list
+	 *
+	 * @param baseAuthor  the Author List in the OAF Entity
+	 * @param orcidAuthor The list of ORCID Author intersected
+	 * @return The Author List of the OAF Entity enriched with the orcid Author
+	 */
 	public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
 
 		if (baseAuthor == null || baseAuthor.isEmpty())
 			return orcidAuthor;
 
 		if (orcidAuthor == null || orcidAuthor.isEmpty())
 			return baseAuthor;
 
 		if (baseAuthor.size() == 1 && orcidAuthor.size() > 10)
 			return baseAuthor;
 
 		final List<Author> oAuthor = new ArrayList<>();
 		oAuthor.addAll(orcidAuthor);
 
 		baseAuthor.forEach(ba -> {
 			Optional<Author> aMatch = oAuthor.stream().filter(oa -> checkORCIDSimilarity(ba, oa)).findFirst();
 			if (aMatch.isPresent()) {
 				final Author sameAuthor = aMatch.get();
 				addPid(ba, sameAuthor.getPid());
 				oAuthor.remove(sameAuthor);
 			}
 		});
 		return baseAuthor;
 	}
 
 	private static void addPid(final Author a, final List<StructuredProperty> pids) {
 
 		if (a.getPid() == null) {
 			a.setPid(new ArrayList<>());
 		}
 
 		a.getPid().addAll(pids);
 
 	}
 
 	public static String pidToComparableString(StructuredProperty pid) {
 		final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
 			: "";
 		return (pid.getQualifier() != null ? classid : "")
 			+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
 	}
 
 	public static int countAuthorsPids(List<Author> authors) {
 		if (authors == null)
 			return 0;
 
 		return (int) authors.stream().filter(AuthorMerger::hasPid).count();
 	}
 
 	private static int authorsSize(List<Author> authors) {
 		if (authors == null)
 			return 0;
 		return authors.size();
 	}
 
 	private static Double sim(Author a, Author b) {
 
 		final Person pa = parse(a);
 		final Person pb = parse(b);
 
 		// if both are accurate (i.e. they have both name and surname)
 		if (pa.isAccurate() & pb.isAccurate()) {
 			return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5
 				+ new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5;
 		} else {
 			return new JaroWinkler()
 				.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
 		}
 	}
 
 	private static boolean hasPid(Author a) {
 		if (a == null || a.getPid() == null || a.getPid().isEmpty())
 			return false;
 		return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
 	}
 
 	private static Person parse(Author author) {
 		if (StringUtils.isNotBlank(author.getSurname())) {
 			return new Person(author.getSurname() + ", " + author.getName(), false);
 		} else {
 			if (StringUtils.isNotBlank(author.getFullname()))
 				return new Person(author.getFullname(), false);
 			else
 				return new Person("", false);
 		}
 	}
 
 	public static String normalize(final String s) {
 		String[] normalized = nfd(s)
 			.toLowerCase()
 			// do not compact the regexes in a single expression, it would cause StackOverflowError
 			// in case of large input strings
 			.replaceAll("(\\W)+", " ")
 			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
 			.replaceAll("(\\p{Punct})+", " ")
 			.replaceAll("(\\d)+", " ")
 			.replaceAll("(\\n)+", " ")
 			.trim()
 			.split(" ");
 
 		Arrays.sort(normalized);
 
 		return String.join(" ", normalized);
 	}
 
 	private static String nfd(final String s) {
 		return Normalizer.normalize(s, Normalizer.Form.NFD);
 	}
 }
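
Reviewer's aside: the "TERRIBLE HACK" comment in enrichPidFromList above refers to the fixed-size list returned by Arrays.asList. A minimal, self-contained sketch of the pitfall and of the copy-based workaround the patch uses (class name and sample values here are illustrative only, not part of the patch):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    class FixedSizeListPitfall {
        public static void main(String[] args) {
            List<String> pids = Arrays.asList("orcid:0000-0001-2345-6789");
            try {
                // Arrays.asList is backed by the given array, so the list has a fixed size
                pids.add("doi:10.1000/182");
            } catch (UnsupportedOperationException e) {
                System.out.println("add() rejected by the fixed-size list");
            }
            // the workaround used for r.getPid(): copy into a resizable ArrayList first
            List<String> copy = new ArrayList<>(pids);
            copy.add("doi:10.1000/182"); // succeeds
        }
    }
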
.replaceAll("(\\p{Punct})+", " ") + .replaceAll("(\\d)+", " ") + .replaceAll("(\\n)+", " ") + .trim() + .split(" "); - Arrays.sort(normalized); + Arrays.sort(normalized); - return String.join(" ", normalized); - } + return String.join(" ", normalized); + } - private static String nfd(final String s) { - return Normalizer.normalize(s, Normalizer.Form.NFD); - } + private static String nfd(final String s) { + return Normalizer.normalize(s, Normalizer.Form.NFD); + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java index e2639996c..b87738879 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java @@ -79,8 +79,8 @@ public class PrepareAffiliationRelationsTest { .getPath(); String pubmedAffiliationRelationsPath = getClass() - .getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json") + .getPath(); String outputPath = workingDir.toString() + "/actionSet"; diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index 07d6a0287..9ffaeeeef 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -92,7 +92,6 @@ object SparkGenerateDoiBoost { .mode(SaveMode.Overwrite) .save(s"$workingDirPath/firstJoin") - logger.info("Phase 2) Join Result with MAG") val sj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala index 0d994d202..645df8181 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala @@ -73,7 +73,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String] col("id").alias("dnet_id") ) - val orcidDnet = orcidPublication + val orcidDnet = orcidPublication .join( entities, lower(col("schema")).equalTo(lower(col("pid_schema"))) && diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/EnrichOrcidTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/EnrichOrcidTest.scala index 84483b1a2..cb05332b2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/EnrichOrcidTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/EnrichOrcidTest.scala @@ -6,13 +6,10 @@ import org.junit.jupiter.api.Test import org.slf4j.{Logger, LoggerFactory} import org.apache.spark.sql.functions._ - class EnrichOrcidTest { val log: Logger = LoggerFactory.getLogger(getClass) 
- - def test() = { val spark = SparkSession.builder().master("local[*]").getOrCreate() // spark.sparkContext.setLogLevel("ERROR") @@ -63,8 +60,7 @@ class EnrichOrcidTest { // }).filter(author => author != null) // }) - - Encoders + Encoders import spark.implicits._ // val enriched = spark.read @@ -76,10 +72,6 @@ class EnrichOrcidTest { // // .show() - - - - } }
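
For reviewers who want to exercise the new enrichment path outside Spark, a rough usage sketch of the AuthorMerger entry point. It assumes the Author and StructuredProperty beans from eu.dnetlib.dhp.schema.oaf expose plain getters/setters; the person and ORCID sample values are made up for illustration and are not part of the patch:

    import java.util.Arrays;
    import java.util.List;

    import eu.dnetlib.dhp.oa.merge.AuthorMerger;
    import eu.dnetlib.dhp.schema.oaf.Author;
    import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

    class EnrichOrcidSketch {
        public static void main(String[] args) {
            Author base = new Author(); // author as it appears in the OAF entity, no pid yet
            base.setName("Josiah");
            base.setSurname("Carberry");
            base.setFullname("Carberry, Josiah");

            Author orcid = new Author(); // the intersected ORCID author carrying the pid
            orcid.setName("Josiah");
            orcid.setSurname("Carberry");
            StructuredProperty pid = new StructuredProperty();
            pid.setValue("0000-0002-1825-0097");
            orcid.setPid(Arrays.asList(pid));

            // checkORCIDSimilarity matches the two authors on the shared surname and
            // name, so addPid copies the ORCID pid onto the base author
            List<Author> enriched = AuthorMerger.enrichOrcid(Arrays.asList(base), Arrays.asList(orcid));
            System.out.println(enriched.get(0).getPid().size()); // expected: 1
        }
    }
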