Merge pull request 'ORCID Enrichment and Download' (#364) from orcid_import into beta

Reviewed-on: D-Net/dnet-hadoop#364
2023-12-01 15:05:44 +01:00 · 2023-12-01 15:05:44 +01:00 · c5ac593c07
parent 93a700742a 09d061e90b
commit c5ac593c07
43 changed files with 3251 additions and 229 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
@ -4,194 +4,318 @@ package eu.dnetlib.dhp.oa.merge;
 import java.text.Normalizer;
 import java.util.*;
 import java.util.stream.Collectors;
 import org.apache.commons.lang3.StringUtils;
 import com.wcohen.ss.JaroWinkler;
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.pace.model.Person;
 import scala.Tuple2;
 public class AuthorMerger {
-	private static final Double THRESHOLD = 0.95;
+    private static final Double THRESHOLD = 0.95;
-	private AuthorMerger() {
+    private AuthorMerger() {
-	}
+    }
-	public static List<Author> merge(List<List<Author>> authors) {
+    public static List<Author> merge(List<List<Author>> authors) {
-		authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
+        authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
-		List<Author> author = new ArrayList<>();
+        List<Author> author = new ArrayList<>();
-		for (List<Author> a : authors) {
+        for (List<Author> a : authors) {
-			author = mergeAuthor(author, a);
+            author = mergeAuthor(author, a);
-		}
+        }
-		return author;
+        return author;
-	}
+    }
-	public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
+    public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
-		int pa = countAuthorsPids(a);
+        int pa = countAuthorsPids(a);
-		int pb = countAuthorsPids(b);
+        int pb = countAuthorsPids(b);
-		List<Author> base;
+        List<Author> base;
-		List<Author> enrich;
+        List<Author> enrich;
-		int sa = authorsSize(a);
+        int sa = authorsSize(a);
-		int sb = authorsSize(b);
+        int sb = authorsSize(b);
-		if (sa == sb) {
+        if (sa == sb) {
-			base = pa > pb ? a : b;
+            base = pa > pb ? a : b;
-			enrich = pa > pb ? b : a;
+            enrich = pa > pb ? b : a;
-		} else {
+        } else {
-			base = sa > sb ? a : b;
+            base = sa > sb ? a : b;
-			enrich = sa > sb ? b : a;
+            enrich = sa > sb ? b : a;
-		}
+        }
-		enrichPidFromList(base, enrich, threshold);
+        enrichPidFromList(base, enrich, threshold);
-		return base;
+        return base;
-	}
+    }
-	public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
+    public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
-		return mergeAuthor(a, b, THRESHOLD);
+        return mergeAuthor(a, b, THRESHOLD);
-	}
+    }
-	private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
+    private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
-		if (base == null || enrich == null)
+        if (base == null || enrich == null)
-			return;
+            return;
-		// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
+        // <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
-		final Map<String, Author> basePidAuthorMap = base
+        final Map<String, Author> basePidAuthorMap = base
-			.stream()
+                .stream()
-			.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
+                .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
-			.flatMap(
+                .flatMap(
-				a -> a
+                        a -> a
-					.getPid()
+                                .getPid()
-					.stream()
+                                .stream()
-					.filter(Objects::nonNull)
+                                .filter(Objects::nonNull)
-					.map(p -> new Tuple2<>(pidToComparableString(p), a)))
+                                .map(p -> new Tuple2<>(pidToComparableString(p), a)))
-			.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
+                .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
-		// <pid, Author> (list of pid that are missing in the other list)
+        // <pid, Author> (list of pid that are missing in the other list)
-		final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
+        final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
-			.stream()
+                .stream()
-			.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
+                .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
-			.flatMap(
+                .flatMap(
-				a -> a
+                        a -> a
-					.getPid()
+                                .getPid()
-					.stream()
+                                .stream()
-					.filter(Objects::nonNull)
+                                .filter(Objects::nonNull)
-					.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
+                                .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
-					.map(p -> new Tuple2<>(p, a)))
+                                .map(p -> new Tuple2<>(p, a)))
-			.collect(Collectors.toList());
+                .collect(Collectors.toList());
-		pidToEnrich
+        pidToEnrich
-			.forEach(
+                .forEach(
-				a -> {
+                        a -> {
-					Optional<Tuple2<Double, Author>> simAuthor = base
+                            Optional<Tuple2<Double, Author>> simAuthor = base
-						.stream()
+                                    .stream()
-						.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
+                                    .map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
-						.max(Comparator.comparing(Tuple2::_1));
+                                    .max(Comparator.comparing(Tuple2::_1));
-					if (simAuthor.isPresent()) {
+                            if (simAuthor.isPresent()) {
-						double th = threshold;
+                                double th = threshold;
-						// increase the threshold if the surname is too short
+                                // increase the threshold if the surname is too short
-						if (simAuthor.get()._2().getSurname() != null
+                                if (simAuthor.get()._2().getSurname() != null
-							&& simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
+                                        && simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
-							th = 0.99;
+                                    th = 0.99;
-						if (simAuthor.get()._1() > th) {
+                                if (simAuthor.get()._1() > th) {
-							Author r = simAuthor.get()._2();
+                                    Author r = simAuthor.get()._2();
-							if (r.getPid() == null) {
+                                    if (r.getPid() == null) {
-								r.setPid(new ArrayList<>());
+                                        r.setPid(new ArrayList<>());
-							}
+                                    }
-							// TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
+                                    // TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
-							// it creates of fixed size, and the add method raise UnsupportedOperationException at
+                                    // it creates of fixed size, and the add method raise UnsupportedOperationException at
-							// java.util.AbstractList.add
+                                    // java.util.AbstractList.add
-							final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
+                                    final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
-							tmp.add(a._1());
+                                    tmp.add(a._1());
-							r.setPid(tmp);
+                                    r.setPid(tmp);
-						}
+                                }
-					}
+                            }
-				});
+                        });
-	}
+    }
-	public static String pidToComparableString(StructuredProperty pid) {
+    public static String normalizeFullName(final String fullname) {
-		final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
+        return nfd(fullname)
-			: "";
+                .toLowerCase()
-		return (pid.getQualifier() != null ? classid : "")
+                // do not compact the regexes in a single expression, would cause StackOverflowError
-			+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
+                // in case
-	}
+                // of large input strings
                .replaceAll("(\\W)+", " ")
                .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
                .replaceAll("(\\p{Punct})+", " ")
                .replaceAll("(\\d)+", " ")
                .replaceAll("(\\n)+", " ")
-	public static int countAuthorsPids(List<Author> authors) {
+                .trim();
-		if (authors == null)
+    }
 			return 0;
 		return (int) authors.stream().filter(AuthorMerger::hasPid).count();
 	}
-	private static int authorsSize(List<Author> authors) {
+    private static String authorFieldToBeCompared(Author author) {
-		if (authors == null)
+        if (StringUtils.isNotBlank(author.getSurname())) {
-			return 0;
+            return author.getSurname();
 		return authors.size();
 	}
-	private static Double sim(Author a, Author b) {
+        }
        if (StringUtils.isNotBlank(author.getFullname())) {
            return author.getFullname();
        }
        return null;
    }
-		final Person pa = parse(a);
+    /**
-		final Person pb = parse(b);
+     * This method tries to figure out whether two authors are the same in the context
     * of ORCID enrichment
     *
     * @param left  Author in the OAF entity
     * @param right Author ORCID
     * @return based on a heuristic on the names of the authors if they are the same.
     */
    public static boolean checkORCIDSimilarity(final Author left, final Author right) {
        final Person pl = parse(left);
        final Person pr = parse(right);
-		// if both are accurate (e.g. they have name and surname)
+        // If one of them didn't have a surname we verify if they have the fullName not empty
-		if (pa.isAccurate() & pb.isAccurate()) {
+        // and verify if the normalized version is equal
-			return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5
+        if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) &&
-				+ new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5;
+                pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) {
 		} else {
 			return new JaroWinkler()
 				.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
 		}
 	}
-	private static boolean hasPid(Author a) {
+            if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null
-		if (a == null || a.getPid() == null || a.getPid().isEmpty())
+                    && !pr.getFullname().isEmpty()) {
-			return false;
+                return pl
-		return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
+                        .getFullname()
-	}
+                        .stream()
                        .anyMatch(
                                fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
            } else {
                return false;
            }
        }
        // The Authors have one surname in common
        if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
-	private static Person parse(Author author) {
+            // If one of them has only a surname and is the same we can say that they are the same author
-		if (StringUtils.isNotBlank(author.getSurname())) {
+            if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) ||
-			return new Person(author.getSurname() + ", " + author.getName(), false);
+                    (pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank)))
-		} else {
+                return true;
-			if (StringUtils.isNotBlank(author.getFullname()))
+            // The authors have the same initials of Name in common
-				return new Person(author.getFullname(), false);
+            if (pl
-			else
+                    .getName()
-				return new Person("", false);
+                    .stream()
-		}
+                    .anyMatch(
-	}
+                            nl -> pr
                                    .getName()
                                    .stream()
                                    .anyMatch(nr -> nr.equalsIgnoreCase(nl))))
                return true;
        }
-	private static String normalize(final String s) {
+        // Sometimes publications list an author in inverted order (Surname, Name).
-		String[] normalized = nfd(s)
+        // We verify if we have an exact match between name and surname
-			.toLowerCase()
+        if (pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl))) &&
-			// do not compact the regexes in a single expression, would cause StackOverflowError
+                pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl))))
-			// in case
+            return true;
-			// of large input strings
+        else
-			.replaceAll("(\\W)+", " ")
+            return false;
-			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
+    }
-			.replaceAll("(\\p{Punct})+", " ")
+    //
 			.replaceAll("(\\d)+", " ")
 			.replaceAll("(\\n)+", " ")
 			.trim()
 			.split(" ");
-		Arrays.sort(normalized);
+    /**
     * Method to enrich ORCID information in one list of authors based on another list
     *
     * @param baseAuthor  the Author List in the OAF Entity
     * @param orcidAuthor The list of ORCID Author intersected
     * @return The Author List of the OAF Entity enriched with the orcid Author
     */
    public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
-		return String.join(" ", normalized);
+        if (baseAuthor == null || baseAuthor.isEmpty())
-	}
+            return orcidAuthor;
-	private static String nfd(final String s) {
+        if (orcidAuthor == null || orcidAuthor.isEmpty())
-		return Normalizer.normalize(s, Normalizer.Form.NFD);
+            return baseAuthor;
-	}
+
        if (baseAuthor.size() == 1 && orcidAuthor.size() > 10)
            return baseAuthor;
        final List<Author> oAuthor = new ArrayList<>();
        oAuthor.addAll(orcidAuthor);
        baseAuthor.forEach(ba -> {
            Optional<Author> aMatch = oAuthor.stream().filter(oa -> checkORCIDSimilarity(ba, oa)).findFirst();
            if (aMatch.isPresent()) {
                final Author sameAuthor = aMatch.get();
                addPid(ba, sameAuthor.getPid());
                oAuthor.remove(sameAuthor);
            }
        });
        return baseAuthor;
    }
    /**
     * Appends the given pids to the pid list of an author.
     * <p>
     * The current pid list is copied into a fresh {@link ArrayList} before appending: the list held by the
     * author may be fixed-size (e.g. built with {@code Arrays.asList}), in which case {@code addAll} would
     * raise {@link UnsupportedOperationException}. {@code enrichPidFromList} takes the same precaution.
     *
     * @param a    the author to enrich; its pid list is replaced by the merged copy
     * @param pids the pids to append; {@code null} is treated as "nothing to add"
     */
    private static void addPid(final Author a, final List<StructuredProperty> pids) {
        final List<StructuredProperty> merged = new ArrayList<>();
        if (a.getPid() != null) {
            merged.addAll(a.getPid());
        }
        if (pids != null) {
            merged.addAll(pids);
        }
        a.setPid(merged);
    }
    /**
     * Builds a case-insensitive key for a pid by concatenating the lower-cased qualifier classid and the
     * lower-cased pid value. A missing qualifier, classid or value contributes an empty string.
     *
     * @param pid the pid to convert, must not be {@code null}
     * @return the comparable key, never {@code null}
     */
    public static String pidToComparableString(StructuredProperty pid) {
        // The qualifier must be checked BEFORE it is dereferenced, otherwise the null guard is useless.
        final String classid = pid.getQualifier() != null && pid.getQualifier().getClassid() != null
                ? pid.getQualifier().getClassid().toLowerCase()
                : "";
        final String value = pid.getValue() != null ? pid.getValue().toLowerCase() : "";
        return classid + value;
    }
    /**
     * Counts how many authors of the list carry at least one usable pid (see {@code hasPid}).
     *
     * @param authors the authors to inspect, may be {@code null}
     * @return the number of authors with a pid, 0 for a {@code null} list
     */
    public static int countAuthorsPids(List<Author> authors) {
        int withPid = 0;
        if (authors != null) {
            for (final Author candidate : authors) {
                if (hasPid(candidate)) {
                    withPid++;
                }
            }
        }
        return withPid;
    }
    /**
     * Null-safe size of an author list.
     *
     * @param authors the list to measure, may be {@code null}
     * @return the list size, 0 when the list is {@code null}
     */
    private static int authorsSize(List<Author> authors) {
        return authors == null ? 0 : authors.size();
    }
    /**
     * Computes a Jaro-Winkler based similarity between two authors.
     * <p>
     * When both parsed persons are reported as accurate by {@code Person.isAccurate()}, the score is the
     * average of the surname similarity and the name similarity; otherwise the normalised full names are
     * compared as a whole.
     *
     * @param a the first author
     * @param b the second author
     * @return the similarity score produced by {@link JaroWinkler}
     */
    private static Double sim(Author a, Author b) {
        final Person pa = parse(a);
        final Person pb = parse(b);
        final JaroWinkler jaroWinkler = new JaroWinkler();

        // short-circuit AND: there is no reason to evaluate the second operand when the first is false
        if (pa.isAccurate() && pb.isAccurate()) {
            final double surnameScore = jaroWinkler
                    .score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString()));
            final double nameScore = jaroWinkler
                    .score(normalize(pa.getNameString()), normalize(pb.getNameString()));
            return surnameScore * 0.5 + nameScore * 0.5;
        }
        return jaroWinkler
                .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
    }
    /**
     * Tells whether an author carries at least one pid with a non-blank value.
     *
     * @param a the author to inspect, may be {@code null}
     * @return {@code true} if a non-null pid with a non-blank value exists, {@code false} otherwise
     */
    private static boolean hasPid(Author a) {
        if (a == null) {
            return false;
        }
        final List<StructuredProperty> pids = a.getPid();
        if (pids == null) {
            return false;
        }
        for (final StructuredProperty pid : pids) {
            if (pid != null && StringUtils.isNotBlank(pid.getValue())) {
                return true;
            }
        }
        return false;
    }
    /**
     * Builds a {@link Person} out of an author.
     * <p>
     * The "surname, name" form is used when the surname is available, otherwise the full name; when neither
     * is available an empty person is returned.
     *
     * @param author the author to parse, must not be {@code null}
     * @return the parsed person, never {@code null}
     */
    private static Person parse(Author author) {
        if (StringUtils.isNotBlank(author.getSurname())) {
            // String concatenation turns a null name into the literal text "null", which would then be
            // handed to Person as if it were a real given name: substitute an empty string instead.
            return new Person(author.getSurname() + ", " + Objects.toString(author.getName(), ""), false);
        }
        if (StringUtils.isNotBlank(author.getFullname())) {
            return new Person(author.getFullname(), false);
        }
        return new Person("", false);
    }
    /**
     * Normalizes a name so that it can be compared regardless of case, punctuation, digits and token order.
     * <p>
     * The input is NFD-decomposed and lower-cased; non-word characters, combining diacritical marks,
     * punctuation, digits and newlines are replaced by blanks; the remaining tokens are sorted
     * alphabetically and joined by a single blank.
     *
     * @param s the string to normalize, must not be {@code null}
     * @return the normalized string, possibly empty
     */
    public static String normalize(final String s) {
        String[] normalized = nfd(s)
                .toLowerCase()
                // do not compact the regexes in a single expression, would cause StackOverflowError
                // in case
                // of large input strings
                .replaceAll("(\\W)+", " ")
                .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
                .replaceAll("(\\p{Punct})+", " ")
                .replaceAll("(\\d)+", " ")
                .replaceAll("(\\n)+", " ")
                .trim()
                // The replacements above can leave runs of blanks (e.g. "smith2 john" -> "smith  john").
                // Splitting on a single blank would then produce empty tokens that sort first and end up
                // as leading blanks in the result, so split on whitespace runs instead.
                .split("\\s+");
        Arrays.sort(normalized);
        return String.join(" ", normalized);
    }
    /**
     * Applies Unicode canonical decomposition (NFD) to the given string.
     *
     * @param s the string to decompose, must not be {@code null}
     * @return the NFD form of {@code s}
     */
    private static String nfd(final String s) {
        return Normalizer.normalize(s, Normalizer.Form.NFD);
    }
 }
--- a/dhp-common/src/test/java/eu/dnetlib/oa/merge/AuthorMergerTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/oa/merge/AuthorMergerTest.java
@ -0,0 +1,114 @@
 package eu.dnetlib.oa.merge;
 import static org.junit.jupiter.api.Assertions.*;
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.util.List;
 import java.util.Objects;
 import org.junit.jupiter.api.Test;
 import org.junit.platform.commons.util.StringUtils;
 import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.oa.merge.AuthorMerger;
 import eu.dnetlib.dhp.schema.oaf.Author;
 public class AuthorMergerTest {
 	private static final String PUBLICATION_SAMPLE = "/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json";
 	private static final String ORCID_SAMPLE = "/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json";
 	/** Opens a classpath resource for line-by-line reading, failing fast when the resource is missing. */
 	private static BufferedReader openResource(final String path) {
 		return new BufferedReader(
 			new InputStreamReader(Objects.requireNonNull(AuthorMergerTest.class.getResourceAsStream(path))));
 	}
 	/** Counts the authors holding at least one pid whose qualifier classid contains "orcid". */
 	private static long countAuthorsWithOrcid(final List<Author> authors) {
 		return authors
 			.stream()
 			.filter(
 				a -> a.getPid() != null && a
 					.getPid()
 					.stream()
 					.anyMatch(
 						p -> p.getQualifier() != null
 							&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))
 			.count();
 	}
 	/**
 	 * Runs the ORCID enrichment over the sample files and reports how many authors gained an ORCID pid.
 	 * <p>
 	 * The publication sample is read as pairs of lines (record id, JSON author list). For each pair the
 	 * ORCID sample provides the matched pid and the ORCID record id, followed by a JSON author list that
 	 * is read only when the publication author list is not blank.
 	 * NOTE(review): when the publication data line is blank the third ORCID line is not consumed; confirm
 	 * against the sample files that this cannot desynchronise the two readers.
 	 */
 	@Test
 	public void testEnrcichAuthor() throws Exception {
 		final ObjectMapper mapper = new ObjectMapper();
 		final TypeReference<List<Author>> aclass = new TypeReference<List<Author>>() {
 		};
 		// try-with-resources: both readers are released even when parsing or an assertion fails
 		try (BufferedReader pr = openResource(PUBLICATION_SAMPLE);
 			BufferedReader or = openResource(ORCID_SAMPLE)) {
 			String pubLine;
 			while ((pubLine = pr.readLine()) != null) {
 				final String pubId = pubLine;
 				final String matchPidOrcid = or.readLine();
 				final String pubOrcid = or.readLine();
 				final String data = pr.readLine();
 				if (StringUtils.isNotBlank(data)) {
 					List<Author> publicationAuthors = mapper.readValue(data, aclass);
 					List<Author> orcidAuthors = mapper.readValue(or.readLine(), aclass);
 					System.out.printf("OAF ID = %s \n", pubId);
 					System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);
 					System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());
 					System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());
 					System.out.printf("Oricd Matched PID = %s \n", matchPidOrcid);
 					long originalAuthorWithPiD = countAuthorsWithOrcid(publicationAuthors);
 					long start = System.currentTimeMillis();
 					final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
 					long enrichedAuthorWithPid = countAuthorsWithOrcid(enrichedList);
 					long totalTime = (System.currentTimeMillis() - start) / 1000;
 					System.out
 						.printf(
 							"Enriched authors in %d seconds from %d pid to %d pid \n", totalTime, originalAuthorWithPiD,
 							enrichedAuthorWithPid);
 					System.out.println("=================");
 					// the enrichment only adds pids, so the number of authors with an ORCID cannot decrease
 					assertTrue(enrichedAuthorWithPid >= originalAuthorWithPiD);
 				}
 			}
 		}
 	}
 	/**
 	 * Two authors whose name and surname are swapped must be recognised as the same person.
 	 */
 	@Test
 	public void checkSimilarityTest() {
 		final Author left = new Author();
 		left.setName("Anand");
 		left.setSurname("Rachna");
 		left.setFullname("Anand, Rachna");
 		System.out.println(AuthorMerger.normalizeFullName(left.getFullname()));
 		final Author right = new Author();
 		right.setName("Rachna");
 		right.setSurname("Anand");
 		right.setFullname("Rachna, Anand");
 		assertTrue(AuthorMerger.checkORCIDSimilarity(left, right));
 	}
 }
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDDumpApplication.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDDumpApplication.java
@ -0,0 +1,102 @@
 package eu.dnetlib.dhp.collection.orcid;
 import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
 import java.io.InputStream;
 import java.net.URL;
 import java.net.URLConnection;
 import java.util.Objects;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.http.client.config.RequestConfig;
 import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClientBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 /**
  * Downloads into a target folder of the given file system every file listed by a JSON index.
  * <p>
  * The index is fetched from {@code apiURL} and is expected to contain a {@code files} array whose items
  * expose the {@code name} and {@code download_url} fields.
  */
 public class DownloadORCIDDumpApplication {
 	private static final Logger log = LoggerFactory.getLogger(DownloadORCIDDumpApplication.class);
 	private final FileSystem fileSystem;
 	/**
 	 * @param fileSystem the file system the downloaded files are written to
 	 */
 	public DownloadORCIDDumpApplication(FileSystem fileSystem) {
 		this.fileSystem = fileSystem;
 	}
 	/**
 	 * Entry point: reads the {@code namenode}, {@code targetPath} and {@code apiURL} arguments and starts
 	 * the download.
 	 *
 	 * @param args the command line arguments described by download_orcid_parameter.json
 	 * @throws Exception if the arguments cannot be parsed or the download fails
 	 */
 	public static void main(String[] args) throws Exception {
 		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
 			IOUtils
 				.toString(
 					Objects
 						.requireNonNull(
 							DownloadORCIDDumpApplication.class
 								.getResourceAsStream(
 									"/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json"))));
 		argumentParser.parseArgument(args);
 		final String hdfsuri = argumentParser.get("namenode");
 		log.info("hdfsURI is {}", hdfsuri);
 		final String targetPath = argumentParser.get("targetPath");
 		log.info("targetPath is {}", targetPath);
 		final String apiURL = argumentParser.get("apiURL");
 		log.info("apiURL is {}", apiURL);
 		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
 		new DownloadORCIDDumpApplication(fileSystem).run(targetPath, apiURL);
 	}
 	/**
 	 * Downloads a single item and stores it as {@code basePath/name}, overwriting an existing file.
 	 * <p>
 	 * The target file is created only once a response with a status code in [200, 400) and a body has been
 	 * received, so a failed request neither leaves an empty file behind nor truncates a previous download.
 	 *
 	 * @param name     the name of the file to create under {@code basePath}
 	 * @param itemURL  the URL the content is downloaded from
 	 * @param basePath the target folder
 	 * @throws RuntimeException wrapping any failure raised while downloading or writing
 	 */
 	private void downloadItem(final String name, final String itemURL, final String basePath) {
 		final Path hdfsWritePath = new Path(String.format("%s/%s", basePath, name));
 		final HttpGet request = new HttpGet(itemURL);
 		final int timeoutMillis = 60 * 1000; // 60 seconds
 		final RequestConfig config = RequestConfig
 			.custom()
 			.setConnectTimeout(timeoutMillis)
 			.setConnectionRequestTimeout(timeoutMillis)
 			.setSocketTimeout(timeoutMillis)
 			.build();
 		log.info("Downloading url {} into {}", itemURL, hdfsWritePath.getName());
 		try (CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
 			CloseableHttpResponse response = client.execute(request)) {
 			final int responseCode = response.getStatusLine().getStatusCode();
 			log.info("Response code is {}", responseCode);
 			if (responseCode >= 200 && responseCode < 400 && response.getEntity() != null) {
 				// both streams are closed here: an unclosed FSDataOutputStream may never flush its last block
 				try (InputStream content = response.getEntity().getContent();
 					FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true)) {
 					IOUtils.copy(content, fsDataOutputStream);
 				}
 			} else {
 				log.warn("Skipping {}: url {} answered with response code {}", name, itemURL, responseCode);
 			}
 		} catch (Exception e) {
 			throw new RuntimeException(e);
 		}
 	}
 	/**
 	 * Fetches the JSON index from {@code apiURL} and downloads each listed file into {@code targetPath}.
 	 *
 	 * @param targetPath the folder the files are written to
 	 * @param apiURL     the URL of the JSON index
 	 * @throws IllegalStateException if the index does not contain a {@code files} node
 	 * @throws Exception             if the index cannot be fetched or parsed, or a download fails
 	 */
 	protected void run(final String targetPath, final String apiURL) throws Exception {
 		final ObjectMapper mapper = new ObjectMapper();
 		final URLConnection conn = new URL(apiURL).openConnection();
 		final JsonNode jsonNode;
 		// parse straight from the stream (Jackson detects the JSON encoding) and always release it
 		try (InputStream is = conn.getInputStream()) {
 			jsonNode = mapper.readTree(is);
 		}
 		final JsonNode files = jsonNode == null ? null : jsonNode.get("files");
 		if (files == null) {
 			throw new IllegalStateException("No 'files' node found in the response of " + apiURL);
 		}
 		files.forEach(i -> downloadItem(i.get("name").asText(), i.get("download_url").asText(), targetPath));
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ExtractORCIDDump.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ExtractORCIDDump.java
@ -0,0 +1,71 @@
 package eu.dnetlib.dhp.collection.orcid;
 import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Objects;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 /**
  * Starts one {@link ORCIDExtractor} for every {@code tar.gz} file found directly under a source folder
  * and waits for all of them to finish.
  */
 public class ExtractORCIDDump {
 	private static final Logger log = LoggerFactory.getLogger(ExtractORCIDDump.class);
 	private final FileSystem fileSystem;
 	/**
 	 * @param fileSystem the file system holding both the archives and the extraction target
 	 */
 	public ExtractORCIDDump(FileSystem fileSystem) {
 		this.fileSystem = fileSystem;
 	}
 	/**
 	 * Entry point: reads the {@code namenode}, {@code sourcePath} and {@code targetPath} arguments and
 	 * starts the extraction.
 	 *
 	 * @param args the command line arguments described by extract_orcid_parameter.json
 	 * @throws Exception if the arguments cannot be parsed or the extraction fails
 	 */
 	public static void main(String[] args) throws Exception {
 		// the resource is resolved through this class rather than the (copy-pasted) download application
 		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
 			IOUtils
 				.toString(
 					Objects
 						.requireNonNull(
 							ExtractORCIDDump.class
 								.getResourceAsStream(
 									"/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json"))));
 		argumentParser.parseArgument(args);
 		final String hdfsuri = argumentParser.get("namenode");
 		log.info("hdfsURI is {}", hdfsuri);
 		final String sourcePath = argumentParser.get("sourcePath");
 		log.info("sourcePath is {}", sourcePath);
 		final String targetPath = argumentParser.get("targetPath");
 		log.info("targetPath is {}", targetPath);
 		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
 		new ExtractORCIDDump(fileSystem).run(sourcePath, targetPath);
 	}
 	/**
 	 * Creates an extractor for each {@code tar.gz} file listed (non recursively) under {@code sourcePath},
 	 * starts them all and blocks until every one has terminated. Extractors are identified by a
 	 * progressive number starting from 0.
 	 * NOTE(review): one thread is started per archive with no upper bound, and {@code join} does not
 	 * report a failure that happened inside a worker - confirm this is acceptable for the expected input.
 	 *
 	 * @param sourcePath the folder containing the archives
 	 * @param targetPath the base output path handed to every extractor
 	 * @throws IOException          if the source folder cannot be listed
 	 * @throws InterruptedException if the calling thread is interrupted while waiting for a worker
 	 */
 	public void run(final String sourcePath, final String targetPath) throws IOException, InterruptedException {
 		final RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path(sourcePath), false);
 		final List<ORCIDExtractor> workers = new ArrayList<>();
 		while (files.hasNext()) {
 			final Path archive = files.next().getPath();
 			if (archive.getName().endsWith("tar.gz")) {
 				// the number of workers created so far doubles as the progressive id of the new one
 				workers.add(new ORCIDExtractor(fileSystem, String.valueOf(workers.size()), archive, targetPath));
 			}
 		}
 		for (final ORCIDExtractor worker : workers) {
 			worker.start();
 		}
 		for (final ORCIDExtractor worker : workers) {
 			worker.join();
 		}
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java
@ -0,0 +1,171 @@
 package eu.dnetlib.dhp.collection.orcid;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.HashMap;
 import java.util.Map;
 import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
 import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.CompressionCodecFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 /**\
 * The ORCIDExtractor class extracts ORCID data from a TAR archive.
 * The class creates a map of SequenceFile.Writer objects, one for each type of data that is to be extracted (e.g., employments, works, summaries).
 * Then, it iterates over the TAR archive and writes each entry to the appropriate SequenceFile.Writer object.
 * Finally, it closes all the SequenceFile.Writer objects.
 */
 public class ORCIDExtractor extends Thread {
 	private static final Logger log = LoggerFactory.getLogger(ORCIDExtractor.class);
 	private final FileSystem fileSystem;
 	private final String id;
 	private final Path sourcePath;
 	private final String baseOutputPath;
 	public ORCIDExtractor(FileSystem fileSystem, String id, Path sourcePath, String baseOutputPath) {
 		this.fileSystem = fileSystem;
 		this.id = id;
 		this.sourcePath = sourcePath;
 		this.baseOutputPath = baseOutputPath;
 	}
 	/**
 	 * creates a map of SequenceFile.Writer objects,
 	 * one for each type of data that is to be extracted. The map is created based on the filename in the TAR archive.
 	 * For example, if the filename is employments.json, the map will contain an entry for the SequenceFile.Writer
 	 * object that writes employment data.
 	 * @return the Map
 	 */
 	private Map<String, SequenceFile.Writer> createMap() {
 		try {
 			log.info("Thread {} Creating sequence files starting from this input Path {}", id, sourcePath.getName());
 			Map<String, SequenceFile.Writer> res = new HashMap<>();
 			if (sourcePath.getName().contains("summaries")) {
 				final String summaryPath = String.format("%s/summaries_%s", baseOutputPath, id);
 				final SequenceFile.Writer summary_file = SequenceFile
 					.createWriter(
 						fileSystem.getConf(),
 						SequenceFile.Writer.file(new Path(summaryPath)),
 						SequenceFile.Writer.keyClass(Text.class),
 						SequenceFile.Writer.valueClass(Text.class));
 				log.info("Thread {} Creating only summary path here {}", id, summaryPath);
 				res.put("summary", summary_file);
 				return res;
 			} else {
 				String employmentsPath = String.format("%s/employments_%s", baseOutputPath, id);
 				final SequenceFile.Writer employments_file = SequenceFile
 					.createWriter(
 						fileSystem.getConf(),
 						SequenceFile.Writer.file(new Path(employmentsPath)),
 						SequenceFile.Writer.keyClass(Text.class),
 						SequenceFile.Writer.valueClass(Text.class));
 				res.put("employments", employments_file);
 				log.info("Thread {} Creating employments path here {}", id, employmentsPath);
 				final String worksPath = String.format("%s/works_%s", baseOutputPath, id);
 				final SequenceFile.Writer works_file = SequenceFile
 					.createWriter(
 						fileSystem.getConf(),
 						SequenceFile.Writer.file(new Path(worksPath)),
 						SequenceFile.Writer.keyClass(Text.class),
 						SequenceFile.Writer.valueClass(Text.class));
 				res.put("works", works_file);
 				log.info("Thread {} Creating works path here {}", id, worksPath);
 				return res;
 			}
 		} catch (Throwable e) {
 			throw new RuntimeException(e);
 		}
 	}
 	@Override
 	public void run() {
 		CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
 		CompressionCodec codec = factory.getCodec(sourcePath);
 		if (codec == null) {
 			System.err.println("No codec found for " + sourcePath.getName());
 			System.exit(1);
 		}
 		InputStream gzipInputStream = null;
 		try {
 			gzipInputStream = codec.createInputStream(fileSystem.open(sourcePath));
 			final Map<String, SequenceFile.Writer> fileMap = createMap();
 			iterateTar(fileMap, gzipInputStream);
 		} catch (IOException e) {
 			throw new RuntimeException(e);
 		} finally {
 			log.info("Closing gzip stream");
 			IOUtils.closeStream(gzipInputStream);
 		}
 	}
 	private SequenceFile.Writer retrieveFile(Map<String, SequenceFile.Writer> fileMap, final String path) {
 		if (sourcePath.getName().contains("summaries")) {
 			return fileMap.get("summary");
 		}
 		if (path.contains("works")) {
 			return fileMap.get("works");
 		}
 		if (path.contains("employments"))
 			return fileMap.get("employments");
 		return null;
 	}
 	private void iterateTar(Map<String, SequenceFile.Writer> fileMap, InputStream gzipInputStream) throws IOException {
 		int extractedItem = 0;
 		try (final TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
 			TarArchiveEntry entry;
 			while ((entry = tais.getNextTarEntry()) != null) {
 				if (entry.isFile()) {
 					final SequenceFile.Writer fl = retrieveFile(fileMap, entry.getName());
 					if (fl != null) {
 						final Text key = new Text(entry.getName());
 						final Text value = new Text(
 							org.apache.commons.io.IOUtils.toString(new BufferedReader(new InputStreamReader(tais))));
 						fl.append(key, value);
 						extractedItem++;
 						if (extractedItem % 100000 == 0) {
 							log.info("Thread {}: Extracted {} items", id, extractedItem);
 							break;
 						}
 					}
 				}
 			}
 		} finally {
 			for (SequenceFile.Writer k : fileMap.values()) {
 				log.info("Thread {}: Completed processed {} items", id, extractedItem);
 				k.hflush();
 				k.close();
 			}
 		}
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/OrcidParser.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/OrcidParser.java
@ -0,0 +1,251 @@
 package eu.dnetlib.dhp.collection.orcid;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import org.apache.commons.lang3.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.ximpleware.*;
 import eu.dnetlib.dhp.collection.orcid.model.*;
 import eu.dnetlib.dhp.parser.utility.VtdException;
 import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
 public class OrcidParser {
 	final Logger log = LoggerFactory.getLogger(OrcidParser.class);
 	private VTDNav vn;
 	private AutoPilot ap;
 	private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
 	private static final String NS_COMMON = "common";
 	private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
 	private static final String NS_PERSON = "person";
 	private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
 	private static final String NS_DETAILS = "personal-details";
 	private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
 	private static final String NS_OTHER = "other-name";
 	private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
 	private static final String NS_RECORD = "record";
 	private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
 	private static final String NS_ACTIVITIES = "activities";
 	private static final String NS_ACTIVITIES_URL = "http://www.orcid.org/ns/activities";
 	private static final String NS_WORK = "work";
 	private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
 	private static final String NS_ERROR = "error";
 	private static final String NS_HISTORY = "history";
 	private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
 	private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
 	private static final String NS_BULK = "bulk";
 	private static final String NS_EXTERNAL = "external-identifier";
 	private static final String NS_EXTERNAL_URL = "http://www.orcid.org/ns/external-identifier";
 	private void generateParsedDocument(final String xml) throws ParseException {
 		final VTDGen vg = new VTDGen();
 		vg.setDoc(xml.getBytes());
 		vg.parse(true);
 		this.vn = vg.getNav();
 		this.ap = new AutoPilot(vn);
 		ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
 		ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
 		ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
 		ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
 		ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
 		ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
 		ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL);
 		ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
 		ap.declareXPathNameSpace(NS_EXTERNAL, NS_EXTERNAL_URL);
 		ap.declareXPathNameSpace(NS_ACTIVITIES, NS_ACTIVITIES_URL);
 	}
 	public Author parseSummary(final String xml) {
 		try {
 			final Author author = new Author();
 			generateParsedDocument(xml);
 			List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
 				.getTextValuesWithAttributes(
 					ap, vn, "//record:record", Arrays.asList("path"));
 			if (!recordNodes.isEmpty()) {
 				final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
 				author.setOrcid(oid);
 			} else {
 				return null;
 			}
 			List<VtdUtilityParser.Node> personNodes = VtdUtilityParser
 				.getTextValuesWithAttributes(
 					ap, vn, "//person:name", Arrays.asList("visibility"));
 			final String visibility = (personNodes.get(0).getAttributes().get("visibility"));
 			author.setVisibility(visibility);
 			final String name = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:given-names");
 			author.setGivenName(name);
 			final String surnames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:family-name");
 			author.setFamilyName(surnames);
 			final String creditNames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:credit-name");
 			author.setCreditName(creditNames);
 			final String biography = VtdUtilityParser
 				.getSingleValue(ap, vn, "//person:biography/personal-details:content");
 			author.setBiography(biography);
 			final List<String> otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content");
 			if (!otherNames.isEmpty()) {
 				author.setOtherNames(otherNames);
 			}
 			ap.selectXPath("//external-identifier:external-identifier");
 			while (ap.evalXPath() != -1) {
 				final Pid pid = new Pid();
 				final AutoPilot ap1 = new AutoPilot(ap.getNav());
 				ap1.selectXPath("./common:external-id-type");
 				while (ap1.evalXPath() != -1) {
 					int it = vn.getText();
 					pid.setSchema(vn.toNormalizedString(it));
 				}
 				ap1.selectXPath("./common:external-id-value");
 				while (ap1.evalXPath() != -1) {
 					int it = vn.getText();
 					pid.setValue(vn.toNormalizedString(it));
 				}
 				author.addOtherPid(pid);
 			}
 			return author;
 		} catch (Throwable e) {
 			log.error("Error on parsing {}", xml);
 			log.error(e.getMessage());
 			return null;
 		}
 	}
 	public Work parseWork(final String xml) {
 		try {
 			final Work work = new Work();
 			generateParsedDocument(xml);
 			List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
 				.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "visibility"));
 			if (!workNodes.isEmpty()) {
 				final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
 				work.setOrcid(oid);
 			} else {
 				return null;
 			}
 			ap.selectXPath("//common:external-id");
 			while (ap.evalXPath() != -1) {
 				final Pid pid = new Pid();
 				final AutoPilot ap1 = new AutoPilot(ap.getNav());
 				ap1.selectXPath("./common:external-id-type");
 				while (ap1.evalXPath() != -1) {
 					int it = vn.getText();
 					pid.setSchema(vn.toNormalizedString(it));
 				}
 				ap1.selectXPath("./common:external-id-value");
 				while (ap1.evalXPath() != -1) {
 					int it = vn.getText();
 					pid.setValue(vn.toNormalizedString(it));
 				}
 				work.addPid(pid);
 			}
 			work.setTitle(VtdUtilityParser.getSingleValue(ap, vn, "//work:title/common:title"));
 			return work;
 		} catch (Throwable e) {
 			log.error("Error on parsing {}", xml);
 			log.error(e.getMessage());
 			return null;
 		}
 	}
 	private String extractEmploymentDate(final String xpath) throws Exception {
 		ap.selectXPath(xpath);
 		StringBuilder sb = new StringBuilder();
 		while (ap.evalXPath() != -1) {
 			final AutoPilot ap1 = new AutoPilot(ap.getNav());
 			ap1.selectXPath("./common:year");
 			while (ap1.evalXPath() != -1) {
 				int it = vn.getText();
 				sb.append(vn.toNormalizedString(it));
 			}
 			ap1.selectXPath("./common:month");
 			while (ap1.evalXPath() != -1) {
 				int it = vn.getText();
 				sb.append("-");
 				sb.append(vn.toNormalizedString(it));
 			}
 			ap1.selectXPath("./common:day");
 			while (ap1.evalXPath() != -1) {
 				int it = vn.getText();
 				sb.append("-");
 				sb.append(vn.toNormalizedString(it));
 			}
 		}
 		return sb.toString();
 	}
 	public Employment parseEmployment(final String xml) {
 		try {
 			final Employment employment = new Employment();
 			generateParsedDocument(xml);
 			final String oid = VtdUtilityParser
 				.getSingleValue(ap, vn, "//common:source-orcid/common:path");
 			if (StringUtils.isNotBlank(oid)) {
 				employment.setOrcid(oid);
 			} else {
 				return null;
 			}
 			final String depName = VtdUtilityParser
 				.getSingleValue(ap, vn, "//common:department-name");
 			final String rolTitle = VtdUtilityParser
 				.getSingleValue(ap, vn, "//common:role-title");
 			if (StringUtils.isNotBlank(rolTitle))
 				employment.setRoleTitle(rolTitle);
 			if (StringUtils.isNotBlank(depName))
 				employment.setDepartmentName(depName);
 			else
 				employment
 					.setDepartmentName(
 						VtdUtilityParser
 							.getSingleValue(ap, vn, "//common:organization/common:name"));
 			employment.setStartDate(extractEmploymentDate("//common:start-date"));
 			employment.setEndDate(extractEmploymentDate("//common:end-date"));
 			final String affiliationId = VtdUtilityParser
 				.getSingleValue(ap, vn, "//common:disambiguated-organization-identifier");
 			final String affiliationIdType = VtdUtilityParser
 				.getSingleValue(ap, vn, "//common:disambiguation-source");
 			if (StringUtils.isNotBlank(affiliationId) || StringUtils.isNotBlank(affiliationIdType))
 				employment.setAffiliationId(new Pid(affiliationId, affiliationIdType));
 			return employment;
 		} catch (Throwable e) {
 			log.error("Error on parsing {}", xml);
 			log.error(e.getMessage());
 			return null;
 		}
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java
@ -0,0 +1,83 @@
 package eu.dnetlib.dhp.collection.orcid.model;
 import java.util.ArrayList;
 import java.util.List;
 /**
  * Bean holding the data read from an ORCID summary record: names, visibility, biography,
  * alternative names and external identifiers. The ORCID itself is inherited from {@link ORCIDItem}.
  */
 public class Author extends ORCIDItem {
 	private String givenName;
 	private String familyName;
 	private String visibility;
 	private String creditName;
 	private List<String> otherNames;
 	private List<Pid> otherPids;
 	private String biography;
 	/** @return the given name */
 	public String getGivenName() {
 		return this.givenName;
 	}
 	/** @param givenName the given name */
 	public void setGivenName(String givenName) {
 		this.givenName = givenName;
 	}
 	/** @return the family name */
 	public String getFamilyName() {
 		return this.familyName;
 	}
 	/** @param familyName the family name */
 	public void setFamilyName(String familyName) {
 		this.familyName = familyName;
 	}
 	/** @return the visibility value */
 	public String getVisibility() {
 		return this.visibility;
 	}
 	/** @param visibility the visibility value */
 	public void setVisibility(String visibility) {
 		this.visibility = visibility;
 	}
 	/** @return the credit name */
 	public String getCreditName() {
 		return this.creditName;
 	}
 	/** @param creditName the credit name */
 	public void setCreditName(String creditName) {
 		this.creditName = creditName;
 	}
 	/** @return the other names, null when never set */
 	public List<String> getOtherNames() {
 		return this.otherNames;
 	}
 	/** @param otherNames the other names */
 	public void setOtherNames(List<String> otherNames) {
 		this.otherNames = otherNames;
 	}
 	/** @return the external identifiers, null when none was set or added */
 	public List<Pid> getOtherPids() {
 		return this.otherPids;
 	}
 	/** @param otherPids the external identifiers */
 	public void setOtherPids(List<Pid> otherPids) {
 		this.otherPids = otherPids;
 	}
 	/**
 	 * Appends an external identifier, creating the list on first use.
 	 *
 	 * @param pid the identifier to add
 	 */
 	public void addOtherPid(final Pid pid) {
 		if (this.otherPids == null) {
 			this.otherPids = new ArrayList<>();
 		}
 		this.otherPids.add(pid);
 	}
 	/** @return the biography */
 	public String getBiography() {
 		return this.biography;
 	}
 	/** @param biography the biography */
 	public void setBiography(String biography) {
 		this.biography = biography;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Employment.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Employment.java
@ -0,0 +1,54 @@
 package eu.dnetlib.dhp.collection.orcid.model;
 /**
  * Bean holding the data read from an ORCID employment record. The ORCID of the person is inherited
  * from {@link ORCIDItem}.
  */
 public class Employment extends ORCIDItem {
 	private String startDate;
 	// renamed from "EndDate" to follow Java field naming; the accessors are unchanged
 	private String endDate;
 	private Pid affiliationId;
 	private String departmentName;
 	private String roleTitle;
 	/** @return the start date of the employment */
 	public String getStartDate() {
 		return startDate;
 	}
 	/** @param startDate the start date of the employment */
 	public void setStartDate(String startDate) {
 		this.startDate = startDate;
 	}
 	/** @return the end date of the employment */
 	public String getEndDate() {
 		return endDate;
 	}
 	/** @param endDate the end date of the employment */
 	public void setEndDate(String endDate) {
 		this.endDate = endDate;
 	}
 	/** @return the identifier of the affiliation organization */
 	public Pid getAffiliationId() {
 		return affiliationId;
 	}
 	/** @param affiliationId the identifier of the affiliation organization */
 	public void setAffiliationId(Pid affiliationId) {
 		this.affiliationId = affiliationId;
 	}
 	/** @return the department name */
 	public String getDepartmentName() {
 		return departmentName;
 	}
 	/** @param departmentName the department name */
 	public void setDepartmentName(String departmentName) {
 		this.departmentName = departmentName;
 	}
 	/** @return the role title */
 	public String getRoleTitle() {
 		return roleTitle;
 	}
 	/** @param roleTitle the role title */
 	public void setRoleTitle(String roleTitle) {
 		this.roleTitle = roleTitle;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/ORCIDItem.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/ORCIDItem.java
@ -0,0 +1,14 @@
 package eu.dnetlib.dhp.collection.orcid.model;
 /**
  * Base bean of the ORCID model: it only carries the ORCID the item refers to.
  */
 public class ORCIDItem {
 	private String orcid;
 	/** @param orcid the ORCID this item refers to */
 	public void setOrcid(String orcid) {
 		this.orcid = orcid;
 	}
 	/** @return the ORCID this item refers to, null when never set */
 	public String getOrcid() {
 		return this.orcid;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Pid.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Pid.java
@ -0,0 +1,33 @@
 package eu.dnetlib.dhp.collection.orcid.model;
 /**
  * Bean describing an identifier as a (value, schema) pair.
  */
 public class Pid {
 	private String value;
 	private String schema;
 	/** Creates an identifier whose value and schema are both null. */
 	public Pid() {
 	}
 	/**
 	 * @param value the identifier value
 	 * @param schema the schema the value belongs to
 	 */
 	public Pid(String value, String schema) {
 		this.schema = schema;
 		this.value = value;
 	}
 	/** @return the schema the value belongs to */
 	public String getSchema() {
 		return this.schema;
 	}
 	/** @param schema the schema the value belongs to */
 	public void setSchema(String schema) {
 		this.schema = schema;
 	}
 	/** @return the identifier value */
 	public String getValue() {
 		return this.value;
 	}
 	/** @param value the identifier value */
 	public void setValue(String value) {
 		this.value = value;
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Work.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Work.java
@ -0,0 +1,35 @@
 package eu.dnetlib.dhp.collection.orcid.model;
 import java.util.ArrayList;
 import java.util.List;
 /**
  * Bean holding the data read from an ORCID work record: its title and its identifiers.
  * The ORCID of the owner is inherited from {@link ORCIDItem}.
  */
 public class Work extends ORCIDItem {
 	private String title;
 	private List<Pid> pids;
 	/** @return the title of the work */
 	public String getTitle() {
 		return this.title;
 	}
 	/** @param title the title of the work */
 	public void setTitle(String title) {
 		this.title = title;
 	}
 	/** @return the identifiers of the work, null when none was set or added */
 	public List<Pid> getPids() {
 		return this.pids;
 	}
 	/** @param pids the identifiers of the work */
 	public void setPids(List<Pid> pids) {
 		this.pids = pids;
 	}
 	/**
 	 * Appends an identifier, creating the list on first use.
 	 *
 	 * @param pid the identifier to add
 	 */
 	public void addPid(Pid pid) {
 		if (this.pids == null) {
 			this.pids = new ArrayList<>();
 		}
 		this.pids.add(pid);
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json
@ -0,0 +1,21 @@
 [
  {
    "paramName": "n",
    "paramLongName": "namenode",
    "paramDescription": "the Name Node URI",
    "paramRequired": true
  },
  {
    "paramName": "t",
    "paramLongName": "targetPath",
    "paramDescription": "the target PATH where the files are downloaded",
    "paramRequired": true
  },
  {
    "paramName": "a",
    "paramLongName": "apiURL",
    "paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json
@ -0,0 +1,21 @@
 [
  {
    "paramName": "n",
    "paramLongName": "namenode",
    "paramDescription": "the Name Node URI",
    "paramRequired": true
  },
  {
    "paramName": "t",
    "paramLongName": "targetPath",
    "paramDescription": "the target PATH to extract files",
    "paramRequired": true
  },
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the PATH where the tar.gz files were downloaded",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json
@ -0,0 +1,21 @@
 [
  {
    "paramName": "m",
    "paramLongName": "master",
    "paramDescription": "the master name",
    "paramRequired": true
  },
  {
    "paramName": "t",
    "paramLongName": "targetPath",
    "paramDescription": "the target PATH of the DF tables",
    "paramRequired": true
  },
  {
    "paramName": "s",
    "paramLongName": "sourcePath",
    "paramDescription": "the PATH of the ORCID sequence file",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/config-default.xml
@ -0,0 +1,23 @@
 <configuration>
    <!-- Default values of the properties used by the ORCID workflow of this oozie_app directory. -->
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <!-- The workflow contains a spark action: select the spark2 share lib for it. -->
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>oozie.launcher.mapreduce.user.classpath.first</name>
        <value>true</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/workflow.xml
@ -0,0 +1,81 @@
 <!--
     Downloads the ORCID dump, extracts the downloaded archives and generates the ORCID tables.
     Action chain: DownloadDUMP -> extractDump -> generateTables -> End; any failure goes to Kill.
 -->
 <workflow-app name="download_ORCID_DUMP" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>targetPath</name>
            <description>the  path  to store the original ORCID dump</description>
        </property>
        <property>
            <name>apiURL</name>
            <description>The figshare  API URL to retrieve the list file to download</description>
        </property>
    </parameters>
    <!-- Start from the download step: starting from generateTables left DownloadDUMP and extractDump unreachable. -->
    <start to="DownloadDUMP"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <!-- Step 1: download the dump files into ${targetPath}. -->
    <action name="DownloadDUMP">
        <java>
            <configuration>
                <property>
                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
                    <value>true</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.dhp.collection.orcid.DownloadORCIDDumpApplication</main-class>
            <arg>--namenode</arg><arg>${nameNode}</arg>
            <arg>--targetPath</arg><arg>${targetPath}</arg>
            <arg>--apiURL</arg><arg>${apiURL}</arg>
        </java>
        <ok to="extractDump"/>
        <error to="Kill"/>
    </action>
    <!-- Step 2: extract the archives found in ${targetPath} into ${targetPath}/extracted. -->
    <action name="extractDump">
        <java>
            <configuration>
                <property>
                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
                    <value>true</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.dhp.collection.orcid.ExtractORCIDDump</main-class>
            <java-opts> -Xmx6g </java-opts>
            <arg>--namenode</arg><arg>${nameNode}</arg>
            <arg>--sourcePath</arg><arg>${targetPath}</arg>
            <arg>--targetPath</arg><arg>${targetPath}/extracted</arg>
        </java>
        <ok to="generateTables"/>
        <error to="Kill"/>
    </action>
    <!-- Step 3: generate the tables under ${targetPath}/tables from the extracted files. -->
    <action name="generateTables">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Generate ORCID Tables</name>
            <class>eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable</class>
            <jar>dhp-aggregation-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.executor.memoryOverhead=2g
                --conf spark.sql.shuffle.partitions=3000
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${targetPath}/extracted</arg>
            <arg>--targetPath</arg><arg>${targetPath}/tables</arg>
            <arg>--master</arg><arg>yarn</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/preprocess_orcid_dump_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/preprocess_orcid_dump_parameter.json
@ -0,0 +1,21 @@
 [
  {
    "paramName": "n",
    "paramLongName": "namenode",
    "paramDescription": "the Name Node URI",
    "paramRequired": true
  },
  {
    "paramName": "t",
    "paramLongName": "targetPath",
    "paramDescription": "the target PATH where the files are downloaded",
    "paramRequired": true
  },
  {
    "paramName": "a",
    "paramLongName": "apiURL",
    "paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/orcid/SparkGenerateORCIDTable.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/orcid/SparkGenerateORCIDTable.scala
@ -0,0 +1,101 @@
 package eu.dnetlib.dhp.collection.orcid
 import eu.dnetlib.dhp.application.AbstractScalaApplication
 import eu.dnetlib.dhp.collection.orcid.model.{Author, Employment, Pid, Work}
 import org.apache.hadoop.io.Text
 import org.apache.spark.SparkContext
 import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}
 /** Spark application that turns the sequence files extracted from the ORCID dump into three tables
   * (Authors, Employments and Works) saved under the target path.
   *
   * @param propertyPath classpath location of the JSON file describing the accepted arguments
   * @param args the command line arguments
   * @param log the logger used by the application
   */
 class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {
  /** Here all the spark applications runs this method
    * where the whole logic of the spark node is defined
    */
  override def run(): Unit = {
    val sourcePath: String = parser.get("sourcePath")
    log.info("found parameters sourcePath: {}", sourcePath)
    val targetPath: String = parser.get("targetPath")
    log.info("found parameters targetPath: {}", targetPath)
    extractORCIDTable(spark, sourcePath, targetPath)
    extractORCIDEmploymentsTable(spark, sourcePath, targetPath)
    extractORCIDWorksTable(spark, sourcePath, targetPath)
  }
  /** Loads the Text/Text sequence files found in `sourcePath` as a dataset of (key, value) strings. */
  private def readRecords(
    spark: SparkSession,
    sourcePath: String
  ): org.apache.spark.sql.Dataset[(String, String)] = {
    import spark.implicits._
    spark.sparkContext
      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
      .map { case (key, value) => (key.toString, value.toString) }
      .toDF
      .as[(String, String)]
  }
  /** Shared pipeline of the three extractions: keeps the records whose key contains `keyMarker`,
    * parses each value with a fresh [[OrcidParser]], drops the records the parser rejected (null)
    * and overwrites `outputPath` with the result.
    */
  private def exportTable[T <: AnyRef](
    spark: SparkSession,
    sourcePath: String,
    keyMarker: String,
    outputPath: String
  )(parse: (OrcidParser, String) => T)(implicit encoder: Encoder[T]): Unit = {
    readRecords(spark, sourcePath)
      .filter(r => r._1.contains(keyMarker))
      .map(r => parse(new OrcidParser, r._2))
      .filter(p => p != null)
      .write
      .mode(SaveMode.Overwrite)
      .save(outputPath)
  }
  /** Writes the records whose key contains "summaries" to `targetPath/Authors`.
    *
    * @param spark the spark session
    * @param sourcePath the path of the extracted sequence files
    * @param targetPath the base path of the generated tables
    */
  def extractORCIDTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    implicit val orcidAuthor: Encoder[Author] = Encoders.bean(classOf[Author])
    exportTable[Author](spark, sourcePath, "summaries", s"$targetPath/Authors") { (orcidParser, xml) =>
      orcidParser.parseSummary(xml)
    }
  }
  /** Writes the records whose key contains "works" to `targetPath/Works`.
    *
    * @param spark the spark session
    * @param sourcePath the path of the extracted sequence files
    * @param targetPath the base path of the generated tables
    */
  def extractORCIDWorksTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    implicit val orcidWorkAuthor: Encoder[Work] = Encoders.bean(classOf[Work])
    exportTable[Work](spark, sourcePath, "works", s"$targetPath/Works") { (orcidParser, xml) =>
      orcidParser.parseWork(xml)
    }
  }
  /** Writes the records whose key contains "employments" to `targetPath/Employments`.
    *
    * @param spark the spark session
    * @param sourcePath the path of the extracted sequence files
    * @param targetPath the base path of the generated tables
    */
  def extractORCIDEmploymentsTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
    implicit val orcidEmploymentAuthor: Encoder[Employment] = Encoders.bean(classOf[Employment])
    exportTable[Employment](spark, sourcePath, "employments", s"$targetPath/Employments") { (orcidParser, xml) =>
      orcidParser.parseEmployment(xml)
    }
  }
 }
 /** Entry point of the ORCID table generation job. */
 object SparkGenerateORCIDTable {
  val log: Logger = LoggerFactory.getLogger(SparkGenerateORCIDTable.getClass)
  /** Builds the application from the command line arguments, initializes it and runs it.
    *
    * @param args the command line arguments
    */
  def main(args: Array[String]): Unit = {
    val application =
      new SparkGenerateORCIDTable("/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json", args, log)
    application.initialize().run()
  }
 }
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
@ -176,7 +176,8 @@ object DataciteToOAFTransformation {
          vocabularies.getSynonymAsQualifier(
            ModelConstants.DNET_RESULT_TYPOLOGIES,
            typeQualifier.getClassid
-          ), resourceType
+          ),
          resourceType
        )
    }
    if (schemaOrg != null && schemaOrg.nonEmpty) {
@ -188,7 +189,8 @@ object DataciteToOAFTransformation {
          vocabularies.getSynonymAsQualifier(
            ModelConstants.DNET_RESULT_TYPOLOGIES,
            typeQualifier.getClassid
-          ), schemaOrg
+          ),
          schemaOrg
        )
    }
@ -203,7 +205,8 @@ object DataciteToOAFTransformation {
          vocabularies.getSynonymAsQualifier(
            ModelConstants.DNET_RESULT_TYPOLOGIES,
            typeQualifier.getClassid
-          ), resourceTypeGeneral
+          ),
          resourceTypeGeneral
        )
    }
@ -228,7 +231,6 @@ object DataciteToOAFTransformation {
    itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
    i.setInstanceTypeMapping(List(itm).asJava)
    typeQualifiers._2.getClassname match {
      case "dataset" =>
        val r = new OafDataset
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
@ -593,7 +593,6 @@ object BioDBToOAF {
    itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
    i.setInstanceTypeMapping(List(itm).asJava)
    i.setCollectedfrom(collectedFromMap("ebi"))
    d.setInstance(List(i).asJava)
    i.setDateofacceptance(
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
@ -195,7 +195,7 @@ object PubMedToOaf {
      pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
    } else {
      val i_type = article.getPublicationTypes.asScala
-        .map(s => (s.getValue,getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
+        .map(s => (s.getValue, getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
        .find(q => q._2 != null)
      if (i_type.isDefined) {
@ -205,8 +205,7 @@ object PubMedToOaf {
        itm.setOriginalType(i_type.get._1)
        itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
        pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
-      }
+      } else
      else
        return null
    }
    val result = createResult(pubmedInstance.getInstancetype, vocabularies)
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDTest.java
@ -0,0 +1,119 @@
 package eu.dnetlib.dhp.collection.orcid;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Objects;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.io.Text;
 import org.apache.spark.SparkContext;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.ximpleware.NavException;
 import com.ximpleware.ParseException;
 import com.ximpleware.XPathEvalException;
 import com.ximpleware.XPathParseException;
 import eu.dnetlib.dhp.collection.orcid.model.Author;
 import eu.dnetlib.dhp.collection.orcid.model.ORCIDItem;
 import eu.dnetlib.dhp.parser.utility.VtdException;
 /**
  * Smoke tests for {@link OrcidParser}: each test loads XML fixtures from the test classpath,
  * parses them and prints the parsed object serialized as JSON.
  *
  * NOTE(review): these tests only verify that parsing and serialization do not throw; consider
  * adding assertions on the parsed fields once the expected values are agreed upon.
  */
 public class DownloadORCIDTest {
 	private final Logger log = LoggerFactory.getLogger(DownloadORCIDTest.class);
 	/**
 	 * Reads a classpath resource fully into a String.
 	 *
 	 * The content is decoded explicitly as UTF-8 (the XML fixtures shipped with this test declare
 	 * encoding="UTF-8") instead of relying on the platform default charset, and the underlying
 	 * stream is always closed.
 	 *
 	 * @param path absolute classpath location of the resource
 	 * @return the whole resource content
 	 * @throws IOException if the resource cannot be read
 	 * @throws NullPointerException if no resource exists at the given path
 	 */
 	private String loadResource(final String path) throws IOException {
 		try (java.io.InputStream in = Objects
 			.requireNonNull(getClass().getResourceAsStream(path), "missing test resource: " + path)) {
 			return IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8);
 		}
 	}
 	/** Parses the ORCID summary fixture and prints it as JSON. */
 	@Test
 	public void testSummary() throws Exception {
 		final String xml = loadResource("/eu/dnetlib/dhp/collection/orcid/summary.xml");
 		final OrcidParser parser = new OrcidParser();
 		ORCIDItem orcidItem = parser.parseSummary(xml);
 		final ObjectMapper mapper = new ObjectMapper();
 		System.out.println(mapper.writeValueAsString(orcidItem));
 	}
 	/** Parses every ORCID work fixture, in order, and prints each result as JSON. */
 	@Test
 	public void testParsingWork() throws Exception {
 		final List<String> workPaths = Arrays
 			.asList(
 				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml",
 				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml",
 				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml",
 				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml",
 				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml"
 			);
 		final OrcidParser parser = new OrcidParser();
 		final ObjectMapper mapper = new ObjectMapper();
 		// The method already declares `throws Exception`, so a plain loop lets failures
 		// propagate as-is instead of being wrapped in RuntimeException inside lambdas.
 		for (final String path : workPaths) {
 			System.out.println(mapper.writeValueAsString(parser.parseWork(loadResource(path))));
 		}
 	}
 	/** Parses every ORCID employment fixture, in order, and prints each result as JSON. */
 	@Test
 	public void testParsingEmployments() throws Exception {
 		final List<String> employmentPaths = Arrays
 			.asList(
 				"/eu/dnetlib/dhp/collection/orcid/employment.xml",
 				"/eu/dnetlib/dhp/collection/orcid/employment_2.xml",
 				"/eu/dnetlib/dhp/collection/orcid/employment_3.xml"
 			);
 		final OrcidParser parser = new OrcidParser();
 		final ObjectMapper mapper = new ObjectMapper();
 		for (final String path : employmentPaths) {
 			System.out.println(mapper.writeValueAsString(parser.parseEmployment(loadResource(path))));
 		}
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml
@ -0,0 +1,69 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <work:work xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="26448226" path="/0000-0001-5010-5001/work/26448226" visibility="public">
    <common:created-date>2016-09-01T19:22:46.768Z</common:created-date>
    <common:last-modified-date>2022-05-25T03:48:56.968Z</common:last-modified-date>
    <common:source>
        <common:source-client-id>
            <common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
            <common:path>0000-0002-5982-8983</common:path>
            <common:host>orcid.org</common:host>
        </common:source-client-id>
        <common:source-name>Scopus - Elsevier</common:source-name>
        <common:assertion-origin-orcid>
            <common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
            <common:path>0000-0001-5010-5001</common:path>
            <common:host>orcid.org</common:host>
        </common:assertion-origin-orcid>
        <common:assertion-origin-name>Quang Nguyen</common:assertion-origin-name>
    </common:source>
    <work:title>
        <common:title>Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms</common:title>
    </work:title>
    <work:journal-title>American Journal of Neuroradiology</work:journal-title>
    <work:citation>
        <work:citation-type>bibtex</work:citation-type>
        <work:citation-value>@article{Nguyen2014,title = {Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms},journal = {American Journal of Neuroradiology},year = {2014},volume = {35},number = {11},pages = {2140-2145},author = {Durst, C. and Starke, R.M. and Gaughen, J. and Nguyen, Q. and Patrie, J. and Jensen, M.E. and Evans, A.J.}}</work:citation-value>
    </work:citation>
    <work:type>journal-article</work:type>
    <common:publication-date>
        <common:year>2014</common:year>
    </common:publication-date>
    <common:external-ids>
        <common:external-id>
            <common:external-id-type>doi</common:external-id-type>
            <common:external-id-value>10.3174/ajnr.A4032</common:external-id-value>
            <common:external-id-normalized transient="true">10.3174/ajnr.a4032</common:external-id-normalized>
            <common:external-id-relationship>self</common:external-id-relationship>
        </common:external-id>
        <common:external-id>
            <common:external-id-type>eid</common:external-id-type>
            <common:external-id-value>2-s2.0-84911865199</common:external-id-value>
            <common:external-id-normalized transient="true">2-s2.0-84911865199</common:external-id-normalized>
            <common:external-id-relationship>self</common:external-id-relationship>
        </common:external-id>
    </common:external-ids>
    <common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-84911865199&amp;partnerID=MN8TOARS</common:url>
    <work:contributors>
        <work:contributor>
            <work:credit-name>Durst, C.</work:credit-name>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Starke, R.M.</work:credit-name>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Gaughen, J.</work:credit-name>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Nguyen, Q.</work:credit-name>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Patrie, J.</work:credit-name>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Jensen, M.E.</work:credit-name>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Evans, A.J.</work:credit-name>
        </work:contributor>
    </work:contributors>
 </work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml
@ -0,0 +1,79 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <work:work xmlns:address="http://www.orcid.org/ns/address"
    xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
    xmlns:employment="http://www.orcid.org/ns/employment"
    xmlns:education="http://www.orcid.org/ns/education"
    xmlns:other-name="http://www.orcid.org/ns/other-name"
    xmlns:deprecated="http://www.orcid.org/ns/deprecated"
    xmlns:funding="http://www.orcid.org/ns/funding"
    xmlns:research-resource="http://www.orcid.org/ns/research-resource"
    xmlns:service="http://www.orcid.org/ns/service"
    xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
    xmlns:distinction="http://www.orcid.org/ns/distinction"
    xmlns:internal="http://www.orcid.org/ns/internal"
    xmlns:membership="http://www.orcid.org/ns/membership"
    xmlns:person="http://www.orcid.org/ns/person"
    xmlns:personal-details="http://www.orcid.org/ns/personal-details"
    xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
    xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
    xmlns:activities="http://www.orcid.org/ns/activities"
    xmlns:qualification="http://www.orcid.org/ns/qualification"
    xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
    xmlns:error="http://www.orcid.org/ns/error"
    xmlns:preferences="http://www.orcid.org/ns/preferences"
    xmlns:invited-position="http://www.orcid.org/ns/invited-position"
    xmlns:work="http://www.orcid.org/ns/work"
    xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="50101152"
    path="/0000-0001-5349-4030/work/50101152" visibility="public">
    <common:created-date>2018-11-01T19:49:45.562Z</common:created-date>
    <common:last-modified-date>2018-11-01T19:49:45.562Z</common:last-modified-date>
    <common:source>
        <common:source-client-id>
            <common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
            <common:path>0000-0002-5982-8983</common:path>
            <common:host>orcid.org</common:host>
        </common:source-client-id>
        <common:source-name>Scopus - Elsevier</common:source-name>
    </common:source>
    <work:title>
        <common:title>"Calling Out" in class: Degrees of candor in addressing social injustices in
            racially homogenous and heterogeneous U.S. history classrooms</common:title>
    </work:title>
    <work:journal-title>Journal of Social Studies Research</work:journal-title>
    <work:citation>
        <work:citation-type>bibtex</work:citation-type>
        <work:citation-value>@article{Massaro2018,title = {{"}Calling Out{"} in class: Degrees of
            candor in addressing social injustices in racially homogenous and heterogeneous U.S.
            history classrooms},journal = {Journal of Social Studies Research},year = {2018},author
            = {Parkhouse, H. and Massaro, V.R.}}</work:citation-value>
    </work:citation>
    <work:type>journal-article</work:type>
    <common:publication-date>
        <common:year>2018</common:year>
    </common:publication-date>
    <common:external-ids>
        <common:external-id>
            <common:external-id-type>doi</common:external-id-type>
            <common:external-id-value>10.1016/j.jssr.2018.01.004</common:external-id-value>
            <common:external-id-normalized transient="true"
                >10.1016/j.jssr.2018.01.004</common:external-id-normalized>
            <common:external-id-relationship>self</common:external-id-relationship>
        </common:external-id>
        <common:external-id>
            <common:external-id-type>eid</common:external-id-type>
            <common:external-id-value>2-s2.0-85041949043</common:external-id-value>
            <common:external-id-normalized transient="true"
                >2-s2.0-85041949043</common:external-id-normalized>
            <common:external-id-relationship>self</common:external-id-relationship>
        </common:external-id>
    </common:external-ids>
    <common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-85041949043&amp;partnerID=MN8TOARS</common:url>
    <work:contributors>
        <work:contributor>
            <work:credit-name>Parkhouse, H.</work:credit-name>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Massaro, V.R.</work:credit-name>
        </work:contributor>
    </work:contributors>
 </work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml
@ -0,0 +1,113 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <work:work xmlns:address="http://www.orcid.org/ns/address"
           xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
           xmlns:employment="http://www.orcid.org/ns/employment"
           xmlns:education="http://www.orcid.org/ns/education"
           xmlns:other-name="http://www.orcid.org/ns/other-name"
           xmlns:deprecated="http://www.orcid.org/ns/deprecated"
           xmlns:funding="http://www.orcid.org/ns/funding"
           xmlns:research-resource="http://www.orcid.org/ns/research-resource"
           xmlns:service="http://www.orcid.org/ns/service"
           xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
           xmlns:distinction="http://www.orcid.org/ns/distinction"
           xmlns:internal="http://www.orcid.org/ns/internal"
           xmlns:membership="http://www.orcid.org/ns/membership"
           xmlns:person="http://www.orcid.org/ns/person"
           xmlns:personal-details="http://www.orcid.org/ns/personal-details"
           xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
           xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
           xmlns:activities="http://www.orcid.org/ns/activities"
           xmlns:qualification="http://www.orcid.org/ns/qualification"
           xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
           xmlns:error="http://www.orcid.org/ns/error"
           xmlns:preferences="http://www.orcid.org/ns/preferences"
           xmlns:invited-position="http://www.orcid.org/ns/invited-position"
           xmlns:work="http://www.orcid.org/ns/work"
           xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
           path="/0000-0003-2760-1191/work/28776099" visibility="public">
    <common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
    <common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
    <common:source>
        <common:source-orcid>
            <common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
            <common:path>0000-0002-9157-3431</common:path>
            <common:host>orcid.org</common:host>
        </common:source-orcid>
        <common:source-name>Europe PubMed Central</common:source-name>
    </common:source>
    <work:title>
        <common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
            Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
            ST-Segment-Elevation Myocardial Infarction.</common:title>
    </work:title>
    <work:citation>
        <work:citation-type>formatted-unspecified</work:citation-type>
        <work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
            Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
    </work:citation>
    <work:type>journal-article</work:type>
    <common:publication-date>
        <common:year>2016</common:year>
        <common:month>11</common:month>
    </common:publication-date>
    <common:external-ids>
        <common:external-id>
            <common:external-id-type>pmid</common:external-id-type>
            <common:external-id-value>27899851</common:external-id-value>
            <common:external-id-normalized transient="true">27899851</common:external-id-normalized>
            <common:external-id-relationship>self</common:external-id-relationship>
        </common:external-id>
        <common:external-id>
            <common:external-id-type>pmc</common:external-id-type>
            <common:external-id-value>PMC5126442</common:external-id-value>
            <common:external-id-normalized transient="true"
            >PMC5126442</common:external-id-normalized>
            <common:external-id-relationship>self</common:external-id-relationship>
        </common:external-id>
    </common:external-ids>
    <common:url>http://europepmc.org/abstract/med/27899851</common:url>
    <work:contributors>
        <work:contributor>
            <work:credit-name>Abdel-Dayem K</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>first</work:contributor-sequence>
                <work:contributor-role>author</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Abdel-Dayem Fake</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>first</work:contributor-sequence>
                <work:contributor-role>author</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Eweda II</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>first</work:contributor-sequence>
                <work:contributor-role>author</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name>El-Sherbiny A</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>first</work:contributor-sequence>
                <work:contributor-role>author</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Dimitry MO</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>first</work:contributor-sequence>
                <work:contributor-role>author</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Nammas W</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>first</work:contributor-sequence>
                <work:contributor-role>author</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
    </work:contributors>
 </work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml
@ -0,0 +1,106 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <work:work xmlns:address="http://www.orcid.org/ns/address"
           xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
           xmlns:employment="http://www.orcid.org/ns/employment"
           xmlns:education="http://www.orcid.org/ns/education"
           xmlns:other-name="http://www.orcid.org/ns/other-name"
           xmlns:deprecated="http://www.orcid.org/ns/deprecated"
           xmlns:funding="http://www.orcid.org/ns/funding"
           xmlns:research-resource="http://www.orcid.org/ns/research-resource"
           xmlns:service="http://www.orcid.org/ns/service"
           xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
           xmlns:distinction="http://www.orcid.org/ns/distinction"
           xmlns:internal="http://www.orcid.org/ns/internal"
           xmlns:membership="http://www.orcid.org/ns/membership"
           xmlns:person="http://www.orcid.org/ns/person"
           xmlns:personal-details="http://www.orcid.org/ns/personal-details"
           xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
           xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
           xmlns:activities="http://www.orcid.org/ns/activities"
           xmlns:qualification="http://www.orcid.org/ns/qualification"
           xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
           xmlns:error="http://www.orcid.org/ns/error"
           xmlns:preferences="http://www.orcid.org/ns/preferences"
           xmlns:invited-position="http://www.orcid.org/ns/invited-position"
           xmlns:work="http://www.orcid.org/ns/work"
           xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
           path="/0000-0003-2760-1191/work/28776099" visibility="public">
    <common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
    <common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
    <common:source>
        <common:source-orcid>
            <common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
            <common:path>0000-0002-9157-3431</common:path>
            <common:host>orcid.org</common:host>
        </common:source-orcid>
        <common:source-name>Europe PubMed Central</common:source-name>
    </common:source>
    <work:title>
        <common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
            Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
            ST-Segment-Elevation Myocardial Infarction.</common:title>
    </work:title>
    <work:citation>
        <work:citation-type>formatted-unspecified</work:citation-type>
        <work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
            Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
    </work:citation>
    <work:type>journal-article</work:type>
    <common:publication-date>
        <common:year>2016</common:year>
        <common:month>11</common:month>
    </common:publication-date>
    <common:external-ids>
        <common:external-id>
            <common:external-id-type>pmid</common:external-id-type>
            <common:external-id-value>27899851</common:external-id-value>
            <common:external-id-normalized transient="true">27899851</common:external-id-normalized>
            <common:external-id-relationship>self</common:external-id-relationship>
        </common:external-id>
        <common:external-id>
            <common:external-id-type>pmc</common:external-id-type>
            <common:external-id-value>PMC5126442</common:external-id-value>
            <common:external-id-normalized transient="true"
            >PMC5126442</common:external-id-normalized>
            <common:external-id-relationship>self</common:external-id-relationship>
        </common:external-id>
    </common:external-ids>
    <common:url>http://europepmc.org/abstract/med/27899851</common:url>
    <work:contributors>
        <work:contributor>
            <work:credit-name>Khair Abde Daye</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>first</work:contributor-sequence>
                <work:contributor-role>author</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Eweda II</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>first</work:contributor-sequence>
                <work:contributor-role>author</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name>El-Sherbiny A</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>first</work:contributor-sequence>
                <work:contributor-role>author</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Dimitry MO</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>first</work:contributor-sequence>
                <work:contributor-role>author</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name>Nammas W</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>first</work:contributor-sequence>
                <work:contributor-role>author</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
    </work:contributors>
 </work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml
@ -0,0 +1,101 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <work:work xmlns:address="http://www.orcid.org/ns/address"
           xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
           xmlns:employment="http://www.orcid.org/ns/employment"
           xmlns:education="http://www.orcid.org/ns/education"
           xmlns:other-name="http://www.orcid.org/ns/other-name"
           xmlns:deprecated="http://www.orcid.org/ns/deprecated"
           xmlns:funding="http://www.orcid.org/ns/funding"
           xmlns:research-resource="http://www.orcid.org/ns/research-resource"
           xmlns:service="http://www.orcid.org/ns/service"
           xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
           xmlns:distinction="http://www.orcid.org/ns/distinction"
           xmlns:internal="http://www.orcid.org/ns/internal"
           xmlns:membership="http://www.orcid.org/ns/membership"
           xmlns:person="http://www.orcid.org/ns/person"
           xmlns:personal-details="http://www.orcid.org/ns/personal-details"
           xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
           xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
           xmlns:activities="http://www.orcid.org/ns/activities"
           xmlns:qualification="http://www.orcid.org/ns/qualification"
           xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
           xmlns:error="http://www.orcid.org/ns/error"
           xmlns:preferences="http://www.orcid.org/ns/preferences"
           xmlns:invited-position="http://www.orcid.org/ns/invited-position"
           xmlns:work="http://www.orcid.org/ns/work"
           xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
           path="/0000-0003-2760-1191/work/28776099" visibility="public">
    <common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
    <common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
    <common:source>
        <common:source-orcid>
            <common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
            <common:path>0000-0002-9157-3431</common:path>
            <common:host>orcid.org</common:host>
        </common:source-orcid>
        <common:source-name>Europe PubMed Central</common:source-name>
    </common:source>
    <work:title>
        <common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
            Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
            ST-Segment-Elevation Myocardial Infarction.</common:title>
    </work:title>
    <work:citation>
        <work:citation-type>formatted-unspecified</work:citation-type>
        <work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
            Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
    </work:citation>
    <work:type>journal-article</work:type>
    <common:publication-date>
        <common:year>2016</common:year>
        <common:month>11</common:month>
    </common:publication-date>
    <common:external-ids>
        <common:external-id>
            <common:external-id-type>pmid</common:external-id-type>
            <common:external-id-value>27899851</common:external-id-value>
            <common:external-id-normalized transient="true">27899851</common:external-id-normalized>
            <common:external-id-relationship>self</common:external-id-relationship>
        </common:external-id>
        <common:external-id>
            <common:external-id-type>pmc</common:external-id-type>
            <common:external-id-value>PMC5126442</common:external-id-value>
            <common:external-id-normalized transient="true"
            >PMC5126442</common:external-id-normalized>
            <common:external-id-relationship>self</common:external-id-relationship>
        </common:external-id>
    </common:external-ids>
    <common:url>http://europepmc.org/abstract/med/27899851</common:url>
    <work:contributors>
        <work:contributor>
            <work:contributor-attributes>
                <work:contributor-sequence>seq0</work:contributor-sequence>
                <work:contributor-role>role0</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name>creditname1</work:credit-name>
        </work:contributor>
        <work:contributor>
            <work:credit-name>creditname2</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>seq2</work:contributor-sequence>
                <work:contributor-role></work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name>creditname3</work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence></work:contributor-sequence>
                <work:contributor-role>role3</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
        <work:contributor>
            <work:credit-name></work:credit-name>
            <work:contributor-attributes>
                <work:contributor-sequence>seq4</work:contributor-sequence>
                <work:contributor-role>role4</work:contributor-role>
            </work:contributor-attributes>
        </work:contributor>
    </work:contributors>
 </work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment.xml
@ -0,0 +1,50 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
                       xmlns:history="http://www.orcid.org/ns/history"
                       xmlns:employment="http://www.orcid.org/ns/employment"
                       xmlns:education="http://www.orcid.org/ns/education"
                       xmlns:other-name="http://www.orcid.org/ns/other-name"
                       xmlns:deprecated="http://www.orcid.org/ns/deprecated"
                       xmlns:funding="http://www.orcid.org/ns/funding"
                       xmlns:research-resource="http://www.orcid.org/ns/research-resource"
                       xmlns:service="http://www.orcid.org/ns/service"
                       xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
                       xmlns:distinction="http://www.orcid.org/ns/distinction"
                       xmlns:internal="http://www.orcid.org/ns/internal"
                       xmlns:membership="http://www.orcid.org/ns/membership"
                       xmlns:person="http://www.orcid.org/ns/person"
                       xmlns:personal-details="http://www.orcid.org/ns/personal-details"
                       xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
                       xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
                       xmlns:activities="http://www.orcid.org/ns/activities"
                       xmlns:qualification="http://www.orcid.org/ns/qualification"
                       xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
                       xmlns:error="http://www.orcid.org/ns/error"
                       xmlns:preferences="http://www.orcid.org/ns/preferences"
                       xmlns:invited-position="http://www.orcid.org/ns/invited-position"
                       xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
                       put-code="2205087" path="/0000-0001-5010-5001/employment/2205087" display-index="0"
                       visibility="public">
    <common:created-date>2016-09-01T19:21:05.791Z</common:created-date>
    <common:last-modified-date>2016-09-01T19:21:05.791Z</common:last-modified-date>
    <common:source>
        <common:source-orcid>
            <common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
            <common:path>0000-0001-5010-5001</common:path>
            <common:host>orcid.org</common:host>
        </common:source-orcid>
        <common:source-name>Quang Nguyen</common:source-name>
    </common:source>
    <common:organization>
        <common:name>Beth Israel Deaconess Medical Center</common:name>
        <common:address>
            <common:city>Boston</common:city>
            <common:region>MA</common:region>
            <common:country>US</common:country>
        </common:address>
        <common:disambiguated-organization>
            <common:disambiguated-organization-identifier>1859</common:disambiguated-organization-identifier>
            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
        </common:disambiguated-organization>
    </common:organization>
 </employment:employment>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_2.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_2.xml
@ -0,0 +1,55 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
                       xmlns:history="http://www.orcid.org/ns/history"
                       xmlns:employment="http://www.orcid.org/ns/employment"
                       xmlns:education="http://www.orcid.org/ns/education"
                       xmlns:other-name="http://www.orcid.org/ns/other-name"
                       xmlns:deprecated="http://www.orcid.org/ns/deprecated"
                       xmlns:funding="http://www.orcid.org/ns/funding"
                       xmlns:research-resource="http://www.orcid.org/ns/research-resource"
                       xmlns:service="http://www.orcid.org/ns/service"
                       xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
                       xmlns:distinction="http://www.orcid.org/ns/distinction"
                       xmlns:internal="http://www.orcid.org/ns/internal"
                       xmlns:membership="http://www.orcid.org/ns/membership"
                       xmlns:person="http://www.orcid.org/ns/person"
                       xmlns:personal-details="http://www.orcid.org/ns/personal-details"
                       xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
                       xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
                       xmlns:activities="http://www.orcid.org/ns/activities"
                       xmlns:qualification="http://www.orcid.org/ns/qualification"
                       xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
                       xmlns:error="http://www.orcid.org/ns/error"
                       xmlns:preferences="http://www.orcid.org/ns/preferences"
                       xmlns:invited-position="http://www.orcid.org/ns/invited-position"
                       xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
                       put-code="6364960" path="/0000-0001-5011-3001/employment/6364960" display-index="1"
                       visibility="public">
    <common:created-date>2018-09-03T01:46:19.474Z</common:created-date>
    <common:last-modified-date>2018-09-03T01:46:19.474Z</common:last-modified-date>
    <common:source>
        <common:source-orcid>
            <common:uri>https://orcid.org/0000-0001-5011-3001</common:uri>
            <common:path>0000-0001-5011-3001</common:path>
            <common:host>orcid.org</common:host>
        </common:source-orcid>
        <common:source-name>zhengyan li</common:source-name>
    </common:source>
    <common:start-date>
        <common:year>2008</common:year>
        <common:month>09</common:month>
        <common:day>01</common:day>
    </common:start-date>
    <common:organization>
        <common:name>Anhui Academy of Agricultural Sciences</common:name>
        <common:address>
            <common:city>Hefei</common:city>
            <common:region>Anhui</common:region>
            <common:country>CN</common:country>
        </common:address>
        <common:disambiguated-organization>
            <common:disambiguated-organization-identifier>125385</common:disambiguated-organization-identifier>
            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
        </common:disambiguated-organization>
    </common:organization>
 </employment:employment>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_3.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_3.xml
@ -0,0 +1,62 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
                       xmlns:history="http://www.orcid.org/ns/history"
                       xmlns:employment="http://www.orcid.org/ns/employment"
                       xmlns:education="http://www.orcid.org/ns/education"
                       xmlns:other-name="http://www.orcid.org/ns/other-name"
                       xmlns:deprecated="http://www.orcid.org/ns/deprecated"
                       xmlns:funding="http://www.orcid.org/ns/funding"
                       xmlns:research-resource="http://www.orcid.org/ns/research-resource"
                       xmlns:service="http://www.orcid.org/ns/service"
                       xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
                       xmlns:distinction="http://www.orcid.org/ns/distinction"
                       xmlns:internal="http://www.orcid.org/ns/internal"
                       xmlns:membership="http://www.orcid.org/ns/membership"
                       xmlns:person="http://www.orcid.org/ns/person"
                       xmlns:personal-details="http://www.orcid.org/ns/personal-details"
                       xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
                       xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
                       xmlns:activities="http://www.orcid.org/ns/activities"
                       xmlns:qualification="http://www.orcid.org/ns/qualification"
                       xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
                       xmlns:error="http://www.orcid.org/ns/error"
                       xmlns:preferences="http://www.orcid.org/ns/preferences"
                       xmlns:invited-position="http://www.orcid.org/ns/invited-position"
                       xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
                       put-code="7210424" path="/0000-0001-5022-8001/employment/7210424" display-index="1"
                       visibility="public">
    <common:created-date>2021-03-11T14:48:29.603Z</common:created-date>
    <common:last-modified-date>2021-03-11T14:48:29.603Z</common:last-modified-date>
    <common:source>
        <common:source-orcid>
            <common:uri>https://orcid.org/0000-0001-5012-1001</common:uri>
            <common:path>0000-0001-5012-1001</common:path>
            <common:host>orcid.org</common:host>
        </common:source-orcid>
        <common:source-name>Asma Bazzi</common:source-name>
    </common:source>
    <common:department-name>Pathology and Laboratory Medicine</common:department-name>
    <common:role-title>Medical Laboratory Technologist</common:role-title>
    <common:start-date>
        <common:year>1994</common:year>
        <common:month>10</common:month>
        <common:day>01</common:day>
    </common:start-date>
    <common:end-date>
        <common:year>2000</common:year>
        <common:month>06</common:month>
        <common:day>30</common:day>
    </common:end-date>
    <common:organization>
        <common:name>American University of Beirut</common:name>
        <common:address>
            <common:city>Hamra</common:city>
            <common:region>Beirut</common:region>
            <common:country>LB</common:country>
        </common:address>
        <common:disambiguated-organization>
            <common:disambiguated-organization-identifier>11238</common:disambiguated-organization-identifier>
            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
        </common:disambiguated-organization>
    </common:organization>
 </employment:employment>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/summary.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/summary.xml
@ -0,0 +1,581 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <record:record xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" path="/0000-0001-5045-1000">
    <common:orcid-identifier>
        <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
        <common:path>0000-0001-5045-1000</common:path>
        <common:host>orcid.org</common:host>
    </common:orcid-identifier>
    <preferences:preferences>
        <preferences:locale>es</preferences:locale>
    </preferences:preferences>
    <history:history>
        <history:creation-method>Direct</history:creation-method>
        <history:submission-date>2023-01-17T23:50:40.215Z</history:submission-date>
        <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
        <history:claimed>true</history:claimed>
        <history:verified-email>true</history:verified-email>
        <history:verified-primary-email>true</history:verified-primary-email>
    </history:history>
    <person:person path="/0000-0001-5045-1000/person">
        <person:name visibility="public" path="0000-0001-5045-1000">
            <common:created-date>2023-01-17T23:50:40.472Z</common:created-date>
            <common:last-modified-date>2023-01-17T23:50:40.472Z</common:last-modified-date>
            <personal-details:given-names>Patricio</personal-details:given-names>
            <personal-details:family-name>Sánchez Quinchuela</personal-details:family-name>
        </person:name>
        <other-name:other-names path="/0000-0001-5045-1000/other-names"/>
        <person:biography visibility="public" path="/0000-0001-5045-1000/biography">
            <common:created-date>2023-01-19T13:47:33.653Z</common:created-date>
            <common:last-modified-date>2023-01-19T13:47:33.653Z</common:last-modified-date>
            <personal-details:content>Especialista de vinculación con la sociedad y docente de la Universidad de las Artes. Magister en Economía Social y Solidaria por el IAEN; Magister en Proyectos Sociales y Productivos por la UNACH. Licenciado en Artes UCE. Licenciado en Castellano y Literatura por la UNACH. Doctorando del programa de Sociología de la UNED España. Larga trayectoria vinculado a las organizaciones sociales acompañando procesos de gestión cultural, formación de liderazgos y economía solidaria.</personal-details:content>
        </person:biography>
        <researcher-url:researcher-urls path="/0000-0001-5045-1000/researcher-urls"/>
        <email:emails path="/0000-0001-5045-1000/email"/>
        <address:addresses path="/0000-0001-5045-1000/address"/>
        <keyword:keywords path="/0000-0001-5045-1000/keywords"/>
        <external-identifier:external-identifiers path="/0000-0001-7291-3210/external-identifiers">
            <common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
            <external-identifier:external-identifier put-code="134902" visibility="public" path="/0000-0001-7291-3210/external-identifiers/134902" display-index="1">
                <common:created-date>2013-03-08T03:20:39.347Z</common:created-date>
                <common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
                <common:source>
                    <common:source-client-id>
                        <common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
                        <common:path>0000-0002-5982-8983</common:path>
                        <common:host>orcid.org</common:host>
                    </common:source-client-id>
                    <common:source-name>Scopus - Elsevier</common:source-name>
                    <common:assertion-origin-orcid>
                        <common:uri>https://orcid.org/0000-0001-7291-3210</common:uri>
                        <common:path>0000-0001-7291-3210</common:path>
                        <common:host>orcid.org</common:host>
                    </common:assertion-origin-orcid>
                    <common:assertion-origin-name>Paolo Manghi</common:assertion-origin-name>
                </common:source>
                <common:external-id-type>Scopus Author ID</common:external-id-type>
                <common:external-id-value>6602255248</common:external-id-value>
                <common:external-id-url>http://www.scopus.com/inward/authorDetails.url?authorID=6602255248&amp;partnerID=MN8TOARS</common:external-id-url>
                <common:external-id-relationship>self</common:external-id-relationship>
            </external-identifier:external-identifier>
        </external-identifier:external-identifiers>
    </person:person>
    <activities:activities-summary path="/0000-0001-5045-1000/activities">
        <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
        <activities:distinctions path="/0000-0001-5045-1000/distinctions">
            <common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
            <activities:affiliation-group>
                <common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
                <common:external-ids/>
                <distinction:distinction-summary put-code="19395146" display-index="1" path="/0000-0001-5045-1000/distinction/19395146" visibility="public">
                    <common:created-date>2023-01-19T13:49:48.482Z</common:created-date>
                    <common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <common:department-name>Programa de Maestría</common:department-name>
                    <common:role-title>Becario del programa de Maestría en Economía Social y Solidaria</common:role-title>
                    <common:start-date>
                        <common:year>2014</common:year>
                        <common:month>10</common:month>
                        <common:day>20</common:day>
                    </common:start-date>
                    <common:organization>
                        <common:name>Instituto de Altos Estudios Nacionales</common:name>
                        <common:address>
                            <common:city>Quito</common:city>
                            <common:region>Pichincha</common:region>
                            <common:country>EC</common:country>
                        </common:address>
                        <common:disambiguated-organization>
                            <common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
                            <common:disambiguation-source>ROR</common:disambiguation-source>
                        </common:disambiguated-organization>
                    </common:organization>
                </distinction:distinction-summary>
            </activities:affiliation-group>
        </activities:distinctions>
        <activities:educations path="/0000-0001-5045-1000/educations">
            <common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
            <activities:affiliation-group>
                <common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
                <common:external-ids/>
                <education:education-summary put-code="19389331" display-index="1" path="/0000-0001-5045-1000/education/19389331" visibility="public">
                    <common:created-date>2023-01-18T21:41:03.175Z</common:created-date>
                    <common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <common:department-name>Programa de Doctorado en Sociología</common:department-name>
                    <common:role-title>Doctorando del Programa de Sociología</common:role-title>
                    <common:start-date>
                        <common:year>2020</common:year>
                        <common:month>11</common:month>
                        <common:day>06</common:day>
                    </common:start-date>
                    <common:organization>
                        <common:name>Universidad Nacional de Educación a Distancia Facultad de Ciencias Políticas y Sociología</common:name>
                        <common:address>
                            <common:city>Madrid</common:city>
                            <common:region>Comunidad de Madrid</common:region>
                            <common:country>ES</common:country>
                        </common:address>
                        <common:disambiguated-organization>
                            <common:disambiguated-organization-identifier>223339</common:disambiguated-organization-identifier>
                            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
                        </common:disambiguated-organization>
                    </common:organization>
                </education:education-summary>
            </activities:affiliation-group>
        </activities:educations>
        <activities:employments path="/0000-0001-5045-1000/employments">
            <common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
            <activities:affiliation-group>
                <common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
                <common:external-ids/>
                <employment:employment-summary put-code="19379757" display-index="1" path="/0000-0001-5045-1000/employment/19379757" visibility="public">
                    <common:created-date>2023-01-17T23:57:08.246Z</common:created-date>
                    <common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
                    <common:role-title>Especialista de Proyectos y docente</common:role-title>
                    <common:start-date>
                        <common:year>2021</common:year>
                        <common:month>11</common:month>
                        <common:day>01</common:day>
                    </common:start-date>
                    <common:organization>
                        <common:name>Universidad de las Artes</common:name>
                        <common:address>
                            <common:city>Guayaquil</common:city>
                            <common:region>Guayas</common:region>
                            <common:country>EC</common:country>
                        </common:address>
                        <common:disambiguated-organization>
                            <common:disambiguated-organization-identifier>https://ror.org/016drwn73</common:disambiguated-organization-identifier>
                            <common:disambiguation-source>ROR</common:disambiguation-source>
                        </common:disambiguated-organization>
                    </common:organization>
                </employment:employment-summary>
            </activities:affiliation-group>
            <activities:affiliation-group>
                <common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
                <common:external-ids/>
                <employment:employment-summary put-code="19389234" display-index="1" path="/0000-0001-5045-1000/employment/19389234" visibility="public">
                    <common:created-date>2023-01-18T21:25:07.138Z</common:created-date>
                    <common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
                    <common:role-title>Director</common:role-title>
                    <common:start-date>
                        <common:year>2019</common:year>
                        <common:month>11</common:month>
                        <common:day>05</common:day>
                    </common:start-date>
                    <common:end-date>
                        <common:year>2021</common:year>
                        <common:month>10</common:month>
                        <common:day>31</common:day>
                    </common:end-date>
                    <common:organization>
                        <common:name>Universidad Regional Amazónica IKIAM</common:name>
                        <common:address>
                            <common:city>Tena</common:city>
                            <common:region>Napo</common:region>
                            <common:country>EC</common:country>
                        </common:address>
                        <common:disambiguated-organization>
                            <common:disambiguated-organization-identifier>https://ror.org/05xedqd83</common:disambiguated-organization-identifier>
                            <common:disambiguation-source>ROR</common:disambiguation-source>
                        </common:disambiguated-organization>
                    </common:organization>
                    <common:url>http://ikiam.edu.ec</common:url>
                </employment:employment-summary>
            </activities:affiliation-group>
        </activities:employments>
        <activities:fundings path="/0000-0001-5045-1000/fundings"/>
        <activities:invited-positions path="/0000-0001-5045-1000/invited-positions"/>
        <activities:memberships path="/0000-0001-5045-1000/memberships">
            <common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
            <activities:affiliation-group>
                <common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
                <common:external-ids/>
                <membership:membership-summary put-code="19927715" display-index="1" path="/0000-0001-5045-1000/membership/19927715" visibility="public">
                    <common:created-date>2023-03-24T18:16:09.131Z</common:created-date>
                    <common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <common:department-name>Artes Escénicas</common:department-name>
                    <common:role-title>Miembro</common:role-title>
                    <common:start-date>
                        <common:year>2000</common:year>
                        <common:month>07</common:month>
                        <common:day>15</common:day>
                    </common:start-date>
                    <common:organization>
                        <common:name>Casa de la Cultura Ecuatoriana</common:name>
                        <common:address>
                            <common:city>Riobamba</common:city>
                            <common:region>Sierra Centro</common:region>
                            <common:country>EC</common:country>
                        </common:address>
                    </common:organization>
                </membership:membership-summary>
            </activities:affiliation-group>
        </activities:memberships>
        <activities:peer-reviews path="/0000-0001-5045-1000/peer-reviews"/>
        <activities:qualifications path="/0000-0001-5045-1000/qualifications">
            <common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
            <activities:affiliation-group>
                <common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
                <common:external-ids/>
                <qualification:qualification-summary put-code="19389264" display-index="1" path="/0000-0001-5045-1000/qualification/19389264" visibility="public">
                    <common:created-date>2023-01-18T21:29:11.300Z</common:created-date>
                    <common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <common:department-name>Programa de Gobernabilidad</common:department-name>
                    <common:role-title>Magister en Economïa Social y Solidaria</common:role-title>
                    <common:start-date>
                        <common:year>2014</common:year>
                        <common:month>10</common:month>
                        <common:day>20</common:day>
                    </common:start-date>
                    <common:end-date>
                        <common:year>2017</common:year>
                        <common:month>01</common:month>
                        <common:day>26</common:day>
                    </common:end-date>
                    <common:organization>
                        <common:name>Instituto de Altos Estudios Nacionales</common:name>
                        <common:address>
                            <common:city>Quito</common:city>
                            <common:region>Pichincha</common:region>
                            <common:country>EC</common:country>
                        </common:address>
                        <common:disambiguated-organization>
                            <common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
                            <common:disambiguation-source>ROR</common:disambiguation-source>
                        </common:disambiguated-organization>
                    </common:organization>
                </qualification:qualification-summary>
            </activities:affiliation-group>
            <activities:affiliation-group>
                <common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
                <common:external-ids/>
                <qualification:qualification-summary put-code="19389298" display-index="1" path="/0000-0001-5045-1000/qualification/19389298" visibility="public">
                    <common:created-date>2023-01-18T21:34:32.093Z</common:created-date>
                    <common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <common:department-name>Posgrados</common:department-name>
                    <common:role-title>Magister en Proyectos Sociales y Productivos</common:role-title>
                    <common:start-date>
                        <common:year>2001</common:year>
                        <common:month>03</common:month>
                        <common:day>09</common:day>
                    </common:start-date>
                    <common:end-date>
                        <common:year>2003</common:year>
                        <common:month>02</common:month>
                        <common:day>27</common:day>
                    </common:end-date>
                    <common:organization>
                        <common:name>Universidad Nacional de Chimborazo</common:name>
                        <common:address>
                            <common:city>Riobamba</common:city>
                            <common:region>Chimborazo</common:region>
                            <common:country>EC</common:country>
                        </common:address>
                        <common:disambiguated-organization>
                            <common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
                            <common:disambiguation-source>ROR</common:disambiguation-source>
                        </common:disambiguated-organization>
                    </common:organization>
                </qualification:qualification-summary>
            </activities:affiliation-group>
            <activities:affiliation-group>
                <common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
                <common:external-ids/>
                <qualification:qualification-summary put-code="19389353" display-index="1" path="/0000-0001-5045-1000/qualification/19389353" visibility="public">
                    <common:created-date>2023-01-18T21:45:07.379Z</common:created-date>
                    <common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <common:department-name>Ciencias de la Educación</common:department-name>
                    <common:role-title>Licenciado en Ciencias de la Educación en Castellano y Literatura</common:role-title>
                    <common:start-date>
                        <common:year>1994</common:year>
                        <common:month>10</common:month>
                        <common:day>03</common:day>
                    </common:start-date>
                    <common:end-date>
                        <common:year>2000</common:year>
                        <common:month>01</common:month>
                        <common:day>31</common:day>
                    </common:end-date>
                    <common:organization>
                        <common:name>Universidad Nacional de Chimborazo</common:name>
                        <common:address>
                            <common:city>Riobamba</common:city>
                            <common:region>Chimborazo</common:region>
                            <common:country>EC</common:country>
                        </common:address>
                        <common:disambiguated-organization>
                            <common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
                            <common:disambiguation-source>ROR</common:disambiguation-source>
                        </common:disambiguated-organization>
                    </common:organization>
                </qualification:qualification-summary>
            </activities:affiliation-group>
            <activities:affiliation-group>
                <common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
                <common:external-ids/>
                <qualification:qualification-summary put-code="19389317" display-index="1" path="/0000-0001-5045-1000/qualification/19389317" visibility="public">
                    <common:created-date>2023-01-18T21:37:42.186Z</common:created-date>
                    <common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <common:department-name>Facultad de Artes</common:department-name>
                    <common:role-title>Licenciado en Artes</common:role-title>
                    <common:start-date>
                        <common:year>1989</common:year>
                        <common:month>09</common:month>
                        <common:day>05</common:day>
                    </common:start-date>
                    <common:end-date>
                        <common:year>1997</common:year>
                        <common:month>08</common:month>
                        <common:day>07</common:day>
                    </common:end-date>
                    <common:organization>
                        <common:name>Universidad Central del Ecuador</common:name>
                        <common:address>
                            <common:city>Quito</common:city>
                            <common:region>Pichincha</common:region>
                            <common:country>EC</common:country>
                        </common:address>
                        <common:disambiguated-organization>
                            <common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/100019134</common:disambiguated-organization-identifier>
                            <common:disambiguation-source>FUNDREF</common:disambiguation-source>
                        </common:disambiguated-organization>
                    </common:organization>
                </qualification:qualification-summary>
            </activities:affiliation-group>
        </activities:qualifications>
        <activities:research-resources path="/0000-0001-5045-1000/research-resources"/>
        <activities:services path="/0000-0001-5045-1000/services"/>
        <activities:works path="/0000-0001-5045-1000/works">
            <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
            <activities:group>
                <common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
                <common:external-ids/>
                <work:work-summary put-code="131526645" path="/0000-0001-5045-1000/work/131526645" visibility="public" display-index="1">
                    <common:created-date>2023-03-24T18:36:56.180Z</common:created-date>
                    <common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <work:title>
                        <common:title>Experience in a non-capitalist way: solidarity funds that do not tax interest on the use of money</common:title>
                    </work:title>
                    <common:external-ids>
                        <common:external-id>
                            <common:external-id-type>isbn</common:external-id-type>
                            <common:external-id-value>978-9942-29-089-2</common:external-id-value>
                            <common:external-id-normalized transient="true">9789942290892</common:external-id-normalized>
                            <common:external-id-relationship>part-of</common:external-id-relationship>
                        </common:external-id>
                    </common:external-ids>
                    <work:type>book-chapter</work:type>
                    <common:publication-date>
                        <common:year>2023</common:year>
                        <common:month>06</common:month>
                        <common:day>07</common:day>
                    </common:publication-date>
                    <work:journal-title>Finanzas éticas y solidarias en América Latina: diagnósticos, debates y propuestas</work:journal-title>
                </work:work-summary>
            </activities:group>
            <activities:group>
                <common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
                <common:external-ids/>
                <work:work-summary put-code="131527819" path="/0000-0001-5045-1000/work/131527819" visibility="public" display-index="1">
                    <common:created-date>2023-03-24T19:05:36.384Z</common:created-date>
                    <common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <work:title>
                        <common:title>Incidence of artistic practices in the social transformation of the territory. study of case: Hilarte Association, Guayaquil-Ecuador</common:title>
                    </work:title>
                    <common:external-ids/>
                    <work:type>conference-abstract</work:type>
                    <common:publication-date>
                        <common:year>2022</common:year>
                        <common:month>10</common:month>
                        <common:day>06</common:day>
                    </common:publication-date>
                </work:work-summary>
            </activities:group>
            <activities:group>
                <common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
                <common:external-ids>
                    <common:external-id>
                        <common:external-id-type>other-id</common:external-id-type>
                        <common:external-id-value>2018</common:external-id-value>
                        <common:external-id-normalized transient="true">2018</common:external-id-normalized>
                        <common:external-id-relationship>self</common:external-id-relationship>
                    </common:external-id>
                </common:external-ids>
                <work:work-summary put-code="141716337" path="/0000-0001-5045-1000/work/141716337" visibility="public" display-index="1">
                    <common:created-date>2023-09-04T17:40:30.215Z</common:created-date>
                    <common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <work:title>
                        <common:title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</common:title>
                    </work:title>
                    <common:external-ids>
                        <common:external-id>
                            <common:external-id-type>other-id</common:external-id-type>
                            <common:external-id-value>2018</common:external-id-value>
                            <common:external-id-normalized transient="true">2018</common:external-id-normalized>
                            <common:external-id-relationship>self</common:external-id-relationship>
                        </common:external-id>
                    </common:external-ids>
                    <common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
                    <work:type>conference-poster</work:type>
                    <common:publication-date>
                        <common:year>2018</common:year>
                        <common:month>11</common:month>
                        <common:day>30</common:day>
                    </common:publication-date>
                    <work:journal-title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</work:journal-title>
                </work:work-summary>
            </activities:group>
            <activities:group>
                <common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
                <common:external-ids/>
                <work:work-summary put-code="131527433" path="/0000-0001-5045-1000/work/131527433" visibility="public" display-index="1">
                    <common:created-date>2023-03-24T18:57:10.095Z</common:created-date>
                    <common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <work:title>
                        <common:title>Promotion of the popular and solidarity economy from the state: principles and challenges in the experience of Ecuador</common:title>
                    </work:title>
                    <common:external-ids/>
                    <work:type>dissertation-thesis</work:type>
                    <common:publication-date>
                        <common:year>2017</common:year>
                        <common:month>01</common:month>
                        <common:day>26</common:day>
                    </common:publication-date>
                </work:work-summary>
            </activities:group>
            <activities:group>
                <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
                <common:external-ids/>
                <work:work-summary put-code="141716713" path="/0000-0001-5045-1000/work/141716713" visibility="public" display-index="1">
                    <common:created-date>2023-09-04T17:51:57.749Z</common:created-date>
                    <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
                    <common:source>
                        <common:source-orcid>
                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
                            <common:path>0000-0001-5045-1000</common:path>
                            <common:host>orcid.org</common:host>
                        </common:source-orcid>
                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
                    </common:source>
                    <work:title>
                        <common:title>La Rebelión de los Dioses</common:title>
                    </work:title>
                    <common:external-ids/>
                    <common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
                    <work:type>registered-copyright</work:type>
                    <common:publication-date>
                        <common:year>2001</common:year>
                        <common:month>08</common:month>
                        <common:day>28</common:day>
                    </common:publication-date>
                    <work:journal-title>Editorial pedagógica freire</work:journal-title>
                </work:work-summary>
            </activities:group>
        </activities:works>
    </activities:activities-summary>
 </record:record>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
@ -133,32 +133,6 @@
            <arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
        <ok to="PreProcessORCID"/>
        <error to="Kill"/>
    </action>
    <!--  ORCID  SECTION -->
    <action name="PreProcessORCID">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>Convert ORCID to Dataset</name>
            <class>eu.dnetlib.doiboost.orcid.SparkPreprocessORCID</class>
            <jar>dhp-doiboost-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.sql.shuffle.partitions=3840
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${inputPathOrcid}</arg>
            <arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml
@ -59,10 +59,10 @@
        </property>
        <!--    ORCID Parameters    -->
-        <property>
+<!--        <property>-->
-            <name>workingPathOrcid</name>
+<!--            <name>workingPathOrcid</name>-->
-            <description>the ORCID working path</description>
+<!--            <description>the ORCID working path</description>-->
-        </property>
+<!--        </property>-->
    </parameters>
@ -170,32 +170,6 @@
            <arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
        <ok to="ProcessORCID"/>
        <error to="Kill"/>
    </action>
    <!--  ORCID  SECTION -->
    <action name="ProcessORCID">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>Convert ORCID to Dataset</name>
            <class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
            <jar>dhp-doiboost-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.sql.shuffle.partitions=3840
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
            <arg>--targetPath</arg><arg>${workingPath}/orcidPublication</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
        <ok to="CreateDOIBoost"/>
        <error to="Kill"/>
    </action>
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
@ -66,7 +66,7 @@ object SparkGenerateDoiBoost {
      Encoders.tuple(Encoders.STRING, mapEncoderPub)
    implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
-    logger.info("Phase 2) Join Crossref with UnpayWall")
+    logger.info("Phase 1) Join Crossref with UnpayWall")
    val crossrefPublication: Dataset[(String, Publication)] =
      spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
@ -91,20 +91,10 @@ object SparkGenerateDoiBoost {
      .write
      .mode(SaveMode.Overwrite)
      .save(s"$workingDirPath/firstJoin")
    logger.info("Phase 3) Join Result with ORCID")
    val fj: Dataset[(String, Publication)] =
      spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
    val orcidPublication: Dataset[(String, Publication)] =
      spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
    fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left")
      .map(applyMerge)
      .write
      .mode(SaveMode.Overwrite)
      .save(s"$workingDirPath/secondJoin")
-    logger.info("Phase 4) Join Result with MAG")
+    logger.info("Phase 2) Join Result with MAG")
    val sj: Dataset[(String, Publication)] =
-      spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p))
+      spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
    val magPublication: Dataset[(String, Publication)] =
      spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@ -107,7 +107,7 @@ case object Crossref2Oaf {
      .map(f => f.id)
  }
-  def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType:String): Result = {
+  def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    //MAPPING Crossref DOI into PID
@ -372,7 +372,7 @@ case object Crossref2Oaf {
      objectType,
      mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
    )
-    mappingResult(result, json, cOBJCategory, originalType)
+    mappingResult(result, json, cOBJCategory, objectSubType)
    if (result == null || result.getId == null)
      return List()
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json
@ -0,0 +1,26 @@
 [
  {
    "paramName": "mt",
    "paramLongName": "master",
    "paramDescription": "should be local or yarn",
    "paramRequired": true
  },
  {
    "paramName": "op",
    "paramLongName": "orcidPath",
    "paramDescription": "the path of the orcid Table generated by the dump",
    "paramRequired": true
  },
  {
    "paramName": "gp",
    "paramLongName": "graphPath",
    "paramDescription": "the path of the graph to be enriched",
    "paramRequired": true
  },
  {
    "paramName": "tp",
    "paramLongName": "targetPath",
    "paramDescription": "the output path of the enriched graph",
    "paramRequired": true
  }
 ]
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/config-default.xml
@ -0,0 +1,34 @@
 <configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hiveMetastoreUris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>hiveJdbcUrl</name>
        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
    </property>
    <property>
        <name>hiveDbName</name>
        <value>openaire</value>
    </property>
    <property>
        <name>oozie.launcher.mapreduce.user.classpath.first</name>
        <value>true</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml
@ -0,0 +1,88 @@
 <workflow-app name="Enrich_graph_with_ORCID_Workflow" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>orcidPath</name>
            <description>the path of the orcid Table generated by the dump</description>
        </property>
        <property>
            <name>graphPath</name>
            <description>the path of the graph to be enriched</description>
        </property>
        <property>
            <name>targetPath</name>
            <description>the output path of the enriched graph</description>
        </property>
    </parameters>
    <start to="EnrichGraph"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <!--
        Runs the Spark job that enriches the graph found under graphPath with the
        ORCID table found under orcidPath and writes the result under targetPath.
        On success the workflow continues with the distcp chain
        (copy_datasource, copy_organization, copy_project, copy_relation), which
        copies those folders from graphPath to targetPath. The ok-transition used
        to point straight to End, which left that chain unreachable.
    -->
    <action name="EnrichGraph">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Enrich Graph with ORCID</name>
            <class>eu.dnetlib.dhp.enrich.orcid.SparkEnrichGraphWithOrcidAuthors</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.executor.memoryOverhead=2g
                --conf spark.sql.shuffle.partitions=3000
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--orcidPath</arg>
            <arg>${orcidPath}</arg>
            <arg>--targetPath</arg>
            <arg>${targetPath}</arg>
            <arg>--graphPath</arg>
            <arg>${graphPath}</arg>
            <arg>--master</arg>
            <arg>yarn</arg>
        </spark>
        <ok to="copy_datasource"/>
        <error to="Kill"/>
    </action>
    <action name="copy_datasource">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/datasource</arg>
            <arg>${nameNode}/${targetPath}/datasource</arg>
        </distcp>
        <ok to="copy_organization"/>
        <error to="Kill"/>
    </action>
    <action name="copy_organization">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/organization</arg>
            <arg>${nameNode}/${targetPath}/organization</arg>
        </distcp>
        <ok to="copy_project"/>
        <error to="Kill"/>
    </action>
    <action name="copy_project">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/project</arg>
            <arg>${nameNode}/${targetPath}/project</arg>
        </distcp>
        <ok to="copy_relation"/>
        <error to="Kill"/>
    </action>
    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <arg>${nameNode}/${graphPath}/relation</arg>
            <arg>${nameNode}/${targetPath}/relation</arg>
        </distcp>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/AuthorEnricher.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/AuthorEnricher.scala
@ -0,0 +1,40 @@
 package eu.dnetlib.dhp.enrich.orcid
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
 import eu.dnetlib.dhp.schema.sx.OafUtils
 import org.apache.spark.sql.Row
 import scala.collection.JavaConverters._
 /** Helpers that convert ORCID author rows into OAF [[Author]] instances. */
 object AuthorEnricher extends Serializable {

  /** Builds an OAF author that carries a single ORCID pid.
    *
    * The pid is created with a freshly generated data info whose provenance action is set to
    * "ORCID_ENRICHMENT".
    *
    * @param givenName  given name of the author, may be null
    * @param familyName family name of the author, may be null
    * @param orcid      ORCID identifier stored as the value of the pid
    * @return the new author; its fullname is made of the name parts that are present, separated by one space
    */
  def createAuthor(givenName: String, familyName: String, orcid: String): Author = {
    val a = new Author
    a.setName(givenName)
    a.setSurname(familyName)
    // String interpolation would render a missing part as the literal text "null" (e.g. "null Smith"),
    // so the fullname is assembled only from the parts that are actually available.
    val fullname = Seq(givenName, familyName)
      .flatMap(Option(_))
      .filter(_.trim.nonEmpty)
      .mkString(" ")
    a.setFullname(fullname)
    val pid = OafUtils.createSP(orcid, ModelConstants.ORCID, ModelConstants.ORCID)
    pid.setDataInfo(OafUtils.generateDataInfo())
    pid.getDataInfo.setProvenanceaction(OafUtils.createQualifier("ORCID_ENRICHMENT", "ORCID_ENRICHMENT"))
    a.setPid(List(pid).asJava)
    a
  }

  /** Converts the author structs stored in a row into OAF authors.
    *
    * @param r row whose column at position 1 is a list of structs exposing the fields
    *          `givenName`, `familyName` and `orcid`
    * @return one [[Author]] per struct, in the order in which the structs appear in the list
    */
  def toOAFAuthor(r: Row): java.util.List[Author] = {
    r.getList[Row](1)
      .asScala
      .map(s => createAuthor(s.getAs[String]("givenName"), s.getAs[String]("familyName"), s.getAs[String]("orcid")))
      .toList
      .asJava
  }

 // NOTE(review): disabled draft kept for reference; it depends on an OAuthor type that is not defined in this file.
 //  def enrichAuthor(p:Publication,r:Row): Unit = {
 //    val k:Map[String, OAuthor] =r.getList[Row](1).asScala.map(s => (s.getAs[String]("orcid"), OAuthor(s.getAs[String]("givenName") ,s.getAs[String]("familyName") ))).groupBy(_._1).mapValues(_.map(_._2).head)
 //    println(k)
 //
 //
 //
 //  }
 }
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala
@ -0,0 +1,138 @@
 package eu.dnetlib.dhp.enrich.orcid
 import eu.dnetlib.dhp.application.AbstractScalaApplication
 import eu.dnetlib.dhp.oa.merge.AuthorMerger
 import eu.dnetlib.dhp.schema.common.ModelSupport
 import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql._
 import org.slf4j.{Logger, LoggerFactory}
 import scala.collection.JavaConverters._
 /** Spark application that enriches the authors of the graph results with ORCID identifiers.
   *
   * The ORCID `Authors` and `Works` tables are joined into a table of (pid schema, pid value, author),
   * which is then matched against the instance pids of every result type of the graph.
   *
   * @param propertyPath path of the parameter definition file, forwarded to [[AbstractScalaApplication]]
   * @param args         command line arguments of the application
   * @param log          logger used by the application
   */
 class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Entry point of the Spark node: reads the `graphPath`, `orcidPath` and `targetPath` arguments,
    * builds the ORCID table once and enriches publication, dataset, software and otherresearchproduct.
    */
  override def run(): Unit = {
    val graphPath = parser.get("graphPath")
    log.info(s"graphPath is '$graphPath'")
    val orcidPath = parser.get("orcidPath")
    log.info(s"orcidPath is '$orcidPath'")
    val targetPath = parser.get("targetPath")
    log.info(s"targetPath is '$targetPath'")
    // Built (and cached) once, then reused by the four enrichments below.
    val orcidPublication: Dataset[Row] = generateOrcidTable(spark, orcidPath)
 //    ModelSupport.entityTypes.entrySet().asScala.filter(k => k.getKey.getClass isInstance(Result))
    enrichResult(
      spark,
      s"$graphPath/publication",
      orcidPublication,
      s"$targetPath/publication",
      Encoders.bean(classOf[Publication])
    )
    enrichResult(
      spark,
      s"$graphPath/dataset",
      orcidPublication,
      s"$targetPath/dataset",
      Encoders.bean(classOf[eu.dnetlib.dhp.schema.oaf.Dataset])
    )
    enrichResult(
      spark,
      s"$graphPath/software",
      orcidPublication,
      s"$targetPath/software",
      Encoders.bean(classOf[Software])
    )
    enrichResult(
      spark,
      s"$graphPath/otherresearchproduct",
      orcidPublication,
      s"$targetPath/otherresearchproduct",
      Encoders.bean(classOf[OtherResearchProduct])
    )
  }

  /** Enriches the authors of one result type and writes the outcome as gzip-compressed JSON.
    *
    * Results that match no ORCID work are written unchanged; for the others the author list is replaced
    * by the value returned by `AuthorMerger.enrichOrcid`.
    *
    * @param spark            the active Spark session
    * @param graphPath        path of the JSON dump of the result type to enrich
    * @param orcidPublication table with the columns `schema`, `value` and `author` (see generateOrcidTable)
    * @param outputPath       path where the enriched results are written, overwriting existing data
    * @param enc              bean encoder of the result type, also used to derive the JSON schema
    * @tparam T the concrete result type
    */
  private def enrichResult[T <: Result](
    spark: SparkSession,
    graphPath: String,
    orcidPublication: Dataset[Row],
    outputPath: String,
    enc: Encoder[T]
  ): Unit = {
    // One row per (result id, instance pid).
    // NOTE(review): in SQL a null `datainfo.deletedbyinference` makes the predicate below evaluate to null,
    // so such records are excluded from the matching as well; confirm this is intended.
    val entities = spark.read
      .schema(enc.schema)
      .json(graphPath)
      .select(col("id"), col("datainfo"), col("instance"))
      .where("datainfo.deletedbyinference != true")
      .drop("datainfo")
      .withColumn("instances", explode(col("instance")))
      .withColumn("pids", explode(col("instances.pid")))
      .select(
        col("pids.qualifier.classid").alias("pid_schema"),
        col("pids.value").alias("pid_value"),
        col("id").alias("dnet_id")
      )

    // For every matched result id, the set of ORCID authors of the works sharing one of its pids.
    // Schema and value are compared case-insensitively.
    val orcidDnet = orcidPublication
      .join(
        entities,
        lower(col("schema")).equalTo(lower(col("pid_schema"))) &&
        lower(col("value")).equalTo(lower(col("pid_value"))),
        "inner"
      )
      .groupBy(col("dnet_id"))
      .agg(collect_set(orcidPublication("author")).alias("orcid_authors"))
      .select("dnet_id", "orcid_authors")
      .cache()

    try {
      // Action that materializes the cached match table before the enrichment join.
      orcidDnet.count()
      val result = spark.read.schema(enc.schema).json(graphPath).as[T](enc)
      result
        .joinWith(orcidDnet, result("id").equalTo(orcidDnet("dnet_id")), "left")
        .map {
          // The pair is statically typed as (T, Row): no type test is needed, and a test on the erased
          // type parameter T could not be checked at runtime anyway.
          case (entity, null) =>
            // Left join without a match: nothing to enrich.
            entity
          case (entity, orcidRow) =>
            entity.setAuthor(AuthorMerger.enrichOrcid(entity.getAuthor, AuthorEnricher.toOAFAuthor(orcidRow)))
            entity
        }(enc)
        .write
        .mode(SaveMode.Overwrite)
        .option("compression", "gzip")
        .json(outputPath)
    } finally {
      // This method runs once per result type: release the cached table instead of letting the
      // four cached copies pile up until the application ends.
      orcidDnet.unpersist()
    }
  }

  /** Builds the table that links work identifiers to ORCID authors.
    *
    * @param spark     the active Spark session
    * @param inputPath folder containing the `Authors` and `Works` tables
    * @return a cached dataset with the columns `schema`, `value` and `author`, where `author` is a struct
    *         of `orcid`, `givenName` and `familyName`; only identifiers whose schema is one of
    *         doi, pmid, pmc, arxiv or handle are kept
    */
  private def generateOrcidTable(spark: SparkSession, inputPath: String): Dataset[Row] = {
    val orcidAuthors =
      spark.read.load(s"$inputPath/Authors").select("orcid", "familyName", "givenName", "creditName", "otherNames")
    // One row per (orcid, work identifier).
    val orcidWorks = spark.read
      .load(s"$inputPath/Works")
      .select(col("orcid"), explode(col("pids")).alias("identifier"))
      .where(
        "identifier.schema IN('doi','pmid','pmc','arxiv','handle')"
      )
    val orcidPublication = orcidAuthors
      .join(orcidWorks, orcidAuthors("orcid").equalTo(orcidWorks("orcid")))
      .select(
        col("identifier.schema").alias("schema"),
        col("identifier.value").alias("value"),
        struct(orcidAuthors("orcid").alias("orcid"), col("givenName"), col("familyName")).alias("author")
      )
    orcidPublication.cache()
  }
 }
 /** Launcher of the ORCID enrichment Spark application. */
 object SparkEnrichGraphWithOrcidAuthors {

  /** Logger handed over to the application instance created by `main`. */
  val log: Logger = LoggerFactory.getLogger(SparkEnrichGraphWithOrcidAuthors.getClass)

  /** Creates the application from the command line arguments, initializes it and runs it.
    *
    * @param args command line arguments forwarded to the application
    */
  def main(args: Array[String]): Unit = {
    val application =
      new SparkEnrichGraphWithOrcidAuthors("/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json", args, log)
    application.initialize().run()
  }
 }
--- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/EnrichOrcidTest.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/EnrichOrcidTest.scala
@ -0,0 +1,77 @@
 package eu.dnetlib.dhp.enrich.orcid
 import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
 import org.apache.spark.sql.{Column, Encoder, Encoders, Row, SparkSession}
 import org.junit.jupiter.api.Test
 import org.slf4j.{Logger, LoggerFactory}
 import org.apache.spark.sql.functions._
 /** Scratch harness used while developing the ORCID enrichment.
   *
   * NOTE(review): `test()` carries no `@Test` annotation in this class and performs no assertion; most of
   * its body is disabled. Confirm whether it should become a real test or be removed.
   */
 class EnrichOrcidTest {
  val log: Logger = LoggerFactory.getLogger(getClass)

  /** Opens (or reuses) a local Spark session and derives the schema of the Publication bean.
    *
    * NOTE(review): the session is not stopped here; because it is obtained with `getOrCreate()` it may be
    * shared with other tests, so verify before adding a `stop()`.
    */
  def test(): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
 //    spark.sparkContext.setLogLevel("ERROR")
 //    new SparkEnrichGraphWithOrcidAuthors(null, null, null)
 //      .enrichResult(
 //        spark,
 //        "/Users/sandro/orcid_test/publication",
 //        "",
 //        "/tmp/graph/",
 //        Encoders.bean(classOf[Publication])
 //      )
    val schema = Encoders.bean(classOf[Publication]).schema
 //
 //    val simplifyAuthor = udf((r: Seq[Row]) => {
 //      r
 //        .map(k =>
 //          AuthorPid(
 //            k.getAs[String]("fullname"),
 //            k.getAs[Seq[Row]]("pid")
 //              .map(p => Pid(p.getAs[Row]("qualifier").getAs[String]("classid"), p.getAs[String]("value")))
 //              .toList
 //          )
 //        )
 //        .filter(l => l.pids.nonEmpty)
 //        .toList
 //    })
 //
 //    val wrong_orcid_intersection = udf((a: Seq[Row]) => {
 //      a.map(author => {
 //        val pids_with_orcid: Seq[Row] = author
 //          .getAs[Seq[Row]]("pids")
 //          .filter(p =>
 //            p.getAs[String]("pidScheme") != null && p.getAs[String]("pidScheme").toLowerCase.contains("orcid")
 //          )
 //        if (pids_with_orcid.exists(p => p.getAs[String]("pidScheme").equals("ORCID"))) {
 //          if (pids_with_orcid.map(p => p.getAs[String]("pidValue").toLowerCase).distinct.size > 1) {
 //            AuthorPid(
 //              author.getAs[String]("fullName"),
 //              pids_with_orcid.map(p => Pid(p.getAs[String]("pidScheme"), p.getAs[String]("pidValue"))).toList
 //            )
 //
 //          } else
 //            null
 //        } else
 //          null
 //      }).filter(author => author != null)
 //    })
    // The implicits are only needed by the disabled code below.
    import spark.implicits._
 //    val enriched = spark.read
 //      .schema(schema)
 //      .json("/Users/sandro/orcid_test/publication_enriched")
 //      .select(col("id"), explode(col("author")).as("authors"))
 //      .withColumn("ap", col("authors.pid.qualifier.classid"))
 //      .withColumn("dp", col("authors.pid.datainfo.provenanceAction.classid"))
 //
 //      .show()
  }
 }