Merge pull request 'ORCID Enrichment and Download' (#364) from orcid_import into beta

Reviewed-on: #364
2023-12-01 15:05:44 +01:00 · 2023-12-01 15:05:44 +01:00 · c5ac593c07
parent 93a700742a 09d061e90b
commit c5ac593c07
43 changed files with 3251 additions and 229 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
@ -4,194 +4,318 @@ package eu.dnetlib.dhp.oa.merge;
 import java.text.Normalizer;
 import java.util.*;
 import java.util.stream.Collectors;
-
 import org.apache.commons.lang3.StringUtils;
-
 import com.wcohen.ss.JaroWinkler;
-
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.pace.model.Person;
 import scala.Tuple2;

+
 public class AuthorMerger {

-	private static final Double THRESHOLD = 0.95;
+    private static final Double THRESHOLD = 0.95;

-	private AuthorMerger() {
-	}
+    private AuthorMerger() {
+    }

-	public static List<Author> merge(List<List<Author>> authors) {
+    public static List<Author> merge(List<List<Author>> authors) {

-		authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
+        authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));

-		List<Author> author = new ArrayList<>();
+        List<Author> author = new ArrayList<>();

-		for (List<Author> a : authors) {
-			author = mergeAuthor(author, a);
-		}
+        for (List<Author> a : authors) {
+            author = mergeAuthor(author, a);
+        }

-		return author;
+        return author;

-	}
+    }

-	public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
-		int pa = countAuthorsPids(a);
-		int pb = countAuthorsPids(b);
-		List<Author> base;
-		List<Author> enrich;
-		int sa = authorsSize(a);
-		int sb = authorsSize(b);
+    public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
+        int pa = countAuthorsPids(a);
+        int pb = countAuthorsPids(b);
+        List<Author> base;
+        List<Author> enrich;
+        int sa = authorsSize(a);
+        int sb = authorsSize(b);

-		if (sa == sb) {
-			base = pa > pb ? a : b;
-			enrich = pa > pb ? b : a;
-		} else {
-			base = sa > sb ? a : b;
-			enrich = sa > sb ? b : a;
-		}
-		enrichPidFromList(base, enrich, threshold);
-		return base;
-	}
+        if (sa == sb) {
+            base = pa > pb ? a : b;
+            enrich = pa > pb ? b : a;
+        } else {
+            base = sa > sb ? a : b;
+            enrich = sa > sb ? b : a;
+        }
+        enrichPidFromList(base, enrich, threshold);
+        return base;
+    }

-	public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
-		return mergeAuthor(a, b, THRESHOLD);
-	}
+    public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
+        return mergeAuthor(a, b, THRESHOLD);
+    }

-	private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
-		if (base == null || enrich == null)
-			return;
+    private static void enrichPidFromList(List<Author> base, List<Author> enrich, Double threshold) {
+        if (base == null || enrich == null)
+            return;

-		// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
-		final Map<String, Author> basePidAuthorMap = base
-			.stream()
-			.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
-			.flatMap(
-				a -> a
-					.getPid()
-					.stream()
-					.filter(Objects::nonNull)
-					.map(p -> new Tuple2<>(pidToComparableString(p), a)))
-			.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
+        // <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
+        final Map<String, Author> basePidAuthorMap = base
+                .stream()
+                .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
+                .flatMap(
+                        a -> a
+                                .getPid()
+                                .stream()
+                                .filter(Objects::nonNull)
+                                .map(p -> new Tuple2<>(pidToComparableString(p), a)))
+                .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));

-		// <pid, Author> (list of pid that are missing in the other list)
-		final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
-			.stream()
-			.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
-			.flatMap(
-				a -> a
-					.getPid()
-					.stream()
-					.filter(Objects::nonNull)
-					.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
-					.map(p -> new Tuple2<>(p, a)))
-			.collect(Collectors.toList());
+        // <pid, Author> (list of pid that are missing in the other list)
+        final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
+                .stream()
+                .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
+                .flatMap(
+                        a -> a
+                                .getPid()
+                                .stream()
+                                .filter(Objects::nonNull)
+                                .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
+                                .map(p -> new Tuple2<>(p, a)))
+                .collect(Collectors.toList());

-		pidToEnrich
-			.forEach(
-				a -> {
-					Optional<Tuple2<Double, Author>> simAuthor = base
-						.stream()
-						.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
-						.max(Comparator.comparing(Tuple2::_1));
+        pidToEnrich
+                .forEach(
+                        a -> {
+                            Optional<Tuple2<Double, Author>> simAuthor = base
+                                    .stream()
+                                    .map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
+                                    .max(Comparator.comparing(Tuple2::_1));

-					if (simAuthor.isPresent()) {
-						double th = threshold;
-						// increase the threshold if the surname is too short
-						if (simAuthor.get()._2().getSurname() != null
-							&& simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
-							th = 0.99;
+                            if (simAuthor.isPresent()) {
+                                double th = threshold;
+                                // increase the threshold if the surname is too short
+                                if (simAuthor.get()._2().getSurname() != null
+                                        && simAuthor.get()._2().getSurname().length() <= 3 && threshold > 0.0)
+                                    th = 0.99;

-						if (simAuthor.get()._1() > th) {
-							Author r = simAuthor.get()._2();
-							if (r.getPid() == null) {
-								r.setPid(new ArrayList<>());
-							}
+                                if (simAuthor.get()._1() > th) {
+                                    Author r = simAuthor.get()._2();
+                                    if (r.getPid() == null) {
+                                        r.setPid(new ArrayList<>());
+                                    }

-							// TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
-							// it creates of fixed size, and the add method raise UnsupportedOperationException at
-							// java.util.AbstractList.add
-							final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
-							tmp.add(a._1());
-							r.setPid(tmp);
-						}
-					}
-				});
-	}
+                                    // TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
+                                    // it creates of fixed size, and the add method raise UnsupportedOperationException at
+                                    // java.util.AbstractList.add
+                                    final List<StructuredProperty> tmp = new ArrayList<>(r.getPid());
+                                    tmp.add(a._1());
+                                    r.setPid(tmp);
+                                }
+                            }
+                        });
+    }

-	public static String pidToComparableString(StructuredProperty pid) {
-		final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
-			: "";
-		return (pid.getQualifier() != null ? classid : "")
-			+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
-	}
+    public static String normalizeFullName(final String fullname) {
+        return nfd(fullname)
+                .toLowerCase()
+                // do not compact the regexes in a single expression, would cause StackOverflowError
+                // in case
+                // of large input strings
+                .replaceAll("(\\W)+", " ")
+                .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
+                .replaceAll("(\\p{Punct})+", " ")
+                .replaceAll("(\\d)+", " ")
+                .replaceAll("(\\n)+", " ")

-	public static int countAuthorsPids(List<Author> authors) {
-		if (authors == null)
-			return 0;
+                .trim();
+    }

-		return (int) authors.stream().filter(AuthorMerger::hasPid).count();
-	}

-	private static int authorsSize(List<Author> authors) {
-		if (authors == null)
-			return 0;
-		return authors.size();
-	}
+    private static String authorFieldToBeCompared(Author author) {
+        if (StringUtils.isNotBlank(author.getSurname())) {
+            return author.getSurname();

-	private static Double sim(Author a, Author b) {
+        }
+        if (StringUtils.isNotBlank(author.getFullname())) {
+            return author.getFullname();
+        }
+        return null;
+    }

-		final Person pa = parse(a);
-		final Person pb = parse(b);
+    /**
+     * This method tries to figure out whether two authors are the same in the context
+     * of ORCID enrichment
+     *
+     * @param left  Author in the OAF entity
+     * @param right Author ORCID
+     * @return based on a heuristic on the names of the authors if they are the same.
+     */
+    public static boolean checkORCIDSimilarity(final Author left, final Author right) {
+        final Person pl = parse(left);
+        final Person pr = parse(right);

-		// if both are accurate (e.g. they have name and surname)
-		if (pa.isAccurate() & pb.isAccurate()) {
-			return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5
-				+ new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5;
-		} else {
-			return new JaroWinkler()
-				.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
-		}
-	}
+        // If one of them didn't have a surname we verify if they have the fullName not empty
+        // and verify if the normalized version is equal
+        if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) &&
+                pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) {

-	private static boolean hasPid(Author a) {
-		if (a == null || a.getPid() == null || a.getPid().isEmpty())
-			return false;
-		return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
-	}
+            if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null
+                    && !pr.getFullname().isEmpty()) {
+                return pl
+                        .getFullname()
+                        .stream()
+                        .anyMatch(
+                                fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
+            } else {
+                return false;
+            }
+        }
+        // The Authors have one surname in common
+        if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {

-	private static Person parse(Author author) {
-		if (StringUtils.isNotBlank(author.getSurname())) {
-			return new Person(author.getSurname() + ", " + author.getName(), false);
-		} else {
-			if (StringUtils.isNotBlank(author.getFullname()))
-				return new Person(author.getFullname(), false);
-			else
-				return new Person("", false);
-		}
-	}
+            // If one of them has only a surname and is the same we can say that they are the same author
+            if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) ||
+                    (pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank)))
+                return true;
+            // The authors have the same initials of Name in common
+            if (pl
+                    .getName()
+                    .stream()
+                    .anyMatch(
+                            nl -> pr
+                                    .getName()
+                                    .stream()
+                                    .anyMatch(nr -> nr.equalsIgnoreCase(nl))))
+                return true;
+        }

-	private static String normalize(final String s) {
-		String[] normalized = nfd(s)
-			.toLowerCase()
-			// do not compact the regexes in a single expression, would cause StackOverflowError
-			// in case
-			// of large input strings
-			.replaceAll("(\\W)+", " ")
-			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
-			.replaceAll("(\\p{Punct})+", " ")
-			.replaceAll("(\\d)+", " ")
-			.replaceAll("(\\n)+", " ")
-			.trim()
-			.split(" ");
+        // Sometimes we noticed that publication have author wrote in inverse order Surname, Name
+        // We verify if we have an exact match between name and surname
+        if (pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl))) &&
+                pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl))))
+            return true;
+        else
+            return false;
+    }
+    //

-		Arrays.sort(normalized);
+    /**
+     * Copies the pids of ORCID authors onto the matching authors of an OAF entity.
+     * <p>
+     * Base authors are visited in order; each one is compared with the ORCID authors that have not been
+     * matched yet, and the first one accepted by {@link #checkORCIDSimilarity(Author, Author)} donates its
+     * pids and is removed from the pool of candidates.
+     *
+     * @param baseAuthor  the Author List in the OAF Entity
+     * @param orcidAuthor The list of ORCID Author intersected
+     * @return {@code orcidAuthor} when {@code baseAuthor} is null or empty; otherwise {@code baseAuthor},
+     *         whose elements are enriched in place. The base list is returned untouched when
+     *         {@code orcidAuthor} is null or empty, or when a single base author faces more than 10 ORCID
+     *         authors.
+     */
+    public static List<Author> enrichOrcid(List<Author> baseAuthor, List<Author> orcidAuthor) {
+
+        if (baseAuthor == null || baseAuthor.isEmpty()) {
+            return orcidAuthor;
+        }
+
+        if (orcidAuthor == null || orcidAuthor.isEmpty()) {
+            return baseAuthor;
+        }
+
+        if (baseAuthor.size() == 1 && orcidAuthor.size() > 10) {
+            return baseAuthor;
+        }
+
+        // work on a copy: matched candidates are removed from it, the caller's list is never modified
+        final List<Author> candidates = new ArrayList<>(orcidAuthor);
+
+        for (final Author current : baseAuthor) {
+            candidates
+                    .stream()
+                    .filter(candidate -> checkORCIDSimilarity(current, candidate))
+                    .findFirst()
+                    .ifPresent(matched -> {
+                        addPid(current, matched.getPid());
+                        candidates.remove(matched);
+                    });
+        }
+        return baseAuthor;
+    }
+
+    private static void addPid(final Author a, final List<StructuredProperty> pids) {
+
+        if (a.getPid() == null) {
+            a.setPid(new ArrayList<>());
+        }
+
+        a.getPid().addAll(pids);
+
+    }
+
+    /**
+     * Builds the key used to compare pids regardless of case: the lower-cased classid of the qualifier
+     * followed by the lower-cased value.
+     *
+     * @param pid the pid to convert, must not be {@code null}
+     * @return the concatenated key; a missing qualifier, classid or value contributes an empty string
+     */
+    public static String pidToComparableString(StructuredProperty pid) {
+        // the qualifier has to be checked before it is dereferenced, otherwise the null check is dead code
+        final String classid = pid.getQualifier() != null && pid.getQualifier().getClassid() != null
+                ? pid.getQualifier().getClassid().toLowerCase()
+                : "";
+        final String value = pid.getValue() != null ? pid.getValue().toLowerCase() : "";
+        return classid + value;
+    }
+
+    public static int countAuthorsPids(List<Author> authors) {
+        if (authors == null)
+            return 0;
+
+        return (int) authors.stream().filter(AuthorMerger::hasPid).count();
+    }
+
+    /** Null-safe size of an author list: 0 when the list is {@code null}. */
+    private static int authorsSize(List<Author> authors) {
+        return authors == null ? 0 : authors.size();
+    }
+
+    private static Double sim(Author a, Author b) {
+
+        final Person pa = parse(a);
+        final Person pb = parse(b);
+
+        // if both are accurate (e.g. they have name and surname)
+        if (pa.isAccurate() & pb.isAccurate()) {
+            return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5
+                    + new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5;
+        } else {
+            return new JaroWinkler()
+                    .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
+        }
+    }
+
+    /** Tells whether the author holds at least one non-null pid whose value is not blank. */
+    private static boolean hasPid(Author a) {
+        if (a == null || a.getPid() == null) {
+            return false;
+        }
+        for (final StructuredProperty pid : a.getPid()) {
+            if (pid != null && StringUtils.isNotBlank(pid.getValue())) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private static Person parse(Author author) {
+        if (StringUtils.isNotBlank(author.getSurname())) {
+            return new Person(author.getSurname() + ", " + author.getName(), false);
+        } else {
+            if (StringUtils.isNotBlank(author.getFullname()))
+                return new Person(author.getFullname(), false);
+            else
+                return new Person("", false);
+        }
+    }
+
+    public static String normalize(final String s) {
+        String[] normalized = nfd(s)
+                .toLowerCase()
+                // do not compact the regexes in a single expression, would cause StackOverflowError
+                // in case
+                // of large input strings
+                .replaceAll("(\\W)+", " ")
+                .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
+                .replaceAll("(\\p{Punct})+", " ")
+                .replaceAll("(\\d)+", " ")
+                .replaceAll("(\\n)+", " ")
+                .trim()
+                .split(" ");
+
+        Arrays.sort(normalized);
+
+        return String.join(" ", normalized);
+    }
+
+    /** Returns {@code s} converted to the Unicode normalization form NFD. */
+    private static String nfd(final String s) {
+        return Normalizer.normalize(s, Normalizer.Form.NFD);
+    }

 }
--- a/dhp-common/src/test/java/eu/dnetlib/oa/merge/AuthorMergerTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/oa/merge/AuthorMergerTest.java
@ -0,0 +1,114 @@
+
+package eu.dnetlib.oa.merge;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.List;
+import java.util.Objects;
+
+import org.junit.jupiter.api.Test;
+import org.junit.platform.commons.util.StringUtils;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.oa.merge.AuthorMerger;
+import eu.dnetlib.dhp.schema.oaf.Author;
+
+public class AuthorMergerTest {
+
+	@Test
+	public void testEnrcichAuthor() throws Exception {
+		final ObjectMapper mapper = new ObjectMapper();
+
+		BufferedReader pr = new BufferedReader(new InputStreamReader(
+			Objects
+				.requireNonNull(
+					AuthorMergerTest.class
+						.getResourceAsStream("/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json"))));
+		BufferedReader or = new BufferedReader(new InputStreamReader(
+			Objects
+				.requireNonNull(
+					AuthorMergerTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json"))));
+
+		TypeReference<List<Author>> aclass = new TypeReference<List<Author>>() {
+		};
+		String pubLine;
+
+		int i = 0;
+		while ((pubLine = pr.readLine()) != null) {
+			final String pubId = pubLine;
+			final String MatchPidOrcid = or.readLine();
+			final String pubOrcid = or.readLine();
+
+			final String data = pr.readLine();
+
+			if (StringUtils.isNotBlank(data)) {
+				List<Author> publicationAuthors = mapper.readValue(data, aclass);
+				List<Author> orcidAuthors = mapper.readValue(or.readLine(), aclass);
+				System.out.printf("OAF ID = %s \n", pubId);
+				System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);
+				System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());
+				System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());
+				System.out.printf("Oricd Matched PID = %s \n", MatchPidOrcid);
+
+				long originalAuthorWithPiD = publicationAuthors
+					.stream()
+					.filter(
+						a -> a.getPid() != null && a
+							.getPid()
+							.stream()
+							.anyMatch(
+								p -> p.getQualifier() != null
+									&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))
+					.count();
+				long start = System.currentTimeMillis();
+
+//                final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
+				final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
+
+				long enrichedAuthorWithPid = enrichedList
+					.stream()
+					.filter(
+						a -> a.getPid() != null && a
+							.getPid()
+							.stream()
+							.anyMatch(
+								p -> p.getQualifier() != null
+									&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))
+					.count();
+
+				long totalTime = (System.currentTimeMillis() - start) / 1000;
+				System.out
+					.printf(
+						"Enriched authors in %d seconds from %d pid to %d pid \n", totalTime, originalAuthorWithPiD,
+						enrichedAuthorWithPid);
+
+				System.out.println("=================");
+			}
+		}
+	}
+
+	@Test
+	public void checkSimilarityTest() {
+		final Author left = new Author();
+		left.setName("Anand");
+		left.setSurname("Rachna");
+		left.setFullname("Anand, Rachna");
+
+		System.out.println(AuthorMerger.normalizeFullName(left.getFullname()));
+
+		final Author right = new Author();
+		right.setName("Rachna");
+		right.setSurname("Anand");
+		right.setFullname("Rachna, Anand");
+//        System.out.println(AuthorMerger.normalize(right.getFullname()));
+		boolean same = AuthorMerger.checkORCIDSimilarity(left, right);
+
+		assertTrue(same);
+
+	}
+
+}
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDDumpApplication.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDDumpApplication.java
@ -0,0 +1,102 @@
+
+package eu.dnetlib.dhp.collection.orcid;
+
+import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
+
+import java.io.InputStream;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.Objects;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+public class DownloadORCIDDumpApplication {
+	private static final Logger log = LoggerFactory.getLogger(DownloadORCIDDumpApplication.class);
+
+	private final FileSystem fileSystem;
+
+	public DownloadORCIDDumpApplication(FileSystem fileSystem) {
+		this.fileSystem = fileSystem;
+	}
+
+	public static void main(String[] args) throws Exception {
+		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					Objects
+						.requireNonNull(
+							DownloadORCIDDumpApplication.class
+								.getResourceAsStream(
+									"/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json"))));
+		argumentParser.parseArgument(args);
+
+		final String hdfsuri = argumentParser.get("namenode");
+		log.info("hdfsURI is {}", hdfsuri);
+
+		final String targetPath = argumentParser.get("targetPath");
+		log.info("targetPath is {}", targetPath);
+
+		final String apiURL = argumentParser.get("apiURL");
+		log.info("apiURL is {}", apiURL);
+
+		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
+
+		new DownloadORCIDDumpApplication(fileSystem).run(targetPath, apiURL);
+
+	}
+
+	private void downloadItem(final String name, final String itemURL, final String basePath) {
+		try {
+			final Path hdfsWritePath = new Path(String.format("%s/%s", basePath, name));
+			final FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath, true);
+			final HttpGet request = new HttpGet(itemURL);
+			final int timeout = 60; // seconds
+			final RequestConfig config = RequestConfig
+				.custom()
+				.setConnectTimeout(timeout * 1000)
+				.setConnectionRequestTimeout(timeout * 1000)
+				.setSocketTimeout(timeout * 1000)
+				.build();
+			log.info("Downloading url {} into {}", itemURL, hdfsWritePath.getName());
+			try (CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
+				CloseableHttpResponse response = client.execute(request)) {
+				int responseCode = response.getStatusLine().getStatusCode();
+				log.info("Response code is {}", responseCode);
+				if (responseCode >= 200 && responseCode < 400) {
+					IOUtils.copy(response.getEntity().getContent(), fsDataOutputStream);
+				}
+			} catch (Throwable eu) {
+				throw new RuntimeException(eu);
+			}
+		} catch (Throwable e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	protected void run(final String targetPath, final String apiURL) throws Exception {
+		final ObjectMapper mapper = new ObjectMapper();
+		final URL url = new URL(apiURL);
+		URLConnection conn = url.openConnection();
+		InputStream is = conn.getInputStream();
+		final String json = IOUtils.toString(is);
+		JsonNode jsonNode = mapper.readTree(json);
+		jsonNode
+			.get("files")
+			.forEach(i -> downloadItem(i.get("name").asText(), i.get("download_url").asText(), targetPath));
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ExtractORCIDDump.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ExtractORCIDDump.java
@ -0,0 +1,71 @@
+
+package eu.dnetlib.dhp.collection.orcid;
+
+import static eu.dnetlib.dhp.utils.DHPUtils.getHadoopConfiguration;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+public class ExtractORCIDDump {
+	private static final Logger log = LoggerFactory.getLogger(ExtractORCIDDump.class);
+
+	private final FileSystem fileSystem;
+
+	public ExtractORCIDDump(FileSystem fileSystem) {
+		this.fileSystem = fileSystem;
+	}
+
+	public static void main(String[] args) throws Exception {
+		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					Objects
+						.requireNonNull(
+							DownloadORCIDDumpApplication.class
+								.getResourceAsStream(
+									"/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json"))));
+		argumentParser.parseArgument(args);
+
+		final String hdfsuri = argumentParser.get("namenode");
+		log.info("hdfsURI is {}", hdfsuri);
+
+		final String sourcePath = argumentParser.get("sourcePath");
+		log.info("sourcePath is {}", sourcePath);
+
+		final String targetPath = argumentParser.get("targetPath");
+		log.info("targetPath is {}", targetPath);
+
+		final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(hdfsuri));
+
+		new ExtractORCIDDump(fileSystem).run(sourcePath, targetPath);
+
+	}
+
+	public void run(final String sourcePath, final String targetPath) throws IOException, InterruptedException {
+		RemoteIterator<LocatedFileStatus> ls = fileSystem.listFiles(new Path(sourcePath), false);
+		final List<ORCIDExtractor> workers = new ArrayList<>();
+		int i = 0;
+		while (ls.hasNext()) {
+			LocatedFileStatus current = ls.next();
+			if (current.getPath().getName().endsWith("tar.gz")) {
+				workers.add(new ORCIDExtractor(fileSystem, "" + i++, current.getPath(), targetPath));
+			}
+		}
+		workers.forEach(Thread::start);
+		for (ORCIDExtractor worker : workers) {
+			worker.join();
+		}
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java
@ -0,0 +1,171 @@
+
+package eu.dnetlib.dhp.collection.orcid;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The ORCIDExtractor class extracts ORCID data from a TAR archive.
+ * The class creates a map of SequenceFile.Writer objects, one for each type of data that is to be extracted (e.g., employments, works, summaries).
+ * Then, it iterates over the TAR archive and writes each entry to the appropriate SequenceFile.Writer object.
+ * Finally, it closes all the SequenceFile.Writer objects.
+ */
+public class ORCIDExtractor extends Thread {
+
+	private static final Logger log = LoggerFactory.getLogger(ORCIDExtractor.class);
+
+	private final FileSystem fileSystem;
+
+	private final String id;
+
+	private final Path sourcePath;
+
+	private final String baseOutputPath;
+
+	/**
+	 * Creates a worker bound to a single archive.
+	 *
+	 * @param fileSystem the file system used to read the archive and to write the sequence files
+	 * @param id the worker identifier, used in log messages and as suffix of the output file names
+	 * @param sourcePath the path of the archive to extract
+	 * @param baseOutputPath the folder under which the sequence files are created
+	 */
+	public ORCIDExtractor(FileSystem fileSystem, String id, Path sourcePath, String baseOutputPath) {
+		this.sourcePath = sourcePath;
+		this.baseOutputPath = baseOutputPath;
+		this.id = id;
+		this.fileSystem = fileSystem;
+	}
+
+	/**
+	 * Opens the SequenceFile writers this worker needs, keyed by the kind of data they receive.
+	 * When the name of the source archive contains {@code summaries} a single {@code summary} writer
+	 * is created; otherwise one {@code employments} and one {@code works} writer are created.
+	 * Output files are named {@code <baseOutputPath>/<kind>_<id>}.
+	 *
+	 * @return the map from data kind to its writer
+	 * @throws RuntimeException wrapping any failure raised while creating the writers
+	 */
+	private Map<String, SequenceFile.Writer> createMap() {
+		final Map<String, SequenceFile.Writer> res = new HashMap<>();
+		try {
+			log.info("Thread {} Creating sequence files starting from this input Path {}", id, sourcePath.getName());
+			if (sourcePath.getName().contains("summaries")) {
+				final String summaryPath = String.format("%s/summaries_%s", baseOutputPath, id);
+				res.put("summary", openWriter(summaryPath));
+				log.info("Thread {} Creating only summary path here {}", id, summaryPath);
+			} else {
+				final String employmentsPath = String.format("%s/employments_%s", baseOutputPath, id);
+				res.put("employments", openWriter(employmentsPath));
+				log.info("Thread {} Creating employments path here {}", id, employmentsPath);
+
+				final String worksPath = String.format("%s/works_%s", baseOutputPath, id);
+				res.put("works", openWriter(worksPath));
+				log.info("Thread {} Creating works path here {}", id, worksPath);
+			}
+			return res;
+		} catch (Throwable e) {
+			// Release the writers opened before the failure, otherwise they would stay open.
+			res.values().forEach(IOUtils::closeStream);
+			throw new RuntimeException(e);
+		}
+	}
+
+	/**
+	 * Creates a SequenceFile writer with Text keys and Text values on the given path.
+	 *
+	 * @param path the path of the sequence file to create
+	 * @return the writer
+	 * @throws IOException if the writer cannot be created
+	 */
+	private SequenceFile.Writer openWriter(final String path) throws IOException {
+		return SequenceFile
+			.createWriter(
+				fileSystem.getConf(),
+				SequenceFile.Writer.file(new Path(path)),
+				SequenceFile.Writer.keyClass(Text.class),
+				SequenceFile.Writer.valueClass(Text.class));
+	}
+
+	/**
+	 * Opens the source archive through the compression codec matching its path, creates the output
+	 * writers and copies the archive entries into them. The decompressed stream is always closed.
+	 * The JVM is terminated with status 1 when no codec matches the source path.
+	 */
+	@Override
+	public void run() {
+
+		final CompressionCodec codec = new CompressionCodecFactory(fileSystem.getConf()).getCodec(sourcePath);
+		if (codec == null) {
+			System.err.println("No codec found for " + sourcePath.getName());
+			System.exit(1);
+		}
+
+		InputStream compressedInput = null;
+		try {
+			compressedInput = codec.createInputStream(fileSystem.open(sourcePath));
+			// arguments are evaluated left to right: the writers are created after the stream is opened
+			iterateTar(createMap(), compressedInput);
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		} finally {
+			log.info("Closing gzip stream");
+			IOUtils.closeStream(compressedInput);
+		}
+
+	}
+
+	private SequenceFile.Writer retrieveFile(Map<String, SequenceFile.Writer> fileMap, final String path) {
+		if (sourcePath.getName().contains("summaries")) {
+			return fileMap.get("summary");
+		}
+
+		if (path.contains("works")) {
+			return fileMap.get("works");
+		}
+		if (path.contains("employments"))
+			return fileMap.get("employments");
+		return null;
+	}
+
+	private void iterateTar(Map<String, SequenceFile.Writer> fileMap, InputStream gzipInputStream) throws IOException {
+
+		int extractedItem = 0;
+		try (final TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
+
+			TarArchiveEntry entry;
+			while ((entry = tais.getNextTarEntry()) != null) {
+
+				if (entry.isFile()) {
+
+					final SequenceFile.Writer fl = retrieveFile(fileMap, entry.getName());
+					if (fl != null) {
+						final Text key = new Text(entry.getName());
+						final Text value = new Text(
+							org.apache.commons.io.IOUtils.toString(new BufferedReader(new InputStreamReader(tais))));
+						fl.append(key, value);
+						extractedItem++;
+						if (extractedItem % 100000 == 0) {
+							log.info("Thread {}: Extracted {} items", id, extractedItem);
+							break;
+						}
+					}
+				}
+			}
+		} finally {
+			for (SequenceFile.Writer k : fileMap.values()) {
+				log.info("Thread {}: Completed processed {} items", id, extractedItem);
+				k.hflush();
+				k.close();
+			}
+		}
+
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/OrcidParser.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/OrcidParser.java
@ -0,0 +1,251 @@
+
+package eu.dnetlib.dhp.collection.orcid;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.ximpleware.*;
+
+import eu.dnetlib.dhp.collection.orcid.model.*;
+import eu.dnetlib.dhp.parser.utility.VtdException;
+import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
+
+public class OrcidParser {
+
+	final Logger log = LoggerFactory.getLogger(OrcidParser.class);
+	private VTDNav vn;
+
+	private AutoPilot ap;
+	private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
+	private static final String NS_COMMON = "common";
+	private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
+	private static final String NS_PERSON = "person";
+	private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
+	private static final String NS_DETAILS = "personal-details";
+	private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
+	private static final String NS_OTHER = "other-name";
+	private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
+	private static final String NS_RECORD = "record";
+	private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
+	private static final String NS_ACTIVITIES = "activities";
+	private static final String NS_ACTIVITIES_URL = "http://www.orcid.org/ns/activities";
+	private static final String NS_WORK = "work";
+	private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
+
+	private static final String NS_ERROR = "error";
+	private static final String NS_HISTORY = "history";
+	private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
+	private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
+	private static final String NS_BULK = "bulk";
+	private static final String NS_EXTERNAL = "external-identifier";
+	private static final String NS_EXTERNAL_URL = "http://www.orcid.org/ns/external-identifier";
+
+	/**
+	 * Parses the given XML and stores the resulting navigator and a fresh AutoPilot, with all the
+	 * ORCID namespace prefixes declared, in the {@code vn} and {@code ap} fields of this parser.
+	 *
+	 * @param xml the XML document to parse
+	 * @throws ParseException if the document cannot be parsed
+	 */
+	private void generateParsedDocument(final String xml) throws ParseException {
+		final VTDGen vg = new VTDGen();
+		// Encode explicitly as UTF-8 so that the bytes do not depend on the platform default charset.
+		// NOTE(review): assumes the ORCID records declare UTF-8 (or no encoding) in their XML prolog.
+		vg.setDoc(xml.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+		vg.parse(true);
+		this.vn = vg.getNav();
+		this.ap = new AutoPilot(vn);
+		ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
+		ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
+		ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
+		ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
+		ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
+		ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
+		ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL);
+		ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
+		ap.declareXPathNameSpace(NS_EXTERNAL, NS_EXTERNAL_URL);
+		ap.declareXPathNameSpace(NS_ACTIVITIES, NS_ACTIVITIES_URL);
+	}
+
+	/**
+	 * Parses an ORCID summary record into an {@link Author}.
+	 *
+	 * @param xml the XML of the summary record
+	 * @return the parsed author, or {@code null} when the document has no {@code record:record} element
+	 *         or when any error occurs while parsing (the error is logged with its stack trace)
+	 */
+	public Author parseSummary(final String xml) {
+
+		try {
+			final Author author = new Author();
+			generateParsedDocument(xml);
+			List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
+				.getTextValuesWithAttributes(
+					ap, vn, "//record:record", Arrays.asList("path"));
+			if (!recordNodes.isEmpty()) {
+				// the ORCID is the "path" attribute without its first character
+				final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
+				author.setOrcid(oid);
+			} else {
+				return null;
+			}
+			List<VtdUtilityParser.Node> personNodes = VtdUtilityParser
+				.getTextValuesWithAttributes(
+					ap, vn, "//person:name", Arrays.asList("visibility"));
+			// a record without person:name fails here and is discarded by the catch block below
+			final String visibility = (personNodes.get(0).getAttributes().get("visibility"));
+			author.setVisibility(visibility);
+			final String name = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:given-names");
+			author.setGivenName(name);
+
+			final String surnames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:family-name");
+			author.setFamilyName(surnames);
+
+			final String creditNames = VtdUtilityParser.getSingleValue(ap, vn, "//personal-details:credit-name");
+			author.setCreditName(creditNames);
+
+			final String biography = VtdUtilityParser
+				.getSingleValue(ap, vn, "//person:biography/personal-details:content");
+			author.setBiography(biography);
+
+			final List<String> otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content");
+			if (!otherNames.isEmpty()) {
+				author.setOtherNames(otherNames);
+			}
+
+			ap.selectXPath("//external-identifier:external-identifier");
+
+			// one Pid for each external identifier: schema from external-id-type, value from external-id-value
+			while (ap.evalXPath() != -1) {
+				final Pid pid = new Pid();
+
+				final AutoPilot ap1 = new AutoPilot(ap.getNav());
+
+				ap1.selectXPath("./common:external-id-type");
+				while (ap1.evalXPath() != -1) {
+					int it = vn.getText();
+					pid.setSchema(vn.toNormalizedString(it));
+				}
+				ap1.selectXPath("./common:external-id-value");
+				while (ap1.evalXPath() != -1) {
+					int it = vn.getText();
+					pid.setValue(vn.toNormalizedString(it));
+				}
+
+				author.addOtherPid(pid);
+			}
+
+			return author;
+		} catch (Throwable e) {
+			// pass the throwable to the logger: its message alone may be null and hides the failing line
+			log.error("Error on parsing {}", xml, e);
+			return null;
+		}
+	}
+
+	/**
+	 * Parses an ORCID work record into a {@link Work}.
+	 *
+	 * @param xml the XML of the work record
+	 * @return the parsed work, or {@code null} when the document has no {@code work:work} element
+	 *         or when any error occurs while parsing (the error is logged with its stack trace)
+	 */
+	public Work parseWork(final String xml) {
+
+		try {
+			final Work work = new Work();
+			generateParsedDocument(xml);
+			List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
+				.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "visibility"));
+			if (!workNodes.isEmpty()) {
+				// the ORCID is the second "/"-separated token of the "path" attribute
+				final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
+				work.setOrcid(oid);
+			} else {
+				return null;
+			}
+
+			ap.selectXPath("//common:external-id");
+
+			// one Pid for each external id: schema from external-id-type, value from external-id-value
+			while (ap.evalXPath() != -1) {
+				final Pid pid = new Pid();
+
+				final AutoPilot ap1 = new AutoPilot(ap.getNav());
+
+				ap1.selectXPath("./common:external-id-type");
+				while (ap1.evalXPath() != -1) {
+					int it = vn.getText();
+					pid.setSchema(vn.toNormalizedString(it));
+				}
+				ap1.selectXPath("./common:external-id-value");
+				while (ap1.evalXPath() != -1) {
+					int it = vn.getText();
+					pid.setValue(vn.toNormalizedString(it));
+				}
+
+				work.addPid(pid);
+			}
+
+			work.setTitle(VtdUtilityParser.getSingleValue(ap, vn, "//work:title/common:title"));
+
+			return work;
+		} catch (Throwable e) {
+			// pass the throwable to the logger: its message alone may be null and hides the failing line
+			log.error("Error on parsing {}", xml, e);
+			return null;
+		}
+
+	}
+
+	/**
+	 * Builds a date string from the {@code common:year}, {@code common:month} and {@code common:day}
+	 * children of every node selected by the given XPath on the currently parsed document.
+	 * The year is appended as is, month and day are each preceded by a dash.
+	 *
+	 * @param xpath the XPath selecting the date element(s)
+	 * @return the concatenated date parts, or an empty string when nothing is selected
+	 * @throws Exception if the XPath evaluation or the navigation fails
+	 */
+	private String extractEmploymentDate(final String xpath) throws Exception {
+
+		ap.selectXPath(xpath);
+		StringBuilder sb = new StringBuilder();
+		while (ap.evalXPath() != -1) {
+			// second AutoPilot on the same navigator, used to evaluate the relative paths below
+			final AutoPilot ap1 = new AutoPilot(ap.getNav());
+			ap1.selectXPath("./common:year");
+			while (ap1.evalXPath() != -1) {
+				int it = vn.getText();
+				sb.append(vn.toNormalizedString(it));
+			}
+			ap1.selectXPath("./common:month");
+			while (ap1.evalXPath() != -1) {
+				int it = vn.getText();
+				sb.append("-");
+				sb.append(vn.toNormalizedString(it));
+			}
+			ap1.selectXPath("./common:day");
+			while (ap1.evalXPath() != -1) {
+				int it = vn.getText();
+				sb.append("-");
+				sb.append(vn.toNormalizedString(it));
+			}
+		}
+		return sb.toString();
+
+	}
+
+	public Employment parseEmployment(final String xml) {
+		try {
+			final Employment employment = new Employment();
+			generateParsedDocument(xml);
+			final String oid = VtdUtilityParser
+				.getSingleValue(ap, vn, "//common:source-orcid/common:path");
+			if (StringUtils.isNotBlank(oid)) {
+				employment.setOrcid(oid);
+			} else {
+				return null;
+			}
+			final String depName = VtdUtilityParser
+				.getSingleValue(ap, vn, "//common:department-name");
+			final String rolTitle = VtdUtilityParser
+				.getSingleValue(ap, vn, "//common:role-title");
+			if (StringUtils.isNotBlank(rolTitle))
+				employment.setRoleTitle(rolTitle);
+			if (StringUtils.isNotBlank(depName))
+				employment.setDepartmentName(depName);
+			else
+				employment
+					.setDepartmentName(
+						VtdUtilityParser
+							.getSingleValue(ap, vn, "//common:organization/common:name"));
+
+			employment.setStartDate(extractEmploymentDate("//common:start-date"));
+			employment.setEndDate(extractEmploymentDate("//common:end-date"));
+
+			final String affiliationId = VtdUtilityParser
+				.getSingleValue(ap, vn, "//common:disambiguated-organization-identifier");
+			final String affiliationIdType = VtdUtilityParser
+				.getSingleValue(ap, vn, "//common:disambiguation-source");
+
+			if (StringUtils.isNotBlank(affiliationId) || StringUtils.isNotBlank(affiliationIdType))
+				employment.setAffiliationId(new Pid(affiliationId, affiliationIdType));
+
+			return employment;
+		} catch (Throwable e) {
+			log.error("Error on parsing {}", xml);
+			log.error(e.getMessage());
+			return null;
+		}
+
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java
@ -0,0 +1,83 @@
+
+package eu.dnetlib.dhp.collection.orcid.model;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Bean describing an author as parsed from an ORCID summary record.
+ */
+public class Author extends ORCIDItem {
+
+	private String givenName;
+	private String familyName;
+	private String creditName;
+	private String visibility;
+	private String biography;
+	private List<String> otherNames;
+	private List<Pid> otherPids;
+
+	public String getGivenName() {
+		return givenName;
+	}
+
+	public void setGivenName(String givenName) {
+		this.givenName = givenName;
+	}
+
+	public String getFamilyName() {
+		return familyName;
+	}
+
+	public void setFamilyName(String familyName) {
+		this.familyName = familyName;
+	}
+
+	public String getCreditName() {
+		return creditName;
+	}
+
+	public void setCreditName(String creditName) {
+		this.creditName = creditName;
+	}
+
+	public String getVisibility() {
+		return visibility;
+	}
+
+	public void setVisibility(String visibility) {
+		this.visibility = visibility;
+	}
+
+	public String getBiography() {
+		return biography;
+	}
+
+	public void setBiography(String biography) {
+		this.biography = biography;
+	}
+
+	public List<String> getOtherNames() {
+		return otherNames;
+	}
+
+	public void setOtherNames(List<String> otherNames) {
+		this.otherNames = otherNames;
+	}
+
+	public List<Pid> getOtherPids() {
+		return otherPids;
+	}
+
+	public void setOtherPids(List<Pid> otherPids) {
+		this.otherPids = otherPids;
+	}
+
+	/**
+	 * Appends an identifier to the other pids, creating the list on first use.
+	 *
+	 * @param pid the identifier to add
+	 */
+	public void addOtherPid(final Pid pid) {
+		if (otherPids == null) {
+			otherPids = new ArrayList<>();
+		}
+		otherPids.add(pid);
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Employment.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Employment.java
@ -0,0 +1,54 @@
+
+package eu.dnetlib.dhp.collection.orcid.model;
+
+/**
+ * Bean describing an employment as parsed from an ORCID employment record.
+ */
+public class Employment extends ORCIDItem {
+
+	private String startDate;
+	// lower camel case like every other field; the accessors keep their names
+	private String endDate;
+
+	private Pid affiliationId;
+
+	private String departmentName;
+
+	private String roleTitle;
+
+	public String getStartDate() {
+		return startDate;
+	}
+
+	public void setStartDate(String startDate) {
+		this.startDate = startDate;
+	}
+
+	public String getEndDate() {
+		return endDate;
+	}
+
+	public void setEndDate(String endDate) {
+		this.endDate = endDate;
+	}
+
+	public Pid getAffiliationId() {
+		return affiliationId;
+	}
+
+	public void setAffiliationId(Pid affiliationId) {
+		this.affiliationId = affiliationId;
+	}
+
+	public String getDepartmentName() {
+		return departmentName;
+	}
+
+	public void setDepartmentName(String departmentName) {
+		this.departmentName = departmentName;
+	}
+
+	public String getRoleTitle() {
+		return roleTitle;
+	}
+
+	public void setRoleTitle(String roleTitle) {
+		this.roleTitle = roleTitle;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/ORCIDItem.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/ORCIDItem.java
@ -0,0 +1,14 @@
+
+package eu.dnetlib.dhp.collection.orcid.model;
+
+/**
+ * Base bean of the ORCID model: it only carries the ORCID identifier.
+ */
+public class ORCIDItem {
+
+	private String orcid;
+
+	public void setOrcid(final String orcid) {
+		this.orcid = orcid;
+	}
+
+	public String getOrcid() {
+		return this.orcid;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Pid.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Pid.java
@ -0,0 +1,33 @@
+
+package eu.dnetlib.dhp.collection.orcid.model;
+
+/**
+ * Bean holding an identifier as a (value, schema) pair.
+ */
+public class Pid {
+
+	private String schema;
+
+	private String value;
+
+	/** Creates an empty identifier, to be filled through the setters. */
+	public Pid() {
+	}
+
+	/**
+	 * @param value the identifier value
+	 * @param schema the identifier schema
+	 */
+	public Pid(String value, String schema) {
+		this.schema = schema;
+		this.value = value;
+	}
+
+	public String getSchema() {
+		return this.schema;
+	}
+
+	public void setSchema(String schema) {
+		this.schema = schema;
+	}
+
+	public String getValue() {
+		return this.value;
+	}
+
+	public void setValue(String value) {
+		this.value = value;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Work.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Work.java
@ -0,0 +1,35 @@
+
+package eu.dnetlib.dhp.collection.orcid.model;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Bean describing a work as parsed from an ORCID work record.
+ */
+public class Work extends ORCIDItem {
+
+	private List<Pid> pids;
+
+	private String title;
+
+	public List<Pid> getPids() {
+		return pids;
+	}
+
+	public void setPids(List<Pid> pids) {
+		this.pids = pids;
+	}
+
+	public String getTitle() {
+		return title;
+	}
+
+	public void setTitle(String title) {
+		this.title = title;
+	}
+
+	/**
+	 * Appends an identifier to the pids, creating the list on first use.
+	 *
+	 * @param pid the identifier to add
+	 */
+	public void addPid(Pid pid) {
+		if (pids == null) {
+			pids = new ArrayList<>();
+		}
+		pids.add(pid);
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/download_orcid_parameter.json
@ -0,0 +1,21 @@
+[
+  {
+    "paramName": "n",
+    "paramLongName": "namenode",
+    "paramDescription": "the Name Node URI",
+    "paramRequired": true
+  },
+  {
+    "paramName": "t",
+    "paramLongName": "targetPath",
+    "paramDescription": "the target PATH where the files are downloaded",
+    "paramRequired": true
+  },
+  {
+    "paramName": "a",
+    "paramLongName": "apiURL",
+    "paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
+    "paramRequired": true
+  }
+
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/extract_orcid_parameter.json
@ -0,0 +1,21 @@
+[
+  {
+    "paramName": "n",
+    "paramLongName": "namenode",
+    "paramDescription": "the Name Node URI",
+    "paramRequired": true
+  },
+  {
+    "paramName": "t",
+    "paramLongName": "targetPath",
+    "paramDescription": "the target PATH to extract files",
+    "paramRequired": true
+  },
+  {
+    "paramName": "s",
+    "paramLongName": "sourcePath",
+    "paramDescription": "the PATH where the tar.gz files were downloaded",
+    "paramRequired": true
+  }
+
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json
@ -0,0 +1,21 @@
+[
+  {
+    "paramName": "m",
+    "paramLongName": "master",
+    "paramDescription": "the master name",
+    "paramRequired": true
+  },
+  {
+    "paramName": "t",
+    "paramLongName": "targetPath",
+    "paramDescription": "the target PATH of the DF tables",
+    "paramRequired": true
+  },
+  {
+    "paramName": "s",
+    "paramLongName": "sourcePath",
+    "paramDescription": "the PATH of the ORCID sequence file",
+    "paramRequired": true
+  }
+
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/config-default.xml
@ -0,0 +1,23 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/oozie_app/workflow.xml
@ -0,0 +1,81 @@
+<workflow-app name="download_ORCID_DUMP" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>targetPath</name>
+            <description>the path where the original ORCID dump is stored</description>
+        </property>
+        <property>
+            <name>apiURL</name>
+            <description>The Figshare API URL used to retrieve the list of files to download</description>
+        </property>
+    </parameters>
+
+    <!-- Enter the pipeline from its first step: DownloadDUMP, then extractDump, then generateTables. -->
+    <start to="DownloadDUMP"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <action name="DownloadDUMP">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.dhp.collection.orcid.DownloadORCIDDumpApplication</main-class>
+            <arg>--namenode</arg><arg>${nameNode}</arg>
+            <arg>--targetPath</arg><arg>${targetPath}</arg>
+            <arg>--apiURL</arg><arg>${apiURL}</arg>
+        </java>
+        <ok to="extractDump"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="extractDump">
+        <java>
+            <configuration>
+                <property>
+                    <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                    <value>true</value>
+                </property>
+            </configuration>
+
+            <main-class>eu.dnetlib.dhp.collection.orcid.ExtractORCIDDump</main-class>
+            <java-opts> -Xmx6g </java-opts>
+            <arg>--namenode</arg><arg>${nameNode}</arg>
+            <arg>--sourcePath</arg><arg>${targetPath}</arg>
+            <arg>--targetPath</arg><arg>${targetPath}/extracted</arg>
+        </java>
+        <ok to="generateTables"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="generateTables">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Generate ORCID Tables</name>
+            <class>eu.dnetlib.dhp.collection.orcid.SparkGenerateORCIDTable</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=2g
+                --conf spark.sql.shuffle.partitions=3000
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${targetPath}/extracted</arg>
+            <arg>--targetPath</arg><arg>${targetPath}/tables</arg>
+            <arg>--master</arg><arg>yarn</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/preprocess_orcid_dump_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/orcid/preprocess_orcid_dump_parameter.json
@ -0,0 +1,21 @@
+[
+  {
+    "paramName": "n",
+    "paramLongName": "namenode",
+    "paramDescription": "the Name Node URI",
+    "paramRequired": true
+  },
+  {
+    "paramName": "t",
+    "paramLongName": "targetPath",
+    "paramDescription": "the target PATH where the files are downloaded",
+    "paramRequired": true
+  },
+  {
+    "paramName": "a",
+    "paramLongName": "apiURL",
+    "paramDescription": "the FIGSHARE API id URL to retrieve all the dump files",
+    "paramRequired": true
+  }
+
+]
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/orcid/SparkGenerateORCIDTable.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/orcid/SparkGenerateORCIDTable.scala
@ -0,0 +1,101 @@
+package eu.dnetlib.dhp.collection.orcid
+
+import eu.dnetlib.dhp.application.AbstractScalaApplication
+import eu.dnetlib.dhp.collection.orcid.model.{Author, Employment, Pid, Work}
+import org.apache.hadoop.io.Text
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
+import org.slf4j.{Logger, LoggerFactory}
+
+class SparkGenerateORCIDTable(propertyPath: String, args: Array[String], log: Logger)
+    extends AbstractScalaApplication(propertyPath, args, log: Logger) {
+
+  /** Entry point of the Spark application: reads the sourcePath and targetPath
+    * parameters and generates, in this order, the Authors, Employments and Works tables.
+    */
+  override def run(): Unit = {
+    val sourcePath: String = parser.get("sourcePath")
+    log.info("found parameters sourcePath: {}", sourcePath)
+    val targetPath: String = parser.get("targetPath")
+    log.info("found parameters targetPath: {}", targetPath)
+    extractORCIDTable(spark, sourcePath, targetPath)
+    extractORCIDEmploymentsTable(spark, sourcePath, targetPath)
+    extractORCIDWorksTable(spark, sourcePath, targetPath)
+  }
+
+  /** Generates the Authors table.
+    *
+    * Reads the (Text, Text) sequence files under sourcePath, keeps the records whose key
+    * contains "summaries", parses each value with OrcidParser.parseSummary, discards the
+    * records the parser returned null for and overwrites targetPath/Authors with the result.
+    *
+    * @param spark the active Spark session
+    * @param sourcePath the path of the ORCID sequence files
+    * @param targetPath the base path of the generated tables
+    */
+  def extractORCIDTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
+    val sc: SparkContext = spark.sparkContext
+    import spark.implicits._
+    val df = sc
+      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
+      .map { case (x, y) => (x.toString, y.toString) }
+      .toDF
+      .as[(String, String)]
+    // bean encoder used to serialize the parsed Author objects
+    implicit val orcidAuthor: Encoder[Author] = Encoders.bean(classOf[Author])
+//    implicit  val orcidPID:Encoder[Pid] = Encoders.bean(classOf[Pid])
+    df.filter(r => r._1.contains("summaries"))
+      .map { r =>
+        val p = new OrcidParser
+        p.parseSummary(r._2)
+      }
+      .filter(p => p != null)
+      .write
+      .mode(SaveMode.Overwrite)
+      .save(s"$targetPath/Authors")
+  }
+
+  /** Generates the Works table.
+    *
+    * Reads the (Text, Text) sequence files under sourcePath, keeps the records whose key
+    * contains "works", parses each value with OrcidParser.parseWork, discards the
+    * records the parser returned null for and overwrites targetPath/Works with the result.
+    *
+    * @param spark the active Spark session
+    * @param sourcePath the path of the ORCID sequence files
+    * @param targetPath the base path of the generated tables
+    */
+  def extractORCIDWorksTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
+    val sc: SparkContext = spark.sparkContext
+    import spark.implicits._
+    val df = sc
+      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
+      .map { case (x, y) => (x.toString, y.toString) }
+      .toDF
+      .as[(String, String)]
+    // bean encoders for the parsed Work objects and the Pid type
+    implicit val orcidWorkAuthor: Encoder[Work] = Encoders.bean(classOf[Work])
+    implicit val orcidPID: Encoder[Pid] = Encoders.bean(classOf[Pid])
+    df.filter(r => r._1.contains("works"))
+      .map { r =>
+        val p = new OrcidParser
+        p.parseWork(r._2)
+      }
+      .filter(p => p != null)
+      .write
+      .mode(SaveMode.Overwrite)
+      .save(s"$targetPath/Works")
+  }
+
+  /** Generates the Employments table.
+    *
+    * Reads the (Text, Text) sequence files under sourcePath, keeps the records whose key
+    * contains "employments", parses each value with OrcidParser.parseEmployment, discards the
+    * records the parser returned null for and overwrites targetPath/Employments with the result.
+    *
+    * @param spark the active Spark session
+    * @param sourcePath the path of the ORCID sequence files
+    * @param targetPath the base path of the generated tables
+    */
+  def extractORCIDEmploymentsTable(spark: SparkSession, sourcePath: String, targetPath: String): Unit = {
+    val sc: SparkContext = spark.sparkContext
+    import spark.implicits._
+    val df = sc
+      .sequenceFile(sourcePath, classOf[Text], classOf[Text])
+      .map { case (x, y) => (x.toString, y.toString) }
+      .toDF
+      .as[(String, String)]
+    // bean encoders for the parsed Employment objects and the Pid type
+    implicit val orcidEmploymentAuthor: Encoder[Employment] = Encoders.bean(classOf[Employment])
+    implicit val orcidPID: Encoder[Pid] = Encoders.bean(classOf[Pid])
+    df.filter(r => r._1.contains("employments"))
+      .map { r =>
+        val p = new OrcidParser
+        p.parseEmployment(r._2)
+      }
+      .filter(p => p != null)
+      .write
+      .mode(SaveMode.Overwrite)
+      .save(s"$targetPath/Employments")
+  }
+}
+
+/** Launcher of the SparkGenerateORCIDTable application. */
+object SparkGenerateORCIDTable {
+
+  val log: Logger = LoggerFactory.getLogger(SparkGenerateORCIDTable.getClass)
+
+  /** Builds the application with the generate_orcid_table_parameter.json argument
+    * definition, initializes it and runs it.
+    *
+    * @param args the command line arguments
+    */
+  def main(args: Array[String]): Unit = {
+
+    new SparkGenerateORCIDTable("/eu/dnetlib/dhp/collection/orcid/generate_orcid_table_parameter.json", args, log)
+      .initialize()
+      .run()
+
+  }
+}
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
@ -176,7 +176,8 @@ object DataciteToOAFTransformation {
          vocabularies.getSynonymAsQualifier(
            ModelConstants.DNET_RESULT_TYPOLOGIES,
            typeQualifier.getClassid
-          ), resourceType
+          ),
+          resourceType
        )
    }
    if (schemaOrg != null && schemaOrg.nonEmpty) {
@ -188,7 +189,8 @@ object DataciteToOAFTransformation {
          vocabularies.getSynonymAsQualifier(
            ModelConstants.DNET_RESULT_TYPOLOGIES,
            typeQualifier.getClassid
-          ), schemaOrg
+          ),
+          schemaOrg
        )

    }
@ -203,7 +205,8 @@ object DataciteToOAFTransformation {
          vocabularies.getSynonymAsQualifier(
            ModelConstants.DNET_RESULT_TYPOLOGIES,
            typeQualifier.getClassid
-          ), resourceTypeGeneral
+          ),
+          resourceTypeGeneral
        )

    }
@ -228,7 +231,6 @@ object DataciteToOAFTransformation {
    itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
    i.setInstanceTypeMapping(List(itm).asJava)

-
    typeQualifiers._2.getClassname match {
      case "dataset" =>
        val r = new OafDataset
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala
@ -593,7 +593,6 @@ object BioDBToOAF {
    itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
    i.setInstanceTypeMapping(List(itm).asJava)

-
    i.setCollectedfrom(collectedFromMap("ebi"))
    d.setInstance(List(i).asJava)
    i.setDateofacceptance(
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
@ -195,7 +195,7 @@ object PubMedToOaf {
      pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
    } else {
      val i_type = article.getPublicationTypes.asScala
-        .map(s => (s.getValue,getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
+        .map(s => (s.getValue, getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)))
        .find(q => q._2 != null)

      if (i_type.isDefined) {
@ -205,8 +205,7 @@ object PubMedToOaf {
        itm.setOriginalType(i_type.get._1)
        itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
        pubmedInstance.setInstanceTypeMapping(List(itm).asJava)
-      }
-      else
+      } else
        return null
    }
    val result = createResult(pubmedInstance.getInstancetype, vocabularies)
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/orcid/DownloadORCIDTest.java
@ -0,0 +1,119 @@
+
+package eu.dnetlib.dhp.collection.orcid;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Text;
+import org.apache.spark.SparkContext;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.ximpleware.NavException;
+import com.ximpleware.ParseException;
+import com.ximpleware.XPathEvalException;
+import com.ximpleware.XPathParseException;
+
+import eu.dnetlib.dhp.collection.orcid.model.Author;
+import eu.dnetlib.dhp.collection.orcid.model.ORCIDItem;
+import eu.dnetlib.dhp.parser.utility.VtdException;
+
+/**
+ * Smoke tests for {@link OrcidParser}: every test feeds XML fixtures taken from the test classpath to the parser
+ * and prints the JSON serialization of the parsed object. A test fails only when a fixture is missing or when the
+ * parser or the JSON serializer throws; no assertion is made on the parsed content.
+ */
+public class DownloadORCIDTest {
+	private final Logger log = LoggerFactory.getLogger(DownloadORCIDTest.class);
+
+	/**
+	 * Reads a classpath resource into a string. The content is decoded explicitly as UTF-8 (the encoding declared
+	 * by the XML fixtures) instead of the platform default charset, and the stream is always closed.
+	 *
+	 * @param resourcePath absolute classpath location of the resource
+	 * @return the whole content of the resource
+	 * @throws NullPointerException if no resource exists at {@code resourcePath}
+	 * @throws IOException if the resource cannot be read
+	 */
+	private String readResource(final String resourcePath) throws IOException {
+		try (InputStream in = Objects
+			.requireNonNull(
+				getClass().getResourceAsStream(resourcePath),
+				"missing test resource: " + resourcePath)) {
+			return IOUtils.toString(in, StandardCharsets.UTF_8);
+		}
+	}
+
+	/** Parses the ORCID summary fixture and prints the resulting item as JSON. */
+	@Test
+	public void testSummary() throws Exception {
+		final String xml = readResource("/eu/dnetlib/dhp/collection/orcid/summary.xml");
+
+		final OrcidParser parser = new OrcidParser();
+		ORCIDItem orcidItem = parser.parseSummary(xml);
+
+		final ObjectMapper mapper = new ObjectMapper();
+		System.out.println(mapper.writeValueAsString(orcidItem));
+
+	}
+
+	/** Parses every work fixture, in order, and prints each parsed work as JSON. */
+	@Test
+	public void testParsingWork() throws Exception {
+
+		final List<String> workPaths = Arrays
+			.asList(
+				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml",
+				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml",
+				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml",
+				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml",
+				"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml"
+
+			);
+
+		final OrcidParser parser = new OrcidParser();
+		final ObjectMapper mapper = new ObjectMapper();
+		// A plain loop lets parser and I/O failures surface with their original type.
+		for (final String path : workPaths) {
+			System.out.println(mapper.writeValueAsString(parser.parseWork(readResource(path))));
+		}
+	}
+
+	/** Parses every employment fixture, in order, and prints each parsed employment as JSON. */
+	@Test
+	public void testParsingEmployments() throws Exception {
+
+		final List<String> employmentPaths = Arrays
+			.asList(
+				"/eu/dnetlib/dhp/collection/orcid/employment.xml",
+				"/eu/dnetlib/dhp/collection/orcid/employment_2.xml",
+				"/eu/dnetlib/dhp/collection/orcid/employment_3.xml"
+
+			);
+
+		final OrcidParser parser = new OrcidParser();
+		final ObjectMapper mapper = new ObjectMapper();
+		// A plain loop lets parser and I/O failures surface with their original type.
+		for (final String path : employmentPaths) {
+			System.out.println(mapper.writeValueAsString(parser.parseEmployment(readResource(path))));
+		}
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml
@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<work:work xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="26448226" path="/0000-0001-5010-5001/work/26448226" visibility="public">
+    <common:created-date>2016-09-01T19:22:46.768Z</common:created-date>
+    <common:last-modified-date>2022-05-25T03:48:56.968Z</common:last-modified-date>
+    <common:source>
+        <common:source-client-id>
+            <common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
+            <common:path>0000-0002-5982-8983</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-client-id>
+        <common:source-name>Scopus - Elsevier</common:source-name>
+        <common:assertion-origin-orcid>
+            <common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
+            <common:path>0000-0001-5010-5001</common:path>
+            <common:host>orcid.org</common:host>
+        </common:assertion-origin-orcid>
+        <common:assertion-origin-name>Quang Nguyen</common:assertion-origin-name>
+    </common:source>
+    <work:title>
+        <common:title>Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms</common:title>
+    </work:title>
+    <work:journal-title>American Journal of Neuroradiology</work:journal-title>
+    <work:citation>
+        <work:citation-type>bibtex</work:citation-type>
+        <work:citation-value>@article{Nguyen2014,title = {Vision outcomes and major complications after endovascular coil embolization of ophthalmic segment aneurysms},journal = {American Journal of Neuroradiology},year = {2014},volume = {35},number = {11},pages = {2140-2145},author = {Durst, C. and Starke, R.M. and Gaughen, J. and Nguyen, Q. and Patrie, J. and Jensen, M.E. and Evans, A.J.}}</work:citation-value>
+    </work:citation>
+    <work:type>journal-article</work:type>
+    <common:publication-date>
+        <common:year>2014</common:year>
+    </common:publication-date>
+    <common:external-ids>
+        <common:external-id>
+            <common:external-id-type>doi</common:external-id-type>
+            <common:external-id-value>10.3174/ajnr.A4032</common:external-id-value>
+            <common:external-id-normalized transient="true">10.3174/ajnr.a4032</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+        <common:external-id>
+            <common:external-id-type>eid</common:external-id-type>
+            <common:external-id-value>2-s2.0-84911865199</common:external-id-value>
+            <common:external-id-normalized transient="true">2-s2.0-84911865199</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+    </common:external-ids>
+    <common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-84911865199&amp;partnerID=MN8TOARS</common:url>
+    <work:contributors>
+        <work:contributor>
+            <work:credit-name>Durst, C.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Starke, R.M.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Gaughen, J.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Nguyen, Q.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Patrie, J.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Jensen, M.E.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Evans, A.J.</work:credit-name>
+        </work:contributor>
+    </work:contributors>
+</work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml
@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<work:work xmlns:address="http://www.orcid.org/ns/address"
+    xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
+    xmlns:employment="http://www.orcid.org/ns/employment"
+    xmlns:education="http://www.orcid.org/ns/education"
+    xmlns:other-name="http://www.orcid.org/ns/other-name"
+    xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+    xmlns:funding="http://www.orcid.org/ns/funding"
+    xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+    xmlns:service="http://www.orcid.org/ns/service"
+    xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+    xmlns:distinction="http://www.orcid.org/ns/distinction"
+    xmlns:internal="http://www.orcid.org/ns/internal"
+    xmlns:membership="http://www.orcid.org/ns/membership"
+    xmlns:person="http://www.orcid.org/ns/person"
+    xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+    xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+    xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+    xmlns:activities="http://www.orcid.org/ns/activities"
+    xmlns:qualification="http://www.orcid.org/ns/qualification"
+    xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+    xmlns:error="http://www.orcid.org/ns/error"
+    xmlns:preferences="http://www.orcid.org/ns/preferences"
+    xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+    xmlns:work="http://www.orcid.org/ns/work"
+    xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="50101152"
+    path="/0000-0001-5349-4030/work/50101152" visibility="public">
+    <common:created-date>2018-11-01T19:49:45.562Z</common:created-date>
+    <common:last-modified-date>2018-11-01T19:49:45.562Z</common:last-modified-date>
+    <common:source>
+        <common:source-client-id>
+            <common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
+            <common:path>0000-0002-5982-8983</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-client-id>
+        <common:source-name>Scopus - Elsevier</common:source-name>
+    </common:source>
+    <work:title>
+        <common:title>"Calling Out" in class: Degrees of candor in addressing social injustices in
+            racially homogenous and heterogeneous U.S. history classrooms</common:title>
+    </work:title>
+    <work:journal-title>Journal of Social Studies Research</work:journal-title>
+    <work:citation>
+        <work:citation-type>bibtex</work:citation-type>
+        <work:citation-value>@article{Massaro2018,title = {{"}Calling Out{"} in class: Degrees of
+            candor in addressing social injustices in racially homogenous and heterogeneous U.S.
+            history classrooms},journal = {Journal of Social Studies Research},year = {2018},author
+            = {Parkhouse, H. and Massaro, V.R.}}</work:citation-value>
+    </work:citation>
+    <work:type>journal-article</work:type>
+    <common:publication-date>
+        <common:year>2018</common:year>
+    </common:publication-date>
+    <common:external-ids>
+        <common:external-id>
+            <common:external-id-type>doi</common:external-id-type>
+            <common:external-id-value>10.1016/j.jssr.2018.01.004</common:external-id-value>
+            <common:external-id-normalized transient="true"
+                >10.1016/j.jssr.2018.01.004</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+        <common:external-id>
+            <common:external-id-type>eid</common:external-id-type>
+            <common:external-id-value>2-s2.0-85041949043</common:external-id-value>
+            <common:external-id-normalized transient="true"
+                >2-s2.0-85041949043</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+    </common:external-ids>
+    <common:url>http://www.scopus.com/inward/record.url?eid=2-s2.0-85041949043&amp;partnerID=MN8TOARS</common:url>
+    <work:contributors>
+        <work:contributor>
+            <work:credit-name>Parkhouse, H.</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Massaro, V.R.</work:credit-name>
+        </work:contributor>
+    </work:contributors>
+</work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml
@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<work:work xmlns:address="http://www.orcid.org/ns/address"
+           xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
+           xmlns:employment="http://www.orcid.org/ns/employment"
+           xmlns:education="http://www.orcid.org/ns/education"
+           xmlns:other-name="http://www.orcid.org/ns/other-name"
+           xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+           xmlns:funding="http://www.orcid.org/ns/funding"
+           xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+           xmlns:service="http://www.orcid.org/ns/service"
+           xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+           xmlns:distinction="http://www.orcid.org/ns/distinction"
+           xmlns:internal="http://www.orcid.org/ns/internal"
+           xmlns:membership="http://www.orcid.org/ns/membership"
+           xmlns:person="http://www.orcid.org/ns/person"
+           xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+           xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+           xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+           xmlns:activities="http://www.orcid.org/ns/activities"
+           xmlns:qualification="http://www.orcid.org/ns/qualification"
+           xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+           xmlns:error="http://www.orcid.org/ns/error"
+           xmlns:preferences="http://www.orcid.org/ns/preferences"
+           xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+           xmlns:work="http://www.orcid.org/ns/work"
+           xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
+           path="/0000-0003-2760-1191/work/28776099" visibility="public">
+    <common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
+    <common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
+            <common:path>0000-0002-9157-3431</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>Europe PubMed Central</common:source-name>
+    </common:source>
+    <work:title>
+        <common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
+            Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
+            ST-Segment-Elevation Myocardial Infarction.</common:title>
+    </work:title>
+    <work:citation>
+        <work:citation-type>formatted-unspecified</work:citation-type>
+        <work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
+            Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
+    </work:citation>
+    <work:type>journal-article</work:type>
+    <common:publication-date>
+        <common:year>2016</common:year>
+        <common:month>11</common:month>
+    </common:publication-date>
+    <common:external-ids>
+        <common:external-id>
+            <common:external-id-type>pmid</common:external-id-type>
+            <common:external-id-value>27899851</common:external-id-value>
+            <common:external-id-normalized transient="true">27899851</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+        <common:external-id>
+            <common:external-id-type>pmc</common:external-id-type>
+            <common:external-id-value>PMC5126442</common:external-id-value>
+            <common:external-id-normalized transient="true"
+            >PMC5126442</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+    </common:external-ids>
+    <common:url>http://europepmc.org/abstract/med/27899851</common:url>
+    <work:contributors>
+        <work:contributor>
+            <work:credit-name>Abdel-Dayem K</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Abdel-Dayem Fake</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Eweda II</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>El-Sherbiny A</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Dimitry MO</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Nammas W</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+    </work:contributors>
+</work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml
@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<work:work xmlns:address="http://www.orcid.org/ns/address"
+           xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
+           xmlns:employment="http://www.orcid.org/ns/employment"
+           xmlns:education="http://www.orcid.org/ns/education"
+           xmlns:other-name="http://www.orcid.org/ns/other-name"
+           xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+           xmlns:funding="http://www.orcid.org/ns/funding"
+           xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+           xmlns:service="http://www.orcid.org/ns/service"
+           xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+           xmlns:distinction="http://www.orcid.org/ns/distinction"
+           xmlns:internal="http://www.orcid.org/ns/internal"
+           xmlns:membership="http://www.orcid.org/ns/membership"
+           xmlns:person="http://www.orcid.org/ns/person"
+           xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+           xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+           xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+           xmlns:activities="http://www.orcid.org/ns/activities"
+           xmlns:qualification="http://www.orcid.org/ns/qualification"
+           xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+           xmlns:error="http://www.orcid.org/ns/error"
+           xmlns:preferences="http://www.orcid.org/ns/preferences"
+           xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+           xmlns:work="http://www.orcid.org/ns/work"
+           xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
+           path="/0000-0003-2760-1191/work/28776099" visibility="public">
+    <common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
+    <common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
+            <common:path>0000-0002-9157-3431</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>Europe PubMed Central</common:source-name>
+    </common:source>
+    <work:title>
+        <common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
+            Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
+            ST-Segment-Elevation Myocardial Infarction.</common:title>
+    </work:title>
+    <work:citation>
+        <work:citation-type>formatted-unspecified</work:citation-type>
+        <work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
+            Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
+    </work:citation>
+    <work:type>journal-article</work:type>
+    <common:publication-date>
+        <common:year>2016</common:year>
+        <common:month>11</common:month>
+    </common:publication-date>
+    <common:external-ids>
+        <common:external-id>
+            <common:external-id-type>pmid</common:external-id-type>
+            <common:external-id-value>27899851</common:external-id-value>
+            <common:external-id-normalized transient="true">27899851</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+        <common:external-id>
+            <common:external-id-type>pmc</common:external-id-type>
+            <common:external-id-value>PMC5126442</common:external-id-value>
+            <common:external-id-normalized transient="true"
+            >PMC5126442</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+    </common:external-ids>
+    <common:url>http://europepmc.org/abstract/med/27899851</common:url>
+    <work:contributors>
+        <work:contributor>
+            <work:credit-name>Khair Abde Daye</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Eweda II</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>El-Sherbiny A</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Dimitry MO</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>Nammas W</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>first</work:contributor-sequence>
+                <work:contributor-role>author</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+    </work:contributors>
+</work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml
@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<work:work xmlns:address="http://www.orcid.org/ns/address"
+           xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
+           xmlns:employment="http://www.orcid.org/ns/employment"
+           xmlns:education="http://www.orcid.org/ns/education"
+           xmlns:other-name="http://www.orcid.org/ns/other-name"
+           xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+           xmlns:funding="http://www.orcid.org/ns/funding"
+           xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+           xmlns:service="http://www.orcid.org/ns/service"
+           xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+           xmlns:distinction="http://www.orcid.org/ns/distinction"
+           xmlns:internal="http://www.orcid.org/ns/internal"
+           xmlns:membership="http://www.orcid.org/ns/membership"
+           xmlns:person="http://www.orcid.org/ns/person"
+           xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+           xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+           xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+           xmlns:activities="http://www.orcid.org/ns/activities"
+           xmlns:qualification="http://www.orcid.org/ns/qualification"
+           xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+           xmlns:error="http://www.orcid.org/ns/error"
+           xmlns:preferences="http://www.orcid.org/ns/preferences"
+           xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+           xmlns:work="http://www.orcid.org/ns/work"
+           xmlns:peer-review="http://www.orcid.org/ns/peer-review" put-code="28776099"
+           path="/0000-0003-2760-1191/work/28776099" visibility="public">
+    <common:created-date>2016-12-12T23:02:05.233Z</common:created-date>
+    <common:last-modified-date>2016-12-13T09:08:16.412Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0002-9157-3431</common:uri>
+            <common:path>0000-0002-9157-3431</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>Europe PubMed Central</common:source-name>
+    </common:source>
+    <work:title>
+        <common:title>Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which
+            Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for
+            ST-Segment-Elevation Myocardial Infarction.</common:title>
+    </work:title>
+    <work:citation>
+        <work:citation-type>formatted-unspecified</work:citation-type>
+        <work:citation-value>Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta
+            Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 649-655, 2016</work:citation-value>
+    </work:citation>
+    <work:type>journal-article</work:type>
+    <common:publication-date>
+        <common:year>2016</common:year>
+        <common:month>11</common:month>
+    </common:publication-date>
+    <common:external-ids>
+        <common:external-id>
+            <common:external-id-type>pmid</common:external-id-type>
+            <common:external-id-value>27899851</common:external-id-value>
+            <common:external-id-normalized transient="true">27899851</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+        <common:external-id>
+            <common:external-id-type>pmc</common:external-id-type>
+            <common:external-id-value>PMC5126442</common:external-id-value>
+            <common:external-id-normalized transient="true"
+            >PMC5126442</common:external-id-normalized>
+            <common:external-id-relationship>self</common:external-id-relationship>
+        </common:external-id>
+    </common:external-ids>
+    <common:url>http://europepmc.org/abstract/med/27899851</common:url>
+    <work:contributors>
+        <work:contributor>
+            <work:contributor-attributes>
+                <work:contributor-sequence>seq0</work:contributor-sequence>
+                <work:contributor-role>role0</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>creditname1</work:credit-name>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>creditname2</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>seq2</work:contributor-sequence>
+                <work:contributor-role></work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name>creditname3</work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence></work:contributor-sequence>
+                <work:contributor-role>role3</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+        <work:contributor>
+            <work:credit-name></work:credit-name>
+            <work:contributor-attributes>
+                <work:contributor-sequence>seq4</work:contributor-sequence>
+                <work:contributor-role>role4</work:contributor-role>
+            </work:contributor-attributes>
+        </work:contributor>
+    </work:contributors>
+</work:work>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment.xml
@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
+                       xmlns:history="http://www.orcid.org/ns/history"
+                       xmlns:employment="http://www.orcid.org/ns/employment"
+                       xmlns:education="http://www.orcid.org/ns/education"
+                       xmlns:other-name="http://www.orcid.org/ns/other-name"
+                       xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+                       xmlns:funding="http://www.orcid.org/ns/funding"
+                       xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+                       xmlns:service="http://www.orcid.org/ns/service"
+                       xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+                       xmlns:distinction="http://www.orcid.org/ns/distinction"
+                       xmlns:internal="http://www.orcid.org/ns/internal"
+                       xmlns:membership="http://www.orcid.org/ns/membership"
+                       xmlns:person="http://www.orcid.org/ns/person"
+                       xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+                       xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+                       xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+                       xmlns:activities="http://www.orcid.org/ns/activities"
+                       xmlns:qualification="http://www.orcid.org/ns/qualification"
+                       xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+                       xmlns:error="http://www.orcid.org/ns/error"
+                       xmlns:preferences="http://www.orcid.org/ns/preferences"
+                       xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+                       xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
+                       put-code="2205087" path="/0000-0001-5010-5001/employment/2205087" display-index="0"
+                       visibility="public">
+    <common:created-date>2016-09-01T19:21:05.791Z</common:created-date>
+    <common:last-modified-date>2016-09-01T19:21:05.791Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0001-5010-5001</common:uri>
+            <common:path>0000-0001-5010-5001</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>Quang Nguyen</common:source-name>
+    </common:source>
+    <common:organization>
+        <common:name>Beth Israel Deaconess Medical Center</common:name>
+        <common:address>
+            <common:city>Boston</common:city>
+            <common:region>MA</common:region>
+            <common:country>US</common:country>
+        </common:address>
+        <common:disambiguated-organization>
+            <common:disambiguated-organization-identifier>1859</common:disambiguated-organization-identifier>
+            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
+        </common:disambiguated-organization>
+    </common:organization>
+</employment:employment>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_2.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_2.xml
@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
+                       xmlns:history="http://www.orcid.org/ns/history"
+                       xmlns:employment="http://www.orcid.org/ns/employment"
+                       xmlns:education="http://www.orcid.org/ns/education"
+                       xmlns:other-name="http://www.orcid.org/ns/other-name"
+                       xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+                       xmlns:funding="http://www.orcid.org/ns/funding"
+                       xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+                       xmlns:service="http://www.orcid.org/ns/service"
+                       xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+                       xmlns:distinction="http://www.orcid.org/ns/distinction"
+                       xmlns:internal="http://www.orcid.org/ns/internal"
+                       xmlns:membership="http://www.orcid.org/ns/membership"
+                       xmlns:person="http://www.orcid.org/ns/person"
+                       xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+                       xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+                       xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+                       xmlns:activities="http://www.orcid.org/ns/activities"
+                       xmlns:qualification="http://www.orcid.org/ns/qualification"
+                       xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+                       xmlns:error="http://www.orcid.org/ns/error"
+                       xmlns:preferences="http://www.orcid.org/ns/preferences"
+                       xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+                       xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
+                       put-code="6364960" path="/0000-0001-5011-3001/employment/6364960" display-index="1"
+                       visibility="public">
+    <common:created-date>2018-09-03T01:46:19.474Z</common:created-date>
+    <common:last-modified-date>2018-09-03T01:46:19.474Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0001-5011-3001</common:uri>
+            <common:path>0000-0001-5011-3001</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>zhengyan li</common:source-name>
+    </common:source>
+    <common:start-date>
+        <common:year>2008</common:year>
+        <common:month>09</common:month>
+        <common:day>01</common:day>
+    </common:start-date>
+    <common:organization>
+        <common:name>Anhui Academy of Agricultural Sciences</common:name>
+        <common:address>
+            <common:city>Hefei</common:city>
+            <common:region>Anhui</common:region>
+            <common:country>CN</common:country>
+        </common:address>
+        <common:disambiguated-organization>
+            <common:disambiguated-organization-identifier>125385</common:disambiguated-organization-identifier>
+            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
+        </common:disambiguated-organization>
+    </common:organization>
+</employment:employment>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_3.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/employment_3.xml
@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<employment:employment xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email"
+                       xmlns:history="http://www.orcid.org/ns/history"
+                       xmlns:employment="http://www.orcid.org/ns/employment"
+                       xmlns:education="http://www.orcid.org/ns/education"
+                       xmlns:other-name="http://www.orcid.org/ns/other-name"
+                       xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+                       xmlns:funding="http://www.orcid.org/ns/funding"
+                       xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+                       xmlns:service="http://www.orcid.org/ns/service"
+                       xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+                       xmlns:distinction="http://www.orcid.org/ns/distinction"
+                       xmlns:internal="http://www.orcid.org/ns/internal"
+                       xmlns:membership="http://www.orcid.org/ns/membership"
+                       xmlns:person="http://www.orcid.org/ns/person"
+                       xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+                       xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+                       xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+                       xmlns:activities="http://www.orcid.org/ns/activities"
+                       xmlns:qualification="http://www.orcid.org/ns/qualification"
+                       xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+                       xmlns:error="http://www.orcid.org/ns/error"
+                       xmlns:preferences="http://www.orcid.org/ns/preferences"
+                       xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+                       xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review"
+                       put-code="7210424" path="/0000-0001-5022-8001/employment/7210424" display-index="1"
+                       visibility="public">
+    <common:created-date>2021-03-11T14:48:29.603Z</common:created-date>
+    <common:last-modified-date>2021-03-11T14:48:29.603Z</common:last-modified-date>
+    <common:source>
+        <common:source-orcid>
+            <common:uri>https://orcid.org/0000-0001-5012-1001</common:uri>
+            <common:path>0000-0001-5012-1001</common:path>
+            <common:host>orcid.org</common:host>
+        </common:source-orcid>
+        <common:source-name>Asma Bazzi</common:source-name>
+    </common:source>
+    <common:department-name>Pathology and Laboratory Medicine</common:department-name>
+    <common:role-title>Medical Laboratory Technologist</common:role-title>
+    <common:start-date>
+        <common:year>1994</common:year>
+        <common:month>10</common:month>
+        <common:day>01</common:day>
+    </common:start-date>
+    <common:end-date>
+        <common:year>2000</common:year>
+        <common:month>06</common:month>
+        <common:day>30</common:day>
+    </common:end-date>
+    <common:organization>
+        <common:name>American University of Beirut</common:name>
+        <common:address>
+            <common:city>Hamra</common:city>
+            <common:region>Beirut</common:region>
+            <common:country>LB</common:country>
+        </common:address>
+        <common:disambiguated-organization>
+            <common:disambiguated-organization-identifier>11238</common:disambiguated-organization-identifier>
+            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
+        </common:disambiguated-organization>
+    </common:organization>
+</employment:employment>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/summary.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/orcid/summary.xml
@ -0,0 +1,581 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<record:record xmlns:address="http://www.orcid.org/ns/address" xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:education="http://www.orcid.org/ns/education" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:research-resource="http://www.orcid.org/ns/research-resource" xmlns:service="http://www.orcid.org/ns/service" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:internal="http://www.orcid.org/ns/internal" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:person="http://www.orcid.org/ns/person" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:error="http://www.orcid.org/ns/error" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:peer-review="http://www.orcid.org/ns/peer-review" path="/0000-0001-5045-1000">
+    <common:orcid-identifier>
+        <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+        <common:path>0000-0001-5045-1000</common:path>
+        <common:host>orcid.org</common:host>
+    </common:orcid-identifier>
+    <preferences:preferences>
+        <preferences:locale>es</preferences:locale>
+    </preferences:preferences>
+    <history:history>
+        <history:creation-method>Direct</history:creation-method>
+        <history:submission-date>2023-01-17T23:50:40.215Z</history:submission-date>
+        <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
+        <history:claimed>true</history:claimed>
+        <history:verified-email>true</history:verified-email>
+        <history:verified-primary-email>true</history:verified-primary-email>
+    </history:history>
+    <person:person path="/0000-0001-5045-1000/person">
+        <person:name visibility="public" path="0000-0001-5045-1000">
+            <common:created-date>2023-01-17T23:50:40.472Z</common:created-date>
+            <common:last-modified-date>2023-01-17T23:50:40.472Z</common:last-modified-date>
+            <personal-details:given-names>Patricio</personal-details:given-names>
+            <personal-details:family-name>Sánchez Quinchuela</personal-details:family-name>
+        </person:name>
+        <other-name:other-names path="/0000-0001-5045-1000/other-names"/>
+        <person:biography visibility="public" path="/0000-0001-5045-1000/biography">
+            <common:created-date>2023-01-19T13:47:33.653Z</common:created-date>
+            <common:last-modified-date>2023-01-19T13:47:33.653Z</common:last-modified-date>
+            <personal-details:content>Especialista de vinculación con la sociedad y docente de la Universidad de las Artes. Magister en Economía Social y Solidaria por el IAEN; Magister en Proyectos Sociales y Productivos por la UNACH. Licenciado en Artes UCE. Licenciado en Castellano y Literatura por la UNACH. Doctorando del programa de Sociología de la UNED España. Larga trayectoria vinculado a las organizaciones sociales acompañando procesos de gestión cultural, formación de liderazgos y economía solidaria.</personal-details:content>
+        </person:biography>
+        <researcher-url:researcher-urls path="/0000-0001-5045-1000/researcher-urls"/>
+        <email:emails path="/0000-0001-5045-1000/email"/>
+        <address:addresses path="/0000-0001-5045-1000/address"/>
+        <keyword:keywords path="/0000-0001-5045-1000/keywords"/>
+        <external-identifier:external-identifiers path="/0000-0001-7291-3210/external-identifiers">
+            <common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
+            <external-identifier:external-identifier put-code="134902" visibility="public" path="/0000-0001-7291-3210/external-identifiers/134902" display-index="1">
+                <common:created-date>2013-03-08T03:20:39.347Z</common:created-date>
+                <common:last-modified-date>2018-02-05T23:27:36.636Z</common:last-modified-date>
+                <common:source>
+                    <common:source-client-id>
+                        <common:uri>https://orcid.org/client/0000-0002-5982-8983</common:uri>
+                        <common:path>0000-0002-5982-8983</common:path>
+                        <common:host>orcid.org</common:host>
+                    </common:source-client-id>
+                    <common:source-name>Scopus - Elsevier</common:source-name>
+                    <common:assertion-origin-orcid>
+                        <common:uri>https://orcid.org/0000-0001-7291-3210</common:uri>
+                        <common:path>0000-0001-7291-3210</common:path>
+                        <common:host>orcid.org</common:host>
+                    </common:assertion-origin-orcid>
+                    <common:assertion-origin-name>Paolo Manghi</common:assertion-origin-name>
+                </common:source>
+                <common:external-id-type>Scopus Author ID</common:external-id-type>
+                <common:external-id-value>6602255248</common:external-id-value>
+                <common:external-id-url>http://www.scopus.com/inward/authorDetails.url?authorID=6602255248&amp;partnerID=MN8TOARS</common:external-id-url>
+                <common:external-id-relationship>self</common:external-id-relationship>
+            </external-identifier:external-identifier>
+        </external-identifier:external-identifiers>
+    </person:person>
+    <activities:activities-summary path="/0000-0001-5045-1000/activities">
+        <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
+        <activities:distinctions path="/0000-0001-5045-1000/distinctions">
+            <common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
+                <common:external-ids/>
+                <distinction:distinction-summary put-code="19395146" display-index="1" path="/0000-0001-5045-1000/distinction/19395146" visibility="public">
+                    <common:created-date>2023-01-19T13:49:48.482Z</common:created-date>
+                    <common:last-modified-date>2023-01-19T13:49:48.482Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Programa de Maestría</common:department-name>
+                    <common:role-title>Becario del programa de Maestría en Economía Social y Solidaria</common:role-title>
+                    <common:start-date>
+                        <common:year>2014</common:year>
+                        <common:month>10</common:month>
+                        <common:day>20</common:day>
+                    </common:start-date>
+                    <common:organization>
+                        <common:name>Instituto de Altos Estudios Nacionales</common:name>
+                        <common:address>
+                            <common:city>Quito</common:city>
+                            <common:region>Pichincha</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </distinction:distinction-summary>
+            </activities:affiliation-group>
+        </activities:distinctions>
+        <activities:educations path="/0000-0001-5045-1000/educations">
+            <common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
+                <common:external-ids/>
+                <education:education-summary put-code="19389331" display-index="1" path="/0000-0001-5045-1000/education/19389331" visibility="public">
+                    <common:created-date>2023-01-18T21:41:03.175Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:41:03.175Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Programa de Doctorado en Sociología</common:department-name>
+                    <common:role-title>Doctorando del Programa de Sociología</common:role-title>
+                    <common:start-date>
+                        <common:year>2020</common:year>
+                        <common:month>11</common:month>
+                        <common:day>06</common:day>
+                    </common:start-date>
+                    <common:organization>
+                        <common:name>Universidad Nacional de Educación a Distancia Facultad de Ciencias Políticas y Sociología</common:name>
+                        <common:address>
+                            <common:city>Madrid</common:city>
+                            <common:region>Comunidad de Madrid</common:region>
+                            <common:country>ES</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>223339</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>RINGGOLD</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </education:education-summary>
+            </activities:affiliation-group>
+        </activities:educations>
+        <activities:employments path="/0000-0001-5045-1000/employments">
+            <common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
+                <common:external-ids/>
+                <employment:employment-summary put-code="19379757" display-index="1" path="/0000-0001-5045-1000/employment/19379757" visibility="public">
+                    <common:created-date>2023-01-17T23:57:08.246Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:22:21.513Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
+                    <common:role-title>Especialista de Proyectos y docente</common:role-title>
+                    <common:start-date>
+                        <common:year>2021</common:year>
+                        <common:month>11</common:month>
+                        <common:day>01</common:day>
+                    </common:start-date>
+                    <common:organization>
+                        <common:name>Universidad de las Artes</common:name>
+                        <common:address>
+                            <common:city>Guayaquil</common:city>
+                            <common:region>Guayas</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/016drwn73</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </employment:employment-summary>
+            </activities:affiliation-group>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
+                <common:external-ids/>
+                <employment:employment-summary put-code="19389234" display-index="1" path="/0000-0001-5045-1000/employment/19389234" visibility="public">
+                    <common:created-date>2023-01-18T21:25:07.138Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:25:07.138Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Dirección de Vinculación con la Sociedad</common:department-name>
+                    <common:role-title>Director</common:role-title>
+                    <common:start-date>
+                        <common:year>2019</common:year>
+                        <common:month>11</common:month>
+                        <common:day>05</common:day>
+                    </common:start-date>
+                    <common:end-date>
+                        <common:year>2021</common:year>
+                        <common:month>10</common:month>
+                        <common:day>31</common:day>
+                    </common:end-date>
+                    <common:organization>
+                        <common:name>Universidad Regional Amazónica IKIAM</common:name>
+                        <common:address>
+                            <common:city>Tena</common:city>
+                            <common:region>Napo</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/05xedqd83</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                    <common:url>http://ikiam.edu.ec</common:url>
+                </employment:employment-summary>
+            </activities:affiliation-group>
+        </activities:employments>
+        <activities:fundings path="/0000-0001-5045-1000/fundings"/>
+        <activities:invited-positions path="/0000-0001-5045-1000/invited-positions"/>
+        <activities:memberships path="/0000-0001-5045-1000/memberships">
+            <common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
+                <common:external-ids/>
+                <membership:membership-summary put-code="19927715" display-index="1" path="/0000-0001-5045-1000/membership/19927715" visibility="public">
+                    <common:created-date>2023-03-24T18:16:09.131Z</common:created-date>
+                    <common:last-modified-date>2023-03-24T18:16:09.131Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Artes Escénicas</common:department-name>
+                    <common:role-title>Miembro</common:role-title>
+                    <common:start-date>
+                        <common:year>2000</common:year>
+                        <common:month>07</common:month>
+                        <common:day>15</common:day>
+                    </common:start-date>
+                    <common:organization>
+                        <common:name>Casa de la Cultura Ecuatoriana</common:name>
+                        <common:address>
+                            <common:city>Riobamba</common:city>
+                            <common:region>Sierra Centro</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                    </common:organization>
+                </membership:membership-summary>
+            </activities:affiliation-group>
+        </activities:memberships>
+        <activities:peer-reviews path="/0000-0001-5045-1000/peer-reviews"/>
+        <activities:qualifications path="/0000-0001-5045-1000/qualifications">
+            <common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
+                <common:external-ids/>
+                <qualification:qualification-summary put-code="19389264" display-index="1" path="/0000-0001-5045-1000/qualification/19389264" visibility="public">
+                    <common:created-date>2023-01-18T21:29:11.300Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:29:11.300Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Programa de Gobernabilidad</common:department-name>
+                    <common:role-title>Magister en Economïa Social y Solidaria</common:role-title>
+                    <common:start-date>
+                        <common:year>2014</common:year>
+                        <common:month>10</common:month>
+                        <common:day>20</common:day>
+                    </common:start-date>
+                    <common:end-date>
+                        <common:year>2017</common:year>
+                        <common:month>01</common:month>
+                        <common:day>26</common:day>
+                    </common:end-date>
+                    <common:organization>
+                        <common:name>Instituto de Altos Estudios Nacionales</common:name>
+                        <common:address>
+                            <common:city>Quito</common:city>
+                            <common:region>Pichincha</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/011g3me54</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </qualification:qualification-summary>
+            </activities:affiliation-group>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
+                <common:external-ids/>
+                <qualification:qualification-summary put-code="19389298" display-index="1" path="/0000-0001-5045-1000/qualification/19389298" visibility="public">
+                    <common:created-date>2023-01-18T21:34:32.093Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:34:32.093Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Posgrados</common:department-name>
+                    <common:role-title>Magister en Proyectos Sociales y Productivos</common:role-title>
+                    <common:start-date>
+                        <common:year>2001</common:year>
+                        <common:month>03</common:month>
+                        <common:day>09</common:day>
+                    </common:start-date>
+                    <common:end-date>
+                        <common:year>2003</common:year>
+                        <common:month>02</common:month>
+                        <common:day>27</common:day>
+                    </common:end-date>
+                    <common:organization>
+                        <common:name>Universidad Nacional de Chimborazo</common:name>
+                        <common:address>
+                            <common:city>Riobamba</common:city>
+                            <common:region>Chimborazo</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </qualification:qualification-summary>
+            </activities:affiliation-group>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
+                <common:external-ids/>
+                <qualification:qualification-summary put-code="19389353" display-index="1" path="/0000-0001-5045-1000/qualification/19389353" visibility="public">
+                    <common:created-date>2023-01-18T21:45:07.379Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:45:07.379Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Ciencias de la Educación</common:department-name>
+                    <common:role-title>Licenciado en Ciencias de la Educación en Castellano y Literatura</common:role-title>
+                    <common:start-date>
+                        <common:year>1994</common:year>
+                        <common:month>10</common:month>
+                        <common:day>03</common:day>
+                    </common:start-date>
+                    <common:end-date>
+                        <common:year>2000</common:year>
+                        <common:month>01</common:month>
+                        <common:day>31</common:day>
+                    </common:end-date>
+                    <common:organization>
+                        <common:name>Universidad Nacional de Chimborazo</common:name>
+                        <common:address>
+                            <common:city>Riobamba</common:city>
+                            <common:region>Chimborazo</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>https://ror.org/059wmd288</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>ROR</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </qualification:qualification-summary>
+            </activities:affiliation-group>
+            <activities:affiliation-group>
+                <common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
+                <common:external-ids/>
+                <qualification:qualification-summary put-code="19389317" display-index="1" path="/0000-0001-5045-1000/qualification/19389317" visibility="public">
+                    <common:created-date>2023-01-18T21:37:42.186Z</common:created-date>
+                    <common:last-modified-date>2023-01-18T21:37:42.186Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <common:department-name>Facultad de Artes</common:department-name>
+                    <common:role-title>Licenciado en Artes</common:role-title>
+                    <common:start-date>
+                        <common:year>1989</common:year>
+                        <common:month>09</common:month>
+                        <common:day>05</common:day>
+                    </common:start-date>
+                    <common:end-date>
+                        <common:year>1997</common:year>
+                        <common:month>08</common:month>
+                        <common:day>07</common:day>
+                    </common:end-date>
+                    <common:organization>
+                        <common:name>Universidad Central del Ecuador</common:name>
+                        <common:address>
+                            <common:city>Quito</common:city>
+                            <common:region>Pichincha</common:region>
+                            <common:country>EC</common:country>
+                        </common:address>
+                        <common:disambiguated-organization>
+                            <common:disambiguated-organization-identifier>http://dx.doi.org/10.13039/100019134</common:disambiguated-organization-identifier>
+                            <common:disambiguation-source>FUNDREF</common:disambiguation-source>
+                        </common:disambiguated-organization>
+                    </common:organization>
+                </qualification:qualification-summary>
+            </activities:affiliation-group>
+        </activities:qualifications>
+        <activities:research-resources path="/0000-0001-5045-1000/research-resources"/>
+        <activities:services path="/0000-0001-5045-1000/services"/>
+        <activities:works path="/0000-0001-5045-1000/works">
+            <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
+            <activities:group>
+                <common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
+                <common:external-ids/>
+                <work:work-summary put-code="131526645" path="/0000-0001-5045-1000/work/131526645" visibility="public" display-index="1">
+                    <common:created-date>2023-03-24T18:36:56.180Z</common:created-date>
+                    <common:last-modified-date>2023-06-09T22:15:12.910Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <work:title>
+                        <common:title>Experience in a non-capitalist way: solidarity funds that do not tax interest on the use of money</common:title>
+                    </work:title>
+                    <common:external-ids>
+                        <common:external-id>
+                            <common:external-id-type>isbn</common:external-id-type>
+                            <common:external-id-value>978-9942-29-089-2</common:external-id-value>
+                            <common:external-id-normalized transient="true">9789942290892</common:external-id-normalized>
+                            <common:external-id-relationship>part-of</common:external-id-relationship>
+                        </common:external-id>
+                    </common:external-ids>
+                    <work:type>book-chapter</work:type>
+                    <common:publication-date>
+                        <common:year>2023</common:year>
+                        <common:month>06</common:month>
+                        <common:day>07</common:day>
+                    </common:publication-date>
+                    <work:journal-title>Finanzas éticas y solidarias en América Latina: diagnósticos, debates y propuestas</work:journal-title>
+                </work:work-summary>
+            </activities:group>
+            <activities:group>
+                <common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
+                <common:external-ids/>
+                <work:work-summary put-code="131527819" path="/0000-0001-5045-1000/work/131527819" visibility="public" display-index="1">
+                    <common:created-date>2023-03-24T19:05:36.384Z</common:created-date>
+                    <common:last-modified-date>2023-03-24T19:05:36.384Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <work:title>
+                        <common:title>Incidence of artistic practices in the social transformation of the territory. study of case: Hilarte Association, Guayaquil-Ecuador</common:title>
+                    </work:title>
+                    <common:external-ids/>
+                    <work:type>conference-abstract</work:type>
+                    <common:publication-date>
+                        <common:year>2022</common:year>
+                        <common:month>10</common:month>
+                        <common:day>06</common:day>
+                    </common:publication-date>
+                </work:work-summary>
+            </activities:group>
+            <activities:group>
+                <common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
+                <common:external-ids>
+                    <common:external-id>
+                        <common:external-id-type>other-id</common:external-id-type>
+                        <common:external-id-value>2018</common:external-id-value>
+                        <common:external-id-normalized transient="true">2018</common:external-id-normalized>
+                        <common:external-id-relationship>self</common:external-id-relationship>
+                    </common:external-id>
+                </common:external-ids>
+                <work:work-summary put-code="141716337" path="/0000-0001-5045-1000/work/141716337" visibility="public" display-index="1">
+                    <common:created-date>2023-09-04T17:40:30.215Z</common:created-date>
+                    <common:last-modified-date>2023-09-04T17:40:30.215Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <work:title>
+                        <common:title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</common:title>
+                    </work:title>
+                    <common:external-ids>
+                        <common:external-id>
+                            <common:external-id-type>other-id</common:external-id-type>
+                            <common:external-id-value>2018</common:external-id-value>
+                            <common:external-id-normalized transient="true">2018</common:external-id-normalized>
+                            <common:external-id-relationship>self</common:external-id-relationship>
+                        </common:external-id>
+                    </common:external-ids>
+                    <common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
+                    <work:type>conference-poster</work:type>
+                    <common:publication-date>
+                        <common:year>2018</common:year>
+                        <common:month>11</common:month>
+                        <common:day>30</common:day>
+                    </common:publication-date>
+                    <work:journal-title>Más allá de la transferencia de conocimientos, un espacio para el interaprendizaje y el diálogo de saberes</work:journal-title>
+                </work:work-summary>
+            </activities:group>
+            <activities:group>
+                <common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
+                <common:external-ids/>
+                <work:work-summary put-code="131527433" path="/0000-0001-5045-1000/work/131527433" visibility="public" display-index="1">
+                    <common:created-date>2023-03-24T18:57:10.095Z</common:created-date>
+                    <common:last-modified-date>2023-03-24T18:57:10.095Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <work:title>
+                        <common:title>Promotion of the popular and solidarity economy from the state: principles and challenges in the experience of Ecuador</common:title>
+                    </work:title>
+                    <common:external-ids/>
+                    <work:type>dissertation-thesis</work:type>
+                    <common:publication-date>
+                        <common:year>2017</common:year>
+                        <common:month>01</common:month>
+                        <common:day>26</common:day>
+                    </common:publication-date>
+                </work:work-summary>
+            </activities:group>
+            <activities:group>
+                <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
+                <common:external-ids/>
+                <work:work-summary put-code="141716713" path="/0000-0001-5045-1000/work/141716713" visibility="public" display-index="1">
+                    <common:created-date>2023-09-04T17:51:57.749Z</common:created-date>
+                    <common:last-modified-date>2023-09-04T17:51:57.749Z</common:last-modified-date>
+                    <common:source>
+                        <common:source-orcid>
+                            <common:uri>https://orcid.org/0000-0001-5045-1000</common:uri>
+                            <common:path>0000-0001-5045-1000</common:path>
+                            <common:host>orcid.org</common:host>
+                        </common:source-orcid>
+                        <common:source-name>Patricio Sánchez Quinchuela</common:source-name>
+                    </common:source>
+                    <work:title>
+                        <common:title>La Rebelión de los Dioses</common:title>
+                    </work:title>
+                    <common:external-ids/>
+                    <common:url>https://drive.google.com/drive/folders/1Tclz6isxGzSjTq-hfTnxe6M1nux-88wF?usp=drive_link</common:url>
+                    <work:type>registered-copyright</work:type>
+                    <common:publication-date>
+                        <common:year>2001</common:year>
+                        <common:month>08</common:month>
+                        <common:day>28</common:day>
+                    </common:publication-date>
+                    <work:journal-title>Editorial pedagógica freire</work:journal-title>
+                </work:work-summary>
+            </activities:group>
+        </activities:works>
+    </activities:activities-summary>
+</record:record>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
@ -133,32 +133,6 @@
            <arg>--targetPath</arg><arg>${inputPathMAG}/dataset</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
-        <ok to="PreProcessORCID"/>
-        <error to="Kill"/>
-    </action>
-
-    <!--  ORCID  SECTION -->
-    <action name="PreProcessORCID">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn-cluster</master>
-            <mode>cluster</mode>
-            <name>Convert ORCID to Dataset</name>
-            <class>eu.dnetlib.doiboost.orcid.SparkPreprocessORCID</class>
-            <jar>dhp-doiboost-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.sql.shuffle.partitions=3840
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${inputPathOrcid}</arg>
-            <arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
-            <arg>--master</arg><arg>yarn-cluster</arg>
-        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml
@ -59,10 +59,10 @@
        </property>

        <!--    ORCID Parameters    -->
-        <property>
-            <name>workingPathOrcid</name>
-            <description>the ORCID working path</description>
-        </property>
+<!--        <property>-->
+<!--            <name>workingPathOrcid</name>-->
+<!--            <description>the ORCID working path</description>-->
+<!--        </property>-->

    </parameters>

@ -170,32 +170,6 @@
            <arg>--targetPath</arg><arg>${workingPath}/uwPublication</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
-        <ok to="ProcessORCID"/>
-        <error to="Kill"/>
-    </action>
-
-    <!--  ORCID  SECTION -->
-    <action name="ProcessORCID">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn-cluster</master>
-            <mode>cluster</mode>
-            <name>Convert ORCID to Dataset</name>
-            <class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
-            <jar>dhp-doiboost-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.sql.shuffle.partitions=3840
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPathOrcid}</arg>
-            <arg>--targetPath</arg><arg>${workingPath}/orcidPublication</arg>
-            <arg>--master</arg><arg>yarn-cluster</arg>
-        </spark>
        <ok to="CreateDOIBoost"/>
        <error to="Kill"/>
    </action>
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
@ -66,7 +66,7 @@ object SparkGenerateDoiBoost {
      Encoders.tuple(Encoders.STRING, mapEncoderPub)
    implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]

-    logger.info("Phase 2) Join Crossref with UnpayWall")
+    logger.info("Phase 1) Join Crossref with UnpayWall")

    val crossrefPublication: Dataset[(String, Publication)] =
      spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
@ -91,20 +91,10 @@ object SparkGenerateDoiBoost {
      .write
      .mode(SaveMode.Overwrite)
      .save(s"$workingDirPath/firstJoin")
-    logger.info("Phase 3) Join Result with ORCID")
-    val fj: Dataset[(String, Publication)] =
-      spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
-    val orcidPublication: Dataset[(String, Publication)] =
-      spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
-    fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left")
-      .map(applyMerge)
-      .write
-      .mode(SaveMode.Overwrite)
-      .save(s"$workingDirPath/secondJoin")

-    logger.info("Phase 4) Join Result with MAG")
+    logger.info("Phase 2) Join Result with MAG")
    val sj: Dataset[(String, Publication)] =
-      spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p))
+      spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))

    val magPublication: Dataset[(String, Publication)] =
      spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))
--- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@ -107,7 +107,7 @@ case object Crossref2Oaf {
      .map(f => f.id)
  }

-  def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType:String): Result = {
+  def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats

    //MAPPING Crossref DOI into PID
@ -372,7 +372,7 @@ case object Crossref2Oaf {
      objectType,
      mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
    )
-    mappingResult(result, json, cOBJCategory, originalType)
+    mappingResult(result, json, cOBJCategory, objectSubType)
    if (result == null || result.getId == null)
      return List()

--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json
@ -0,0 +1,26 @@
+[
+  {
+    "paramName": "mt",
+    "paramLongName": "master",
+    "paramDescription": "should be local or yarn",
+    "paramRequired": true
+  },
+  {
+    "paramName": "op",
+    "paramLongName": "orcidPath",
+    "paramDescription": "the path of the orcid Table generated by the dump",
+    "paramRequired": true
+  },
+  {
+    "paramName": "gp",
+    "paramLongName": "graphPath",
+    "paramDescription": "the path of the graph we want to apply enrichment",
+    "paramRequired": true
+  },
+  {
+    "paramName": "tp",
+    "paramLongName": "targetPath",
+    "paramDescription": "the output path of the graph enriched",
+    "paramRequired": true
+  }
+]
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/config-default.xml
@ -0,0 +1,34 @@
+<configuration>
+    <!-- Default property values for the Oozie application in this directory (ORCID graph enrichment). -->
+
+    <!-- Cluster endpoints: resource manager and HDFS name service -->
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <!-- Use the Oozie system libpath and pick the spark2 sharelib for Spark actions -->
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+    <!-- NOTE(review): the three hive* properties below are not referenced by the workflow.xml
+         that accompanies this file; confirm whether they are still needed. -->
+    <property>
+        <name>hiveMetastoreUris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>hiveJdbcUrl</name>
+        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
+    </property>
+    <property>
+        <name>hiveDbName</name>
+        <value>openaire</value>
+    </property>
+    <!-- Give the classes shipped with the application precedence in the launcher classpath -->
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml
@ -0,0 +1,88 @@
+<workflow-app name="Enrich_graph_with_ORCID_Workflow" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>orcidPath</name>
+            <description>the path of the orcid Table generated by the dump</description>
+        </property>
+        <property>
+            <name>graphPath</name>
+            <description>the path of the graph to be enriched</description>
+        </property>
+        <property>
+            <name>targetPath</name>
+            <description>the output path of the enriched graph</description>
+        </property>
+    </parameters>
+
+    <start to="EnrichGraph"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <!-- Runs the Spark job that enriches the authors of the result entities with ORCID data.
+         On success the workflow continues with the distcp chain starting at copy_datasource,
+         which copies the entity tables that the Spark job does not write; on failure it goes
+         to Kill. -->
+    <action name="EnrichGraph">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Enrich Graph with ORCID</name>
+            <class>eu.dnetlib.dhp.enrich.orcid.SparkEnrichGraphWithOrcidAuthors</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.executor.memoryOverhead=2g
+                --conf spark.sql.shuffle.partitions=3000
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+            </spark-opts>
+            <arg>--orcidPath</arg>
+            <arg>${orcidPath}</arg>
+            <arg>--targetPath</arg>
+            <arg>${targetPath}</arg>
+            <arg>--graphPath</arg>
+            <arg>${graphPath}</arg>
+            <arg>--master</arg>
+            <arg>yarn</arg>
+        </spark>
+        <!-- was to="End", which left the whole copy_* chain unreachable -->
+        <ok to="copy_datasource"/>
+        <error to="Kill"/>
+    </action>
+
+
+    <!-- Chain of distcp actions copying, one table per action, the graph tables that the Spark
+         enrichment job does not write (datasource, organization, project, relation) from
+         ${graphPath} to ${targetPath}. Each action moves on to the next one on success and to
+         Kill on error; the last one (copy_relation) ends the workflow. -->
+    <action name="copy_datasource">
+        <distcp xmlns="uri:oozie:distcp-action:0.2">
+            <arg>${nameNode}/${graphPath}/datasource</arg>
+            <arg>${nameNode}/${targetPath}/datasource</arg>
+        </distcp>
+        <ok to="copy_organization"/>
+        <error to="Kill"/>
+    </action>
+    <action name="copy_organization">
+        <distcp xmlns="uri:oozie:distcp-action:0.2">
+            <arg>${nameNode}/${graphPath}/organization</arg>
+            <arg>${nameNode}/${targetPath}/organization</arg>
+        </distcp>
+        <ok to="copy_project"/>
+        <error to="Kill"/>
+    </action>
+    <action name="copy_project">
+        <distcp xmlns="uri:oozie:distcp-action:0.2">
+            <arg>${nameNode}/${graphPath}/project</arg>
+            <arg>${nameNode}/${targetPath}/project</arg>
+        </distcp>
+        <ok to="copy_relation"/>
+        <error to="Kill"/>
+    </action>
+    <action name="copy_relation">
+        <distcp xmlns="uri:oozie:distcp-action:0.2">
+            <arg>${nameNode}/${graphPath}/relation</arg>
+            <arg>${nameNode}/${targetPath}/relation</arg>
+        </distcp>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+    <end name="End"/>
+
+</workflow-app>
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/AuthorEnricher.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/AuthorEnricher.scala
@ -0,0 +1,40 @@
+package eu.dnetlib.dhp.enrich.orcid
+
+import eu.dnetlib.dhp.schema.common.ModelConstants
+import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
+import eu.dnetlib.dhp.schema.sx.OafUtils
+import org.apache.spark.sql.Row
+
+import scala.collection.JavaConverters._
+
+/** Helpers converting ORCID author rows into OAF [[Author]] instances. */
+object AuthorEnricher extends Serializable {
+
+  /** Builds an OAF [[Author]] carrying a single ORCID pid.
+    *
+    * The pid is tagged with the provenance action `ORCID_ENRICHMENT`.
+    *
+    * @param givenName  given name of the author, may be null
+    * @param familyName family name of the author, may be null
+    * @param orcid      ORCID identifier stored as the value of the author pid
+    * @return the new author
+    */
+  def createAuthor(givenName: String, familyName: String, orcid: String): Author = {
+    val a = new Author
+    a.setName(givenName)
+    a.setSurname(familyName)
+    // Join only the name parts that are actually present: plain string interpolation would
+    // render a missing part as the literal text "null" (e.g. "John null").
+    a.setFullname(Seq(givenName, familyName).filter(n => n != null && n.trim.nonEmpty).mkString(" "))
+    val pid = OafUtils.createSP(orcid, ModelConstants.ORCID, ModelConstants.ORCID)
+    pid.setDataInfo(OafUtils.generateDataInfo())
+    pid.getDataInfo.setProvenanceaction(OafUtils.createQualifier("ORCID_ENRICHMENT", "ORCID_ENRICHMENT"))
+    a.setPid(List(pid).asJava)
+    a
+  }
+
+  /** Converts the ORCID authors collected for one result into OAF authors.
+    *
+    * @param r row whose field at index 1 is a list of structs exposing the string fields
+    *          `givenName`, `familyName` and `orcid`
+    * @return one [[Author]] per struct, in the order of the list
+    */
+  def toOAFAuthor(r: Row): java.util.List[Author] = {
+    r.getList[Row](1)
+      .asScala
+      .map(s => createAuthor(s.getAs[String]("givenName"), s.getAs[String]("familyName"), s.getAs[String]("orcid")))
+      .toList
+      .asJava
+  }
+
+}
--- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala
@ -0,0 +1,138 @@
+package eu.dnetlib.dhp.enrich.orcid
+
+import eu.dnetlib.dhp.application.AbstractScalaApplication
+import eu.dnetlib.dhp.oa.merge.AuthorMerger
+import eu.dnetlib.dhp.schema.common.ModelSupport
+import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software}
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql._
+import org.slf4j.{Logger, LoggerFactory}
+import scala.collection.JavaConverters._
+
+/** Spark application that enriches the authors of the graph results with ORCID data.
+  *
+  * The ORCID tables found under `orcidPath` are joined with the results found under
+  * `graphPath` on the pids of the result instances. For every matching result the collected
+  * ORCID authors are handed, together with the result's own authors, to
+  * `AuthorMerger.enrichOrcid`. The output is written under `targetPath`.
+  */
+class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger)
+    extends AbstractScalaApplication(propertyPath, args, log: Logger) {
+
+  /** Here all the spark applications runs this method
+    * where the whole logic of the spark node is defined
+    *
+    * Reads the `graphPath`, `orcidPath` and `targetPath` arguments, builds the ORCID lookup
+    * table once and applies the enrichment to publication, dataset, software and
+    * otherresearchproduct.
+    */
+  override def run(): Unit = {
+    val graphPath = parser.get("graphPath")
+    log.info(s"graphPath is '$graphPath'")
+    val orcidPath = parser.get("orcidPath")
+    log.info(s"orcidPath is '$orcidPath'")
+    val targetPath = parser.get("targetPath")
+    log.info(s"targetPath is '$targetPath'")
+    val orcidPublication: Dataset[Row] = generateOrcidTable(spark, orcidPath)
+
+    try {
+      enrichResult(
+        spark,
+        s"$graphPath/publication",
+        orcidPublication,
+        s"$targetPath/publication",
+        Encoders.bean(classOf[Publication])
+      )
+      enrichResult(
+        spark,
+        s"$graphPath/dataset",
+        orcidPublication,
+        s"$targetPath/dataset",
+        Encoders.bean(classOf[eu.dnetlib.dhp.schema.oaf.Dataset])
+      )
+      enrichResult(
+        spark,
+        s"$graphPath/software",
+        orcidPublication,
+        s"$targetPath/software",
+        Encoders.bean(classOf[Software])
+      )
+      enrichResult(
+        spark,
+        s"$graphPath/otherresearchproduct",
+        orcidPublication,
+        s"$targetPath/otherresearchproduct",
+        Encoders.bean(classOf[OtherResearchProduct])
+      )
+    } finally {
+      // generateOrcidTable returns a cached Dataset: release it once every result type is done
+      orcidPublication.unpersist()
+    }
+  }
+
+  /** Enriches the authors of one result type and writes the outcome as gzip-compressed JSON.
+    *
+    * Every record read from `graphPath` is written to `outputPath`; only the records that
+    * match at least one ORCID work get their author list replaced by the output of
+    * `AuthorMerger.enrichOrcid`.
+    *
+    * @param spark            the active Spark session
+    * @param graphPath        path of the JSON records of the result type to enrich
+    * @param orcidPublication table with columns `schema`, `value` and `author`
+    * @param outputPath       path where the enriched records are written (overwritten if present)
+    * @param enc              bean encoder of the result type, also used as the read schema
+    */
+  private def enrichResult[T <: Result](
+    spark: SparkSession,
+    graphPath: String,
+    orcidPublication: Dataset[Row],
+    outputPath: String,
+    enc: Encoder[T]
+  ): Unit = {
+
+    // One row per (instance pid, result id), limited to results not flagged as deleted by inference
+    val entities = spark.read
+      .schema(enc.schema)
+      .json(graphPath)
+      .select(col("id"), col("datainfo"), col("instance"))
+      .where("datainfo.deletedbyinference != true")
+      .drop("datainfo")
+      .withColumn("instances", explode(col("instance")))
+      .withColumn("pids", explode(col("instances.pid")))
+      .select(
+        col("pids.qualifier.classid").alias("pid_schema"),
+        col("pids.value").alias("pid_value"),
+        col("id").alias("dnet_id")
+      )
+
+    // Case-insensitive match on pid schema and value, then the distinct ORCID authors per result
+    val orcidDnet = orcidPublication
+      .join(
+        entities,
+        lower(col("schema")).equalTo(lower(col("pid_schema"))) &&
+        lower(col("value")).equalTo(lower(col("pid_value"))),
+        "inner"
+      )
+      .groupBy(col("dnet_id"))
+      .agg(collect_set(orcidPublication("author")).alias("orcid_authors"))
+      .select("dnet_id", "orcid_authors")
+      .cache()
+    // cache() is lazy: run an action so that the aggregation is computed and stored now
+    orcidDnet.count()
+
+    try {
+      val result = spark.read.schema(enc.schema).json(graphPath).as[T](enc)
+
+      result
+        .joinWith(orcidDnet, result("id").equalTo(orcidDnet("dnet_id")), "left")
+        .map {
+          // left join: results without any ORCID match are passed through unchanged
+          case (r: T, null) =>
+            r
+          case (p: T, r: Row) =>
+            p.setAuthor(AuthorMerger.enrichOrcid(p.getAuthor, AuthorEnricher.toOAFAuthor(r)))
+            p
+        }(enc)
+        .write
+        .mode(SaveMode.Overwrite)
+        .option("compression", "gzip")
+        .json(outputPath)
+    } finally {
+      // a new cached aggregation is created per result type: free it as soon as it is written
+      orcidDnet.unpersist()
+    }
+  }
+
+  /** Builds the ORCID lookup table by joining the ORCID authors with the pids of their works.
+    *
+    * Only pids whose schema is one of doi, pmid, pmc, arxiv or handle are kept.
+    *
+    * @param spark     the active Spark session
+    * @param inputPath base path containing the `Authors` and `Works` tables
+    * @return a cached Dataset with columns `schema`, `value` and `author`, the latter being a
+    *         struct of `orcid`, `givenName` and `familyName`
+    */
+  private def generateOrcidTable(spark: SparkSession, inputPath: String): Dataset[Row] = {
+    val orcidAuthors =
+      spark.read.load(s"$inputPath/Authors").select("orcid", "familyName", "givenName", "creditName", "otherNames")
+    val orcidWorks = spark.read
+      .load(s"$inputPath/Works")
+      .select(col("orcid"), explode(col("pids")).alias("identifier"))
+      .where(
+        "identifier.schema IN('doi','pmid','pmc','arxiv','handle')"
+      )
+    val orcidPublication = orcidAuthors
+      .join(orcidWorks, orcidAuthors("orcid").equalTo(orcidWorks("orcid")))
+      .select(
+        col("identifier.schema").alias("schema"),
+        col("identifier.value").alias("value"),
+        struct(orcidAuthors("orcid").alias("orcid"), col("givenName"), col("familyName")).alias("author")
+      )
+    orcidPublication.cache()
+  }
+}
+
+/** Command line launcher of the ORCID enrichment job. */
+object SparkEnrichGraphWithOrcidAuthors {
+
+  val log: Logger = LoggerFactory.getLogger(SparkEnrichGraphWithOrcidAuthors.getClass)
+
+  /** Creates the application with the bundled parameter definition, initializes it and runs it.
+    *
+    * @param args the command line arguments handed over to the application
+    */
+  def main(args: Array[String]): Unit = {
+    val parametersResource = "/eu/dnetlib/dhp/enrich/orcid/enrich_graph_orcid_parameters.json"
+    val application = new SparkEnrichGraphWithOrcidAuthors(parametersResource, args, log)
+    application.initialize().run()
+  }
+}
--- a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/EnrichOrcidTest.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/enrich/orcid/EnrichOrcidTest.scala
@ -0,0 +1,77 @@
+package eu.dnetlib.dhp.enrich.orcid
+
+import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
+import org.apache.spark.sql.{Column, Encoder, Encoders, Row, SparkSession}
+import org.junit.jupiter.api.Test
+import org.slf4j.{Logger, LoggerFactory}
+import org.apache.spark.sql.functions._
+
+/** Scratch harness used to try the ORCID enrichment against local data.
+  *
+  * NOTE(review): `test()` carries no `@Test` annotation, so JUnit does not execute it, and
+  * most of its body is commented out; confirm whether it should become a real test.
+  */
+class EnrichOrcidTest {
+
+  val log: Logger = LoggerFactory.getLogger(getClass)
+
+  /** Creates a local Spark session and derives the Publication bean schema; it asserts nothing. */
+  def test() = {
+    val spark = SparkSession.builder().master("local[*]").getOrCreate()
+//    spark.sparkContext.setLogLevel("ERROR")
+
+//    new SparkEnrichGraphWithOrcidAuthors(null, null, null)
+//      .enrichResult(
+//        spark,
+//        "/Users/sandro/orcid_test/publication",
+//        "",
+//        "/tmp/graph/",
+//        Encoders.bean(classOf[Publication])
+//      )
+
+    val schema = Encoders.bean(classOf[Publication]).schema
+//
+//    val simplifyAuthor = udf((r: Seq[Row]) => {
+//      r
+//        .map(k =>
+//          AuthorPid(
+//            k.getAs[String]("fullname"),
+//            k.getAs[Seq[Row]]("pid")
+//              .map(p => Pid(p.getAs[Row]("qualifier").getAs[String]("classid"), p.getAs[String]("value")))
+//              .toList
+//          )
+//        )
+//        .filter(l => l.pids.nonEmpty)
+//        .toList
+//    })
+//
+//    val wrong_orcid_intersection = udf((a: Seq[Row]) => {
+//      a.map(author => {
+//        val pids_with_orcid: Seq[Row] = author
+//          .getAs[Seq[Row]]("pids")
+//          .filter(p =>
+//            p.getAs[String]("pidScheme") != null && p.getAs[String]("pidScheme").toLowerCase.contains("orcid")
+//          )
+//        if (pids_with_orcid.exists(p => p.getAs[String]("pidScheme").equals("ORCID"))) {
+//          if (pids_with_orcid.map(p => p.getAs[String]("pidValue").toLowerCase).distinct.size > 1) {
+//            AuthorPid(
+//              author.getAs[String]("fullName"),
+//              pids_with_orcid.map(p => Pid(p.getAs[String]("pidScheme"), p.getAs[String]("pidValue"))).toList
+//            )
+//
+//          } else
+//            null
+//        } else
+//          null
+//      }).filter(author => author != null)
+//    })
+
+    // the stray expression statement `Encoders` that stood here had no effect and was removed
+    import spark.implicits._
+
+//    val enriched = spark.read
+//      .schema(schema)
+//      .json("/Users/sandro/orcid_test/publication_enriched")
+//      .select(col("id"), explode(col("author")).as("authors"))
+//      .withColumn("ap", col("authors.pid.qualifier.classid"))
+//      .withColumn("dp", col("authors.pid.datainfo.provenanceAction.classid"))
+//
+//      .show()
+
+  }
+
+}