Merge pull request 'bipFinder_resolve_conflicts' (#73) from bipFinder_resolve_conflicts into stable_ids

2020-12-10 11:00:46 +01:00 · 2020-12-10 11:00:46 +01:00 · 3c10941376
parent 3c5ce1dada 12e2f930c8
commit 3c10941376
165 changed files with 10963 additions and 1208 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
@ -0,0 +1,30 @@
+
+package eu.dnetlib.dhp.common;
+
+import java.util.Map;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Shared lookup tables used to translate access-right class ids into codes and labels of the COAR
+ * access-rights vocabulary.
+ */
+public class Constants {
+
+	/** Maps an access-right class id (e.g. {@code OPEN}) to its COAR code. */
+	public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
+
+	/** Maps a COAR code to the label reported together with it. */
+	public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
+
+	/** Address of the COAR access-rights vocabulary; final so that it cannot be reassigned at runtime. */
+	public static final String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
+
+	static {
+		// "OPEN SOURCE" shares the code of "OPEN"
+		accessRightsCoarMap.put("OPEN", "c_abf2");
+		accessRightsCoarMap.put("RESTRICTED", "c_16ec");
+		accessRightsCoarMap.put("OPEN SOURCE", "c_abf2");
+		accessRightsCoarMap.put("CLOSED", "c_14cb");
+		accessRightsCoarMap.put("EMBARGO", "c_f1cf");
+
+		coarCodeLabelMap.put("c_abf2", "OPEN");
+		coarCodeLabelMap.put("c_16ec", "RESTRICTED");
+		coarCodeLabelMap.put("c_14cb", "CLOSED");
+		coarCodeLabelMap.put("c_f1cf", "EMBARGO");
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java
@ -0,0 +1,412 @@
+
+package eu.dnetlib.dhp.common;
+
+import java.io.Serializable;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.dump.oaf.*;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
+import eu.dnetlib.dhp.schema.oaf.Field;
+import eu.dnetlib.dhp.schema.oaf.Journal;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+public class GraphResultMapper implements Serializable {
+
+	/**
+	 * Maps a graph result (publication, dataset, software or other research product) onto the dump
+	 * model, returning a {@link CommunityResult}.
+	 * <p>
+	 * The input is cast to {@code eu.dnetlib.dhp.schema.oaf.Result}. When the result type of the input is
+	 * missing, only the {@code collectedfrom} information is copied.
+	 *
+	 * @param in the graph entity to map; must be an {@code eu.dnetlib.dhp.schema.oaf.Result}
+	 * @param <E> the concrete entity type
+	 * @return the mapped result
+	 * @throws ClassCastException if {@code in} is not a result, or if its concrete class does not match
+	 *         the class id of its result type
+	 */
+	public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
+		E in) {
+
+		CommunityResult out = new CommunityResult();
+
+		eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in;
+		Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort = Optional.ofNullable(input.getResulttype());
+		if (ort.isPresent()) {
+			// type specific fields first, selected through the class id of the result type
+			switch (ort.get().getClassid()) {
+				case "publication":
+					Optional<Journal> journal = Optional
+						.ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal());
+					if (journal.isPresent()) {
+						Journal j = journal.get();
+						Container c = new Container();
+						c.setConferencedate(j.getConferencedate());
+						c.setConferenceplace(j.getConferenceplace());
+						c.setEdition(j.getEdition());
+						c.setEp(j.getEp());
+						c.setIss(j.getIss());
+						c.setIssnLinking(j.getIssnLinking());
+						c.setIssnOnline(j.getIssnOnline());
+						c.setIssnPrinted(j.getIssnPrinted());
+						c.setName(j.getName());
+						c.setSp(j.getSp());
+						c.setVol(j.getVol());
+						out.setContainer(c);
+						out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname());
+					}
+					break;
+				case "dataset":
+					eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
+					Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue()));
+					Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));
+
+					out
+						.setGeolocation(
+							Optional
+								.ofNullable(id.getGeolocation())
+								.map(
+									igl -> igl
+										.stream()
+										.filter(Objects::nonNull)
+										.map(gli -> {
+											GeoLocation gl = new GeoLocation();
+											gl.setBox(gli.getBox());
+											gl.setPlace(gli.getPlace());
+											gl.setPoint(gli.getPoint());
+											return gl;
+										})
+										.collect(Collectors.toList()))
+								.orElse(null));
+
+					out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname());
+					break;
+				case "software":
+
+					eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
+					Optional
+						.ofNullable(is.getCodeRepositoryUrl())
+						.ifPresent(value -> out.setCodeRepositoryUrl(value.getValue()));
+					Optional
+						.ofNullable(is.getDocumentationUrl())
+						.ifPresent(
+							value -> out
+								.setDocumentationUrl(
+									value
+										.stream()
+										.map(v -> v.getValue())
+										.collect(Collectors.toList())));
+
+					Optional
+						.ofNullable(is.getProgrammingLanguage())
+						.ifPresent(value -> out.setProgrammingLanguage(value.getClassid()));
+
+					out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname());
+					break;
+				case "other":
+
+					eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input;
+					out
+						.setContactgroup(
+							Optional
+								.ofNullable(ir.getContactgroup())
+								.map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList()))
+								.orElse(null));
+
+					out
+						.setContactperson(
+							Optional
+								.ofNullable(ir.getContactperson())
+								.map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList()))
+								.orElse(null));
+					out
+						.setTool(
+							Optional
+								.ofNullable(ir.getTool())
+								.map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList()))
+								.orElse(null));
+
+					out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
+
+					break;
+			}
+
+			Optional
+				.ofNullable(input.getAuthor())
+				.ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList())));
+
+			// I do not map Access Right UNKNOWN or OTHER
+
+			Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
+			if (oar.isPresent()) {
+				if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
+					String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
+					out
+						.setBestaccessright(
+							AccessRight
+								.newInstance(
+									code,
+									Constants.coarCodeLabelMap.get(code),
+									Constants.COAR_ACCESS_RIGHT_SCHEMA));
+				}
+			}
+
+			final List<String> contributorList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getContributor())
+				.ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
+			out.setContributor(contributorList);
+
+			// countries whose class id is the UNKNOWN constant are dropped from the output
+			Optional
+				.ofNullable(input.getCountry())
+				.ifPresent(
+					value -> out
+						.setCountry(
+							value
+								.stream()
+								.map(
+									c -> {
+										if (c.getClassid().equals((ModelConstants.UNKNOWN))) {
+											return null;
+										}
+										Country country = new Country();
+										country.setCode(c.getClassid());
+										country.setLabel(c.getClassname());
+										Optional
+											.ofNullable(c.getDataInfo())
+											.ifPresent(
+												provenance -> country
+													.setProvenance(
+														Provenance
+															.newInstance(
+																provenance
+																	.getProvenanceaction()
+																	.getClassname(),
+																c.getDataInfo().getTrust())));
+										return country;
+									})
+								.filter(Objects::nonNull)
+								.collect(Collectors.toList())));
+
+			final List<String> coverageList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getCoverage())
+				.ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
+			out.setCoverage(coverageList);
+
+			out.setDateofcollection(input.getDateofcollection());
+
+			final List<String> descriptionList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getDescription())
+				.ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
+			out.setDescription(descriptionList);
+			Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
+			if (oStr.isPresent()) {
+				out.setEmbargoenddate(oStr.get().getValue());
+			}
+
+			final List<String> formatList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getFormat())
+				.ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
+			out.setFormat(formatList);
+			out.setId(input.getId());
+			out.setOriginalId(input.getOriginalId());
+
+			Optional<List<eu.dnetlib.dhp.schema.oaf.Instance>> oInst = Optional
+				.ofNullable(input.getInstance());
+
+			if (oInst.isPresent()) {
+				out
+					.setInstance(
+						oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList()));
+
+			}
+
+			Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oL = Optional.ofNullable(input.getLanguage());
+			if (oL.isPresent()) {
+				eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
+				out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname()));
+			}
+			Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
+			if (oLong.isPresent()) {
+				out.setLastupdatetimestamp(oLong.get());
+			}
+			// only the first "main title" and the first "subtitle" are kept
+			Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
+			if (otitle.isPresent()) {
+				List<StructuredProperty> iTitle = otitle
+					.get()
+					.stream()
+					.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
+					.collect(Collectors.toList());
+				if (iTitle.size() > 0) {
+					out.setMaintitle(iTitle.get(0).getValue());
+				}
+
+				iTitle = otitle
+					.get()
+					.stream()
+					.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
+					.collect(Collectors.toList());
+				if (iTitle.size() > 0) {
+					out.setSubtitle(iTitle.get(0).getValue());
+				}
+
+			}
+
+			List<ControlledField> pids = new ArrayList<>();
+			Optional
+				.ofNullable(input.getPid())
+				.ifPresent(
+					value -> value
+						.stream()
+						.forEach(
+							p -> pids
+								.add(
+									ControlledField
+										.newInstance(p.getQualifier().getClassid(), p.getValue()))));
+			out.setPid(pids);
+			oStr = Optional.ofNullable(input.getDateofacceptance());
+			if (oStr.isPresent()) {
+				out.setPublicationdate(oStr.get().getValue());
+			}
+			oStr = Optional.ofNullable(input.getPublisher());
+			if (oStr.isPresent()) {
+				out.setPublisher(oStr.get().getValue());
+			}
+
+			// NOTE(review): sourceList is filled but never stored on the output, because the setter call
+			// below is commented out - confirm whether the source should be part of the mapped result
+			List<String> sourceList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getSource())
+				.ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue())));
+			// out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
+			List<Subject> subjectList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getSubject())
+				.ifPresent(
+					value -> value
+						.forEach(s -> subjectList.add(getSubject(s))));
+
+			out.setSubjects(subjectList);
+
+			// overwrites the type possibly set in the switch above with the class id of the result type
+			out.setType(input.getResulttype().getClassid());
+		}
+
+		// collectedfrom is optional like every other field read above: copy it only when present
+		// instead of failing with a NullPointerException
+		Optional
+			.ofNullable(input.getCollectedfrom())
+			.ifPresent(
+				cfs -> out
+					.setCollectedfrom(
+						cfs
+							.stream()
+							.map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue()))
+							.collect(Collectors.toList())));
+
+		return out;
+
+	}
+
+	/**
+	 * Converts an instance of the graph model into a {@link CommunityInstance}.
+	 * <p>
+	 * The common values are copied through {@code setCommonValue}; {@code collectedfrom} and
+	 * {@code hostedby} are copied only when the graph instance provides them.
+	 *
+	 * @param i the graph instance to convert
+	 * @return the converted instance
+	 */
+	private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
+		CommunityInstance instance = new CommunityInstance();
+
+		setCommonValue(i, instance);
+
+		// both references may be missing on the graph instance: guard them instead of dereferencing blindly
+		Optional
+			.ofNullable(i.getCollectedfrom())
+			.ifPresent(cf -> instance.setCollectedfrom(KeyValue.newInstance(cf.getKey(), cf.getValue())));
+
+		Optional
+			.ofNullable(i.getHostedby())
+			.ifPresent(hb -> instance.setHostedby(KeyValue.newInstance(hb.getKey(), hb.getValue())));
+
+		return instance;
+
+	}
+
+	/**
+	 * Copies onto {@code instance} the values shared by the dump instances: access right, license,
+	 * publication date, refereed flag, type and url. Each value is copied only when the graph instance
+	 * provides it; the access right additionally requires its class id to be a key of
+	 * {@code Constants.accessRightsCoarMap}.
+	 *
+	 * @param i the graph instance to read from
+	 * @param instance the dump instance to fill
+	 * @param <I> the concrete dump instance type
+	 */
+	private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
+		Optional
+			.ofNullable(i.getAccessright())
+			.map(eu.dnetlib.dhp.schema.oaf.Qualifier::getClassid)
+			.filter(Constants.accessRightsCoarMap::containsKey)
+			.ifPresent(classid -> {
+				final String coarCode = Constants.accessRightsCoarMap.get(classid);
+				instance
+					.setAccessright(
+						AccessRight
+							.newInstance(
+								coarCode,
+								Constants.coarCodeLabelMap.get(coarCode),
+								Constants.COAR_ACCESS_RIGHT_SCHEMA));
+			});
+
+		Optional
+			.ofNullable(i.getLicense())
+			.ifPresent(license -> instance.setLicense(license.getValue()));
+		Optional
+			.ofNullable(i.getDateofacceptance())
+			.ifPresent(accepted -> instance.setPublicationdate(accepted.getValue()));
+		Optional
+			.ofNullable(i.getRefereed())
+			.ifPresent(refereed -> instance.setRefereed(refereed.getClassname()));
+		Optional
+			.ofNullable(i.getInstancetype())
+			.ifPresent(type -> instance.setType(type.getClassname()));
+		Optional.ofNullable(i.getUrl()).ifPresent(instance::setUrl);
+
+	}
+
+	/**
+	 * Builds the dump subject for a structured property of the graph: the subject itself comes from
+	 * the class id of the qualifier and from the value, the provenance from the data info when present.
+	 *
+	 * @param s the structured property describing the subject
+	 * @return the mapped subject
+	 */
+	private static Subject getSubject(StructuredProperty s) {
+		final Subject mapped = new Subject();
+		mapped.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()));
+
+		final DataInfo info = s.getDataInfo();
+		if (info != null) {
+			final Provenance provenance = new Provenance();
+			provenance.setProvenance(info.getProvenanceaction().getClassname());
+			provenance.setTrust(info.getTrust());
+			mapped.setProvenance(provenance);
+		}
+
+		return mapped;
+	}
+
+	/**
+	 * Maps an author of the graph onto the dump model. Among the pids of the author only the one
+	 * selected by {@code getOrcid} is kept, if any.
+	 *
+	 * @param oa the graph author
+	 * @return the mapped author
+	 */
+	private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
+		final Author author = new Author();
+		author.setFullname(oa.getFullname());
+		author.setName(oa.getName());
+		author.setSurname(oa.getSurname());
+		author.setRank(oa.getRank());
+
+		final List<StructuredProperty> authorPids = oa.getPid();
+		if (authorPids != null) {
+			final Pid orcid = getOrcid(authorPids);
+			if (orcid != null) {
+				author.setPid(orcid);
+			}
+		}
+
+		return author;
+	}
+
+	/**
+	 * Returns the first pid whose qualifier class id equals {@code ModelConstants.ORCID}. The
+	 * provenance is attached only when the pid carries a data info.
+	 *
+	 * @param p the pids to scan
+	 * @return the selected pid, or {@code null} when none matches
+	 */
+	private static Pid getOrcid(List<StructuredProperty> p) {
+		for (StructuredProperty candidate : p) {
+			if (!candidate.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
+				continue;
+			}
+
+			final DataInfo info = candidate.getDataInfo();
+			final ControlledField identifier = ControlledField
+				.newInstance(candidate.getQualifier().getClassid(), candidate.getValue());
+
+			if (info == null) {
+				return Pid.newInstance(identifier);
+			}
+			return Pid
+				.newInstance(
+					identifier,
+					Provenance.newInstance(info.getProvenanceaction().getClassname(), info.getTrust()));
+		}
+		return null;
+	}
+
+}
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java
@ -90,9 +90,6 @@ public class MakeTarArchive implements Serializable {
 		String p_string = p.toString();
 		if (!p_string.endsWith("_SUCCESS")) {
 			String name = p_string.substring(p_string.lastIndexOf("/") + 1);
-			if (name.trim().equalsIgnoreCase("communities_infrastructures")) {
-				name = "communities_infrastructures.json";
-			}
 			TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
 			entry.setSize(fileStatus.getLen());
 			current_size += fileStatus.getLen();
--- a/dhp-schemas/pom.xml
+++ b/dhp-schemas/pom.xml
@ -6,7 +6,7 @@
        <groupId>eu.dnetlib.dhp</groupId>
        <artifactId>dhp</artifactId>
        <version>1.2.4-SNAPSHOT</version>
-        <relativePath>../</relativePath>
+        <relativePath>../pom.xml</relativePath>
    </parent>

    <artifactId>dhp-schemas</artifactId>
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java
@ -1,6 +1,7 @@

 package eu.dnetlib.dhp.schema.oaf;

+import java.io.Serializable;
 import java.util.List;

 import com.google.common.base.Objects;
@ -8,7 +9,7 @@ import com.google.common.base.Objects;
 /**
 * Represent a measure, must be further described by a system available resource providing name and descriptions.
 */
-public class Measure {
+public class Measure implements Serializable {

 	/**
 	 * Unique measure identifier.
@ -16,7 +17,7 @@ public class Measure {
 	private String id;

 	/**
-	 * List of units associated with this measure. KeyValue provides a pair to store the laber (key) and the value, plus
+	 * List of units associated with this measure. KeyValue provides a pair to store the label (key) and the value, plus
 	 * common provenance information.
 	 */
 	private List<KeyValue> unit;
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java
@ -2,8 +2,12 @@
 package eu.dnetlib.dhp.schema.oaf;

 import java.io.Serializable;
+import java.util.Collection;
 import java.util.List;
 import java.util.Objects;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;

 public abstract class Oaf implements Serializable {

@ -40,9 +44,36 @@ public abstract class Oaf implements Serializable {
 		this.lastupdatetimestamp = lastupdatetimestamp;
 	}

-	public void mergeOAFDataInfo(Oaf e) {
-		if (e.getDataInfo() != null && compareTrust(this, e) < 0)
-			dataInfo = e.getDataInfo();
+	/**
+	 * Merges the information of {@code o} into this object: the collectedfrom lists are concatenated
+	 * (own entries first) and de-duplicated, the data info is merged through
+	 * {@code mergeOAFDataInfo} and the greater last update timestamp is kept, a missing timestamp
+	 * counting as 0.
+	 *
+	 * @param o the object to merge from; nothing happens when it is {@code null}
+	 */
+	public void mergeFrom(Oaf o) {
+		if (o == null) {
+			return;
+		}
+
+		setCollectedfrom(
+			Stream
+				.of(getCollectedfrom(), o.getCollectedfrom())
+				.filter(Objects::nonNull)
+				.flatMap(Collection::stream)
+				.distinct() // relies on KeyValue.equals
+				.collect(Collectors.toList()));
+
+		mergeOAFDataInfo(o);
+
+		final long ownTimestamp = Optional.ofNullable(getLastupdatetimestamp()).orElse(0L);
+		final long otherTimestamp = Optional.ofNullable(o.getLastupdatetimestamp()).orElse(0L);
+		setLastupdatetimestamp(Math.max(ownTimestamp, otherTimestamp));
+	}
+
+	/**
+	 * Adopts the data info of {@code o} when {@code o} has one and {@code compareTrust(this, o)} is
+	 * negative; otherwise the current data info is left untouched.
+	 *
+	 * @param o the object whose data info may be adopted; it is dereferenced without a null check
+	 */
+	public void mergeOAFDataInfo(Oaf o) {
+		if (o.getDataInfo() != null && compareTrust(this, o) < 0)
+			dataInfo = o.getDataInfo();
 	}

 	protected String extractTrust(Oaf e) {
@ -62,7 +93,7 @@ public abstract class Oaf implements Serializable {
 		if (o == null || getClass() != o.getClass())
 			return false;
 		Oaf oaf = (Oaf) o;
-		return Objects.equals(dataInfo, oaf.dataInfo)
+		return Objects.equals(getDataInfo(), oaf.getDataInfo())
 			&& Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp);
 	}

--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java
@ -78,14 +78,10 @@ public abstract class OafEntity extends Oaf implements Serializable {
 	}

 	public void mergeFrom(OafEntity e) {
-
-		if (e == null)
-			return;
+		super.mergeFrom(e);

 		originalId = mergeLists(originalId, e.getOriginalId());

-		collectedfrom = mergeLists(collectedfrom, e.getCollectedfrom());
-
 		pid = mergeLists(pid, e.getPid());

 		if (e.getDateofcollection() != null && compareTrust(this, e) < 0)
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java
@ -130,19 +130,7 @@ public class Relation extends Oaf {
 			Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal");
 		checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal");

-		setCollectedfrom(
-			Stream
-				.concat(
-					Optional
-						.ofNullable(getCollectedfrom())
-						.map(Collection::stream)
-						.orElse(Stream.empty()),
-					Optional
-						.ofNullable(r.getCollectedfrom())
-						.map(Collection::stream)
-						.orElse(Stream.empty()))
-				.distinct() // relies on KeyValue.equals
-				.collect(Collectors.toList()));
+		super.mergeFrom(r);
 	}

 	@Override
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java
@ -1,7 +1,14 @@

-package eu.dnetlib.doiboost.orcid.model;
+package eu.dnetlib.dhp.schema.orcid;

 import java.io.Serializable;
+import java.util.List;
+
+import com.google.common.collect.Lists;
+
+/**
+ * This class models the data that are retrieved from orcid publication
+ */

 public class AuthorData implements Serializable {

@ -10,6 +17,7 @@ public class AuthorData implements Serializable {
 	private String surname;
 	private String creditName;
 	private String errorCode;
+	private List<String> otherNames;

 	public String getErrorCode() {
 		return errorCode;
@ -50,4 +58,15 @@ public class AuthorData implements Serializable {
 	public void setOid(String oid) {
 		this.oid = oid;
 	}
+
+	/**
+	 * @return the other names of the author, or {@code null} when none were set
+	 */
+	public List<String> getOtherNames() {
+		return otherNames;
+	}
+
+	/**
+	 * Replaces the other names of the author with the given list.
+	 *
+	 * @param otherNames the list to store; it is kept as is, without copying
+	 */
+	public void setOtherNames(List<String> otherNames) {
+		// the former lazy creation of an empty list was a dead store: it was overwritten right away
+		this.otherNames = otherNames;
+	}
 }
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java
@ -0,0 +1,25 @@
+
+package eu.dnetlib.dhp.schema.orcid;
+
+import java.util.List;
+
+/**
+ * Bean associating a doi with a list of {@link AuthorData}.
+ * NOTE(review): unlike AuthorData this class does not implement Serializable - confirm that this is intended.
+ */
+public class OrcidDOI {
+
+	private String doi;
+
+	private List<AuthorData> authors;
+
+	/** @return the doi */
+	public String getDoi() {
+		return this.doi;
+	}
+
+	/** @param doi the doi to store */
+	public void setDoi(final String doi) {
+		this.doi = doi;
+	}
+
+	/** @return the authors associated with the doi */
+	public List<AuthorData> getAuthors() {
+		return this.authors;
+	}
+
+	/** @param authors the authors to associate with the doi */
+	public void setAuthors(final List<AuthorData> authors) {
+		this.authors = authors;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java
@ -0,0 +1,28 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Map from a string key to a list of {@link Score}, modelling the bipFinder! input data.
+ * Only needed for deserialization purposes.
+ */
+
+public class BipDeserialize extends HashMap<String, List<Score>> implements Serializable {
+
+	public BipDeserialize() {
+		super();
+	}
+
+	/**
+	 * Null-safe lookup: returns the scores stored under {@code key}, or a new empty list when the key
+	 * is missing or mapped to {@code null}. The empty list is not stored in the map.
+	 *
+	 * @param key the key to look up
+	 * @return the stored scores or a fresh empty list, never {@code null}
+	 */
+	public List<Score> get(String key) {
+		final List<Score> scores = super.get(key);
+		return scores != null ? scores : new ArrayList<>();
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java
@ -0,0 +1,30 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Rewriting of the bipFinder input data in which the identifier of the result (doi) is made explicit.
+ */
+
+public class BipScore implements Serializable {
+
+	/** Identifier of the result (doi). */
+	private String id;
+
+	/** Scores of the result, with the unit as given in the input file. */
+	private List<Score> scoreList;
+
+	/** @return the identifier of the result */
+	public String getId() {
+		return this.id;
+	}
+
+	/** @param id the identifier of the result */
+	public void setId(final String id) {
+		this.id = id;
+	}
+
+	/** @return the scores of the result */
+	public List<Score> getScoreList() {
+		return this.scoreList;
+	}
+
+	/** @param scoreList the scores of the result */
+	public void setScoreList(final List<Score> scoreList) {
+		this.scoreList = scoreList;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java
@ -0,0 +1,85 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Collects the atomic actions produced for the different result types (publication, dataset,
+ * otherresearchproduct, software) and saves them together under the output path of the ActionSet.
+ */
+public class CollectAndSave implements Serializable {
+
+	private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class);
+
+	/**
+	 * Entry point of the job. Reads {@code isSparkSessionManaged}, {@code inputPath} and
+	 * {@code outputPath} from the arguments, removes the output path and writes the union of the
+	 * sequence files found under the input path.
+	 *
+	 * @param args the command line arguments, parsed against input_actionset_parameter.json
+	 * @throws Exception if the parameters cannot be read or parsed, or if the job fails
+	 */
+	public static <I extends Result> void main(String[] args) throws Exception {
+
+		final String jsonConfiguration;
+		// the stream is closed here because IOUtils.toString leaves it open
+		try (java.io.InputStream parameters = CollectAndSave.class
+			.getResourceAsStream(
+				"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")) {
+			jsonConfiguration = IOUtils.toString(parameters);
+		}
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+
+		parser.parseArgument(args);
+
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		final String inputPath = parser.get("inputPath");
+		log.info("inputPath: {}", inputPath);
+
+		final String outputPath = parser.get("outputPath");
+		log.info("outputPath: {}", outputPath);
+
+		SparkConf conf = new SparkConf();
+
+		runWithSparkSession(
+			conf,
+			isSparkSessionManaged,
+			spark -> {
+				removeOutputDir(spark, outputPath);
+				collectAndSave(spark, inputPath, outputPath);
+			});
+	}
+
+	/**
+	 * Writes to {@code outputPath} the union of the sequence files stored in the publication, dataset,
+	 * otherresearchproduct and software sub-directories of {@code inputPath}.
+	 */
+	private static void collectAndSave(SparkSession spark, String inputPath, String outputPath) {
+		JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		sc
+			.sequenceFile(inputPath + "/publication", Text.class, Text.class)
+			.union(sc.sequenceFile(inputPath + "/dataset", Text.class, Text.class))
+			.union(sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class))
+			.union(sc.sequenceFile(inputPath + "/software", Text.class, Text.class))
+			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
+	}
+
+	/** Removes {@code path} using the Hadoop configuration of the given session. */
+	private static void removeOutputDir(SparkSession spark, String path) {
+		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/KeyValue.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/KeyValue.java
@ -0,0 +1,26 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.Serializable;
+
+/**
+ * Serializable pair of strings (key and value).
+ */
+public class KeyValue implements Serializable {
+
+	private String key;
+
+	private String value;
+
+	/** @return the key, {@code null} until set */
+	public String getKey() {
+		return this.key;
+	}
+
+	/** @param key the key to store */
+	public void setKey(final String key) {
+		this.key = key;
+	}
+
+	/** @return the value, {@code null} until set */
+	public String getValue() {
+		return this.value;
+	}
+
+	/** @param value the value to store */
+	public void setValue(final String value) {
+		this.value = value;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/PreparedResult.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/PreparedResult.java
@ -0,0 +1,28 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.Serializable;
+
+/**
+ * Subset of the information of a generic result that is needed to create the atomic action.
+ */
+public class PreparedResult implements Serializable {
+
+	/** OpenAIRE identifier of the result. */
+	private String id;
+
+	/** Doi of the result. */
+	private String value;
+
+	/** @return the openaire id */
+	public String getId() {
+		return this.id;
+	}
+
+	/** @param id the openaire id */
+	public void setId(final String id) {
+		this.id = id;
+	}
+
+	/** @return the doi */
+	public String getValue() {
+		return this.value;
+	}
+
+	/** @param value the doi */
+	public void setValue(final String value) {
+		this.value = value;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Score.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Score.java
@ -0,0 +1,30 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Represents a score as found in the input file: an identifier plus a list of key/value units.
+ */
+public class Score implements Serializable {
+
+	private String id;
+
+	private List<KeyValue> unit;
+
+	/** @return the identifier of the score */
+	public String getId() {
+		return this.id;
+	}
+
+	/** @param id the identifier of the score */
+	public void setId(final String id) {
+		this.id = id;
+	}
+
+	/** @return the units of the score */
+	public List<KeyValue> getUnit() {
+		return this.unit;
+	}
+
+	/** @param unit the units of the score */
+	public void setUnit(final List<KeyValue> unit) {
+		this.unit = unit;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java
@ -0,0 +1,200 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.api.java.function.MapGroupsFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
+import scala.Tuple2;
+
+/**
+ * Creates the Atomic Actions for one type of result: every result having a doi for which a
+ * bip finder score is available gets an action whose payload carries the scores as measures.
+ */
+public class SparkAtomicActionScoreJob implements Serializable {
+
+	// classid of the pid qualifier used to select the doi among the pids of a result
+	private static final String DOI = "doi";
+	private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionScoreJob.class);
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	/**
+	 * Entry point of the job. The accepted parameters are described in the
+	 * input_parameters.json resource: isSparkSessionManaged (optional, default true), inputPath,
+	 * outputPath, bipScorePath and resultTableName (name of the class loaded as result type).
+	 *
+	 * @param args the command line arguments
+	 * @throws Exception if the arguments cannot be parsed, the result class cannot be loaded or
+	 *                   the Spark job fails
+	 */
+	public static <I extends Result> void main(String[] args) throws Exception {
+
+		String jsonConfiguration = IOUtils
+			.toString(
+				SparkAtomicActionScoreJob.class
+					.getResourceAsStream(
+						"/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json"));
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+
+		parser.parseArgument(args);
+
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		final String inputPath = parser.get("inputPath");
+		log.info("inputPath: {}", inputPath);
+
+		final String outputPath = parser.get("outputPath");
+		log.info("outputPath: {}", outputPath);
+
+		final String bipScorePath = parser.get("bipScorePath");
+		log.info("bipScorePath: {}", bipScorePath);
+
+		final String resultClassName = parser.get("resultTableName");
+		log.info("resultTableName: {}", resultClassName);
+
+		Class<I> inputClazz = (Class<I>) Class.forName(resultClassName);
+
+		SparkConf conf = new SparkConf();
+
+		runWithSparkSession(
+			conf,
+			isSparkSessionManaged,
+			spark -> {
+				// the output is always rebuilt from scratch
+				removeOutputDir(spark, outputPath);
+				prepareResults(spark, inputPath, outputPath, bipScorePath, inputClazz);
+			});
+	}
+
+	/**
+	 * Joins the bip finder scores with the results by doi and stores one atomic action for each
+	 * result having at least one score.
+	 *
+	 * @param spark        the spark session
+	 * @param inputPath    the path of the results, one json per line
+	 * @param outputPath   the path where the sequence file with the atomic actions is written
+	 * @param bipScorePath the path of the file with the scores, one json per line
+	 * @param inputClazz   the class of the results to read and to use as payload of the actions
+	 */
+	private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath,
+		String bipScorePath, Class<I> inputClazz) {
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
+			.textFile(bipScorePath)
+			.map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class));
+
+		// every key of a deserialized line becomes one BipScore with the associated list of scores
+		Dataset<BipScore> bipScores = spark
+			.createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> {
+				BipScore bs = new BipScore();
+				bs.setId(key);
+				bs.setScoreList(entry.get(key));
+				return bs;
+			}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
+
+		Dataset<I> results = readPath(spark, inputPath, inputClazz);
+
+		results.createOrReplaceTempView("result");
+
+		// one row (doi, result id) for each doi of each result not deleted by inference
+		Dataset<PreparedResult> preparedResult = spark
+			.sql(
+				"select pIde.value value, id " +
+					"from result " +
+					"lateral view explode (pid) p as pIde " +
+					"where dataInfo.deletedbyinference = false and pIde.qualifier.classid = '" + DOI + "'")
+			.as(Encoders.bean(PreparedResult.class));
+
+		bipScores
+			.joinWith(
+				preparedResult, bipScores.col("id").equalTo(preparedResult.col("value")),
+				"inner")
+			.map((MapFunction<Tuple2<BipScore, PreparedResult>, BipScore>) value -> {
+				// from here on the score is identified by the id of the result, no longer by the doi
+				BipScore ret = value._1();
+				ret.setId(value._2().getId());
+				return ret;
+			}, Encoders.bean(BipScore.class))
+			.groupByKey((MapFunction<BipScore, String>) value -> value.getId(), Encoders.STRING())
+			.mapGroups((MapGroupsFunction<String, BipScore, I>) (k, it) -> {
+				// a result matched through more than one doi collects the measures of all of them
+				Result ret = inputClazz.newInstance();
+				BipScore first = it.next();
+				ret.setId(first.getId());
+
+				ret.setMeasures(getMeasure(first));
+				it.forEachRemaining(value -> ret.getMeasures().addAll(getMeasure(value)));
+
+				return (I) ret;
+			}, Encoders.bean(inputClazz))
+			.toJavaRDD()
+			.map(p -> new AtomicAction(inputClazz, p))
+			.mapToPair(
+				aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
+					new Text(OBJECT_MAPPER.writeValueAsString(aa))))
+			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
+
+	}
+
+	/**
+	 * Converts the scores of a BipScore into measures: one Measure for each score, with one
+	 * KeyValue (carrying the provenance information) for each element of the unit of the score.
+	 *
+	 * @param value the scores to convert
+	 * @return the list of measures, one for each score in the score list
+	 */
+	private static List<Measure> getMeasure(BipScore value) {
+		return value
+			.getScoreList()
+			.stream()
+			.map(score -> {
+				Measure m = new Measure();
+				m.setId(score.getId());
+				m
+					.setUnit(
+						score
+							.getUnit()
+							.stream()
+							.map(unit -> {
+								KeyValue kv = new KeyValue();
+								kv.setValue(unit.getValue());
+								kv.setKey(unit.getKey());
+								kv.setDataInfo(getDataInfo());
+								return kv;
+							})
+							.collect(Collectors.toList()));
+				return m;
+			})
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * @return a new DataInfo with all the flags set to false, empty trust and provenance action
+	 *         sysimport:actionset
+	 */
+	private static DataInfo getDataInfo() {
+		DataInfo di = new DataInfo();
+		di.setInferred(false);
+		di.setInvisible(false);
+		di.setDeletedbyinference(false);
+		di.setTrust("");
+		Qualifier qualifier = new Qualifier();
+		qualifier.setClassid("sysimport:actionset");
+		qualifier.setClassname("Harvested");
+		qualifier.setSchemename("dnet:provenanceActions");
+		qualifier.setSchemeid("dnet:provenanceActions");
+		di.setProvenanceaction(qualifier);
+		return di;
+	}
+
+	/**
+	 * Removes the given path using the hadoop configuration of the spark context.
+	 *
+	 * @param spark the spark session
+	 * @param path  the path to remove
+	 */
+	private static void removeOutputDir(SparkSession spark, String path) {
+		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
+	}
+
+	/**
+	 * Reads a text file containing one json per line into a Dataset of the given bean class.
+	 *
+	 * @param spark     the spark session
+	 * @param inputPath the path of the text file
+	 * @param clazz     the class each line is deserialized to
+	 * @return the Dataset of the deserialized objects
+	 */
+	public static <R> Dataset<R> readPath(
+		SparkSession spark, String inputPath, Class<R> clazz) {
+		return spark
+			.read()
+			.textFile(inputPath)
+			.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json
@ -0,0 +1,20 @@
+[
+  {
+  "paramName": "issm",
+  "paramLongName": "isSparkSessionManaged",
+  "paramDescription": "when true will stop SparkSession after job execution",
+  "paramRequired": false
+},
+{
+"paramName": "ip",
+"paramLongName": "inputPath",
+"paramDescription": "the URL from where to get the programme file",
+"paramRequired": true
+},
+{
+"paramName": "o",
+"paramLongName": "outputPath",
+"paramDescription": "the path of the new ActionSet",
+"paramRequired": true
+}
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/input_parameters.json
@ -0,0 +1,32 @@
+[
+  {
+  "paramName": "issm",
+  "paramLongName": "isSparkSessionManaged",
+  "paramDescription": "when true will stop SparkSession after job execution",
+  "paramRequired": false
+},
+{
+"paramName": "ip",
+"paramLongName": "inputPath",
+"paramDescription": "the path of the results to be extended with the bip finder scores",
+"paramRequired": true
+},
+{
+"paramName": "o",
+"paramLongName": "outputPath",
+"paramDescription": "the path of the new ActionSet",
+"paramRequired": true
+},
+  {
+    "paramName": "rtn",
+    "paramLongName": "resultTableName",
+    "paramDescription": "the fully qualified name of the class of the results to process",
+    "paramRequired": true
+  },
+  {
+    "paramName": "bsp",
+    "paramLongName": "bipScorePath",
+    "paramDescription": "the path where to find the bip finder scores",
+    "paramRequired": true
+  }
+]
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/config-default.xml
@ -1,11 +1,11 @@
 <configuration>
    <property>
        <name>jobTracker</name>
-        <value>hadoop-rm3.garr-pa1.d4science.org:8032</value>
+        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
-        <value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
+        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
@ -15,28 +15,44 @@
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
+    <property>
+        <name>hive_metastore_uris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>spark2YarnHistoryServerAddress</name>
+        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
+    </property>
+    <property>
+        <name>spark2ExtraListeners</name>
+        <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
+    </property>
+    <property>
+        <name>spark2SqlQueryExecutionListeners</name>
+        <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
+    </property>
    <property>
        <name>oozie.launcher.mapreduce.user.classpath.first</name>
        <value>true</value>
    </property>
    <property>
-        <name>hive_metastore_uris</name>
-        <value>thrift://hadoop-edge2.garr-pa1.d4science.org:9083</value>
-    </property>
-    <property>
-        <name>spark2YarnHistoryServerAddress</name>
-        <value>http://hadoop-edge1.garr-pa1.d4science.org:18089/</value>
+        <name>sparkExecutorNumber</name>
+        <value>4</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
-        <name>spark2ExtraListeners</name>
-        <value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
+        <name>sparkDriverMemory</name>
+        <value>15G</value>
    </property>
    <property>
-        <name>spark2SqlQueryExecutionListeners</name>
-        <value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
+        <name>sparkExecutorMemory</name>
+        <value>6G</value>
+    </property>
+    <property>
+        <name>sparkExecutorCores</name>
+        <value>1</value>
    </property>
 </configuration>
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipfinder/oozie_app/workflow.xml
@ -0,0 +1,171 @@
+<workflow-app name="BipFinderScore" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>inputPath</name>
+            <description>the input path of the resources to be extended</description>
+        </property>
+
+        <property>
+            <name>bipScorePath</name>
+            <description>the path where to find the bipFinder scores</description>
+        </property>
+        <property>
+            <name>outputPath</name>
+            <description>the path where to store the actionset</description>
+        </property>
+    </parameters>
+
+    <start to="deleteoutputpath"/>
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    <action name="deleteoutputpath">
+        <fs>
+            <delete path='${outputPath}'/>
+            <mkdir path='${outputPath}'/>
+            <delete path='${workingDir}'/>
+            <mkdir path='${workingDir}'/>
+        </fs>
+        <ok to="atomicactions"/>
+        <error to="Kill"/>
+    </action>
+
+    <fork name="atomicactions">
+        <path start="atomicactions_publication"/>
+        <path start="atomicactions_dataset"/>
+        <path start="atomicactions_orp"/>
+        <path start="atomicactions_software"/>
+    </fork>
+
+    <action name="atomicactions_publication">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Produces the atomic action with the bip finder scores for publications</name>
+            <class>eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/publication</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/publication</arg>
+            <arg>--bipScorePath</arg><arg>${bipScorePath}</arg>
+        </spark>
+        <ok to="join_aa"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="atomicactions_dataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Produces the atomic action with the bip finder scores for datasets</name>
+            <class>eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/dataset</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
+            <arg>--bipScorePath</arg><arg>${bipScorePath}</arg>
+        </spark>
+        <ok to="join_aa"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="atomicactions_orp">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Produces the atomic action with the bip finder scores for orp</name>
+            <class>eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/otherresearchproduct</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
+            <arg>--bipScorePath</arg><arg>${bipScorePath}</arg>
+        </spark>
+        <ok to="join_aa"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="atomicactions_software">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Produces the atomic action with the bip finder scores for software</name>
+            <class>eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/software</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/software</arg>
+            <arg>--bipScorePath</arg><arg>${bipScorePath}</arg>
+        </spark>
+        <ok to="join_aa"/>
+        <error to="Kill"/>
+    </action>
+
+    <join name="join_aa" to="collectandsave"/>
+
+    <action name="collectandsave">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>saves all the aa produced for the several types of results in the as output path</name>
+            <class>eu.dnetlib.dhp.actionmanager.bipfinder.CollectAndSave</class>
+            <jar>dhp-aggregation-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${workingDir}</arg>
+            <arg>--outputPath</arg><arg>${outputPath}</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java
@ -0,0 +1,331 @@
+
+package eu.dnetlib.dhp.actionmanager.bipfinder;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.ForeachFunction;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.api.java.function.MapGroupsFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
+import scala.Tuple2;
+
+public class SparkAtomicActionScoreJobTest {
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	private static SparkSession spark;
+
+	private static Path workingDir;
+	private static final Logger log = LoggerFactory
+		.getLogger(SparkAtomicActionScoreJobTest.class);
+
+	@BeforeAll
+	public static void beforeAll() throws IOException {
+		workingDir = Files
+			.createTempDirectory(SparkAtomicActionScoreJobTest.class.getSimpleName());
+		log.info("using work dir {}", workingDir);
+
+		SparkConf conf = new SparkConf();
+		conf.setAppName(SparkAtomicActionScoreJobTest.class.getSimpleName());
+
+		conf.setMaster("local[*]");
+		conf.set("spark.driver.host", "localhost");
+		conf.set("hive.metastore.local", "true");
+		conf.set("spark.ui.enabled", "false");
+		conf.set("spark.sql.warehouse.dir", workingDir.toString());
+		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+		spark = SparkSession
+			.builder()
+			.appName(SparkAtomicActionScoreJobTest.class.getSimpleName())
+			.config(conf)
+			.getOrCreate();
+	}
+
+	@AfterAll
+	public static void afterAll() throws IOException {
+		FileUtils.deleteDirectory(workingDir.toFile());
+		spark.stop();
+	}
+
+	@Test
+	public void matchOne() throws Exception {
+		String bipScoresPath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
+			.getPath();
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json")
+			.getPath();
+
+		SparkAtomicActionScoreJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-bipScorePath",
+					bipScoresPath,
+					"-resultTableName",
+					"eu.dnetlib.dhp.schema.oaf.Publication",
+					"-outputPath",
+					workingDir.toString() + "/actionSet"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Publication> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Publication) aa.getPayload()));
+
+		Assertions.assertTrue(tmp.count() == 1);
+
+		Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
+		verificationDataset.createOrReplaceTempView("publication");
+
+		Dataset<Row> execVerification = spark
+			.sql(
+				"Select p.id oaid, mes.id, mUnit.value from publication p " +
+					"lateral view explode(measures) m as mes " +
+					"lateral view explode(mes.unit) u as mUnit ");
+
+		Assertions.assertEquals(2, execVerification.count());
+
+		Assertions
+			.assertEquals(
+				"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb",
+				execVerification.select("oaid").collectAsList().get(0).getString(0));
+
+		Assertions
+			.assertEquals(
+				"1.47565045883e-08",
+				execVerification.filter("id = 'influence'").select("value").collectAsList().get(0).getString(0));
+
+		Assertions
+			.assertEquals(
+				"0.227515392",
+				execVerification.filter("id = 'popularity'").select("value").collectAsList().get(0).getString(0));
+
+	}
+
+	@Test
+	public void matchOneWithTwo() throws Exception {
+		String bipScoresPath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
+			.getPath();
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json")
+			.getPath();
+
+		SparkAtomicActionScoreJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-bipScorePath",
+					bipScoresPath,
+					"-resultTableName",
+					"eu.dnetlib.dhp.schema.oaf.Publication",
+					"-outputPath",
+					workingDir.toString() + "/actionSet"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Publication> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Publication) aa.getPayload()));
+
+		Assertions.assertTrue(tmp.count() == 1);
+
+		Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
+		verificationDataset.createOrReplaceTempView("publication");
+
+		Dataset<Row> execVerification = spark
+			.sql(
+				"Select p.id oaid, mes.id, mUnit.value from publication p " +
+					"lateral view explode(measures) m as mes " +
+					"lateral view explode(mes.unit) u as mUnit ");
+
+		Assertions.assertEquals(4, execVerification.count());
+
+		Assertions
+			.assertEquals(
+				"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb",
+				execVerification.select("oaid").collectAsList().get(0).getString(0));
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("id = 'influence'").count());
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("id = 'popularity'").count());
+
+		List<Row> tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList();
+		String tmp_influence = tmp_ds.get(0).getString(0);
+		Assertions
+			.assertTrue(
+				"1.47565045883e-08".equals(tmp_influence) ||
+					"1.98956540239e-08".equals(tmp_influence));
+
+		tmp_influence = tmp_ds.get(1).getString(0);
+		Assertions
+			.assertTrue(
+				"1.47565045883e-08".equals(tmp_influence) ||
+					"1.98956540239e-08".equals(tmp_influence));
+
+		Assertions.assertTrue(!tmp_ds.get(0).getString(0).equals(tmp_ds.get(1).getString(0)));
+
+	}
+
+	@Test
+	public void matchTwo() throws Exception {
+		String bipScoresPath = getClass()
+			.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
+			.getPath();
+		String inputPath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json")
+			.getPath();
+
+		SparkAtomicActionScoreJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged",
+					Boolean.FALSE.toString(),
+					"-inputPath",
+					inputPath,
+					"-bipScorePath",
+					bipScoresPath,
+					"-resultTableName",
+					"eu.dnetlib.dhp.schema.oaf.Publication",
+					"-outputPath",
+					workingDir.toString() + "/actionSet"
+				});
+
+		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+		JavaRDD<Publication> tmp = sc
+			.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
+			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+			.map(aa -> ((Publication) aa.getPayload()));
+
+		Assertions.assertTrue(tmp.count() == 2);
+
+		Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
+		verificationDataset.createOrReplaceTempView("publication");
+
+		Dataset<Row> execVerification = spark
+			.sql(
+				"Select p.id oaid, mes.id, mUnit.value from publication p " +
+					"lateral view explode(measures) m as mes " +
+					"lateral view explode(mes.unit) u as mUnit ");
+
+		Assertions.assertEquals(4, execVerification.count());
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb'").count());
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09'").count());
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("id = 'influence'").count());
+
+		Assertions
+			.assertEquals(
+				2,
+				execVerification.filter("id = 'popularity'").count());
+
+		Assertions
+			.assertEquals(
+				"1.47565045883e-08",
+				execVerification
+					.filter(
+						"oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb' " +
+							"and id = 'influence'")
+					.select("value")
+					.collectAsList()
+					.get(0)
+					.getString(0));
+
+		Assertions
+			.assertEquals(
+				"1.98956540239e-08",
+				execVerification
+					.filter(
+						"oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09' " +
+							"and id = 'influence'")
+					.select("value")
+					.collectAsList()
+					.get(0)
+					.getString(0));
+
+		Assertions
+			.assertEquals(
+				"0.282046161584",
+				execVerification
+					.filter(
+						"oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09' " +
+							"and id = 'popularity'")
+					.select("value")
+					.collectAsList()
+					.get(0)
+					.getString(0));
+
+		Assertions
+			.assertEquals(
+				"0.227515392",
+				execVerification
+					.filter(
+						"oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb' " +
+							"and id = 'popularity'")
+					.select("value")
+					.collectAsList()
+					.get(0)
+					.getString(0));
+
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java
@ -4,8 +4,13 @@ package eu.dnetlib.dhp.broker.oa;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

 import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
 import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;

+import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@ -13,6 +18,8 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.slf4j.Logger;
@ -29,7 +36,7 @@ import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
 public class PartitionEventsByDsIdJob {

 	private static final Logger log = LoggerFactory.getLogger(PartitionEventsByDsIdJob.class);
-	private static final String OPENDOAR_NSPREFIX = "opendoar____::";
+	private static final String OPENDOAR_NSPREFIX = "10|opendoar____::";

 	public static void main(final String[] args) throws Exception {

@ -37,7 +44,7 @@ public class PartitionEventsByDsIdJob {
 			IOUtils
 				.toString(
 					PartitionEventsByDsIdJob.class
-						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
+						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/od_partitions_params.json")));
 		parser.parseArgument(args);

 		final Boolean isSparkSessionManaged = Optional
@ -54,14 +61,32 @@ public class PartitionEventsByDsIdJob {
 		final String partitionPath = parser.get("workingPath") + "/eventsByOpendoarId";
 		log.info("partitionPath: {}", partitionPath);

+		final String opendoarIds = parser.get("opendoarIds");
+		log.info("opendoarIds: {}", opendoarIds);
+
+		final Set<String> validOpendoarIds = new HashSet<>();
+		if (!opendoarIds.trim().equals("-")) {
+			validOpendoarIds
+				.addAll(
+					Arrays
+						.stream(opendoarIds.split(","))
+						.map(String::trim)
+						.filter(StringUtils::isNotBlank)
+						.map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s))
+						.collect(Collectors.toSet()));
+		}
+		log.info("validOpendoarIds: {}", validOpendoarIds);
+
 		runWithSparkSession(conf, isSparkSessionManaged, spark -> {

 			ClusterUtils
 				.readPath(spark, eventsPath, Event.class)
-				.filter(e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId()))
-				.filter(e -> e.getMap().getTargetDatasourceId().contains(OPENDOAR_NSPREFIX))
-				.limit(10000)
-				.map(e -> messageFromNotification(e), Encoders.bean(ShortEventMessageWithGroupId.class))
+				.filter((FilterFunction<Event>) e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId()))
+				.filter((FilterFunction<Event>) e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX))
+				.filter((FilterFunction<Event>) e -> validOpendoarIds.contains(e.getMap().getTargetDatasourceId()))
+				.map(
+					(MapFunction<Event, ShortEventMessageWithGroupId>) e -> messageFromNotification(e),
+					Encoders.bean(ShortEventMessageWithGroupId.class))
 				.coalesce(1)
 				.write()
 				.partitionBy("group")
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java
@ -5,12 +5,16 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;

+import org.apache.commons.lang3.StringUtils;
+
 import eu.dnetlib.broker.objects.OaBrokerMainEntity;
 import eu.dnetlib.dhp.broker.model.Topic;
 import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;

 public class EnrichMissingAbstract extends UpdateMatcher<String> {

+	private static final int MIN_LENGTH = 200;
+
 	public EnrichMissingAbstract() {
 		super(1,
 			s -> Topic.ENRICH_MISSING_ABSTRACT,
@ -21,10 +25,15 @@ public class EnrichMissingAbstract extends UpdateMatcher<String> {
 	@Override
 	protected List<String> findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) {
 		if (isMissing(target.getAbstracts()) && !isMissing(source.getAbstracts())) {
-			return Arrays.asList(source.getAbstracts().get(0));
-		} else {
-			return new ArrayList<>();
+			return source
+				.getAbstracts()
+				.stream()
+				.filter(s -> StringUtils.normalizeSpace(s).length() >= MIN_LENGTH)
+				.map(Arrays::asList)
+				.findFirst()
+				.orElse(new ArrayList<>());
 		}
+		return new ArrayList<>();
 	}

 }
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/od_partitions_params.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/od_partitions_params.json
@ -0,0 +1,14 @@
+[
+	{
+		"paramName": "o",
+		"paramLongName": "workingPath",
+		"paramDescription": "the path where the temporary data will be stored",
+		"paramRequired": true
+	},
+	{
+		"paramName": "list",
+		"paramLongName": "opendoarIds",
+		"paramDescription": "the opendoar IDs whitelist (comma separated)",
+		"paramRequired": true
+	}
+]
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/config-default.xml
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/workflow.xml
@ -1,60 +1,13 @@
-<workflow-app name="create broker events - partial" xmlns="uri:oozie:workflow:0.5">
+<workflow-app name="partitionEventsByOpendoarIds" xmlns="uri:oozie:workflow:0.5">

    <parameters>
        <property>
-            <name>graphInputPath</name>
-            <description>the path where the graph is stored</description>
+            <name>opendoarIds</name>
+            <description>the opendoar IDs whitelist (comma separated)</description>
        </property>
        <property>
            <name>workingPath</name>
            <description>the path where the the generated data will be stored</description>
-        </property>
-		<property>
-            <name>datasourceIdWhitelist</name>
-            <value>-</value>
-            <description>a white list (comma separeted, - for empty list) of datasource ids</description>
-        </property>
-		<property>
-            <name>datasourceTypeWhitelist</name>
-            <value>-</value>
-            <description>a white list (comma separeted, - for empty list) of datasource types</description>
-        </property>
-		<property>
-            <name>datasourceIdBlacklist</name>
-            <value>-</value>
-            <description>a black list (comma separeted, - for empty list) of datasource ids</description>
-        </property>
-        <property>
-            <name>esEventIndexName</name>
-            <description>the elasticsearch index name for events</description>
-        </property>
-        <property>
-            <name>esNotificationsIndexName</name>
-            <description>the elasticsearch index name for notifications</description>
-        </property>
-        <property>
-            <name>esIndexHost</name>
-            <description>the elasticsearch host</description>
-        </property>
-        <property>
-        	<name>maxIndexedEventsForDsAndTopic</name>
-        	<description>the max number of events for each couple (ds/topic)</description>
-        </property>
-        <property>
-        	<name>brokerApiBaseUrl</name>
-        	<description>the url of the broker service api</description>
-        </property>
-        <property>
-        	<name>brokerDbUrl</name>
-        	<description>the url of the broker database</description>
-        </property>
-        <property>
-        	<name>brokerDbUser</name>
-        	<description>the user of the broker database</description>
-        </property>
-        <property>
-        	<name>brokerDbPassword</name>
-        	<description>the password of the broker database</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
@ -111,13 +64,13 @@
        </configuration>
    </global>

-    <start to="partition"/>
+    <start to="opendoarPartition"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    
-   <action name="partition">
+   <action name="opendoarPartition">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
@ -134,8 +87,8 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--opendoarIds</arg><arg>${opendoarIds}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-doiboost/pom.xml
+++ b/dhp-workflows/dhp-doiboost/pom.xml
@ -14,7 +14,7 @@
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
-                <version>4.0.1</version>
+                <version>${net.alchim31.maven.version}</version>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
@ -51,7 +51,6 @@
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
-            <version>4.3.4</version>
        </dependency>
        <dependency>
            <groupId>eu.dnetlib.dhp</groupId>
@ -84,6 +83,11 @@
            <artifactId>spark-sql_2.11</artifactId>
        </dependency>

+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-text</artifactId>
+        </dependency>
+


    </dependencies>
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
@ -62,7 +62,7 @@ object SparkGenerateDoiBoost {
    val orcidPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
    fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/secondJoin")

-    logger.info("Phase 3) Join Result with MAG")
+    logger.info("Phase 4) Join Result with MAG")
    val sj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p))

    val magPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@ -15,7 +15,7 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable
 import scala.util.matching.Regex

-case class CrossrefDT(doi: String, json:String) {}
+/** A Crossref record as handled by the doiboost jobs.
+  *
+  * @param doi       the DOI of the record
+  * @param json      the record serialized as a JSON string
+  * @param timestamp the timestamp associated with the record (CrossrefDataset.to_item fills it from `indexed.timestamp`)
+  */
+case class CrossrefDT(doi: String, json: String, timestamp: Long)

 case class mappingAffiliation(name: String) {}

@ -206,7 +206,7 @@ case object Crossref2Oaf {
    a.setSurname(family)
    a.setFullname(s"$given $family")
    if (StringUtils.isNotBlank(orcid))
-      a.setPid(List(createSP(orcid, ORCID, PID_TYPES)).asJava)
+      a.setPid(List(createSP(orcid, ORCID, PID_TYPES, generateDataInfo())).asJava)

    a
  }
@ -254,7 +254,7 @@ case object Crossref2Oaf {


    def snsfRule(award:String): String = {
-      var tmp1 = StringUtils.substringAfter(award,"_")
+      val tmp1 = StringUtils.substringAfter(award,"_")
      val tmp2 = StringUtils.substringBefore(tmp1,"/")
      logger.debug(s"From $award to $tmp2")
      tmp2
@ -271,18 +271,20 @@ case object Crossref2Oaf {
    }


-    def generateRelation(sourceId:String, targetId:String, nsPrefix:String) :Relation = {
+    def generateRelation(sourceId:String, targetId:String, relClass:String) :Relation = {

      val r = new Relation
      r.setSource(sourceId)
-      r.setTarget(s"40|$nsPrefix::$targetId")
+      r.setTarget(targetId)
      r.setRelType("resultProject")
-      r.setRelClass("isProducedBy")
+      r.setRelClass(relClass)
      r.setSubRelType("outcome")
      r.setCollectedfrom(List(cf).asJava)
      r.setDataInfo(di)
      r.setLastupdatetimestamp(ts)
      r
+
+
    }


@ -290,12 +292,18 @@ case object Crossref2Oaf {
      if (funder.award.isDefined && funder.award.get.nonEmpty)
        funder.award.get.map(extractField).filter(a => a!= null &&  a.nonEmpty).foreach(
          award => {
-            val targetId = DHPUtils.md5(award)
-            queue += generateRelation(sourceId, targetId, nsPrefix)
+            val targetId = getProjectId(nsPrefix, DHPUtils.md5(award))
+            queue += generateRelation(sourceId, targetId , "isProducedBy")
+            queue += generateRelation(targetId , sourceId,  "produces")
          }
        )
    }

+    /** Composes an identifier of the form `40|<nsPrefix>::<targetId>`.
+      *
+      * @param nsPrefix the namespace prefix placed before the `::` separator
+      * @param targetId the local identifier placed after the `::` separator
+      * @return the composed identifier
+      */
+    def getProjectId(nsPrefix: String, targetId: String): String =
+      s"40|${nsPrefix}::${targetId}"
+
+
    if (funders != null)
    funders.foreach(funder => {
      if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
@ -316,22 +324,33 @@ case object Crossref2Oaf {
          case "10.13039/501100002341" =>   generateSimpleRelationFromAward(funder, "aka_________", a => a)
          case "10.13039/501100001602" =>   generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", ""))
          case "10.13039/501100000923" =>   generateSimpleRelationFromAward(funder, "arc_________", a => a)
-          case "10.13039/501100000038"=>    queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "nserc_______" )
-          case "10.13039/501100000155"=>    queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "sshrc_______" )
-          case "10.13039/501100000024"=>    queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "cihr________" )
+          case "10.13039/501100000038"=>    val targetId = getProjectId("nserc_______" , "1e5e62235d094afd01cd56e65112fc63")
+                                            queue += generateRelation(sourceId, targetId, "isProducedBy" )
+                                            queue += generateRelation(targetId, sourceId, "produces" )
+          case "10.13039/501100000155"=>    val targetId = getProjectId("sshrc_______" , "1e5e62235d094afd01cd56e65112fc63")
+                                            queue += generateRelation(sourceId,targetId, "isProducedBy" )
+                                            queue += generateRelation(targetId,sourceId, "produces" )
+          case "10.13039/501100000024"=>    val targetId = getProjectId("cihr________" , "1e5e62235d094afd01cd56e65112fc63")
+                                            queue += generateRelation(sourceId,targetId, "isProducedBy" )
+                                            queue += generateRelation(targetId,sourceId, "produces" )
          case "10.13039/501100002848" =>   generateSimpleRelationFromAward(funder, "conicytf____", a => a)
          case "10.13039/501100003448" =>   generateSimpleRelationFromAward(funder, "gsrt________", extractECAward)
          case "10.13039/501100010198" =>   generateSimpleRelationFromAward(funder, "sgov________", a=>a)
          case "10.13039/501100004564" =>   generateSimpleRelationFromAward(funder, "mestd_______", extractECAward)
          case "10.13039/501100003407" =>   generateSimpleRelationFromAward(funder, "miur________", a=>a)
-                                            queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "miur________" )
+                                            val targetId = getProjectId("miur________" , "1e5e62235d094afd01cd56e65112fc63")
+                                            queue += generateRelation(sourceId,targetId, "isProducedBy" )
+                                            queue += generateRelation(targetId,sourceId, "produces" )
          case "10.13039/501100006588" |
                "10.13039/501100004488" =>  generateSimpleRelationFromAward(funder, "irb_hr______", a=>a.replaceAll("Project No.", "").replaceAll("HRZZ-","") )
          case "10.13039/501100006769"=>    generateSimpleRelationFromAward(funder, "rsf_________", a=>a)
          case "10.13039/501100001711"=>    generateSimpleRelationFromAward(funder, "snsf________", snsfRule)
          case "10.13039/501100004410"=>    generateSimpleRelationFromAward(funder, "tubitakf____", a =>a)
          case "10.10.13039/100004440"=>    generateSimpleRelationFromAward(funder, "wt__________", a =>a)
-          case "10.13039/100004440"=>       queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "wt__________" )
+          case "10.13039/100004440"=>       val targetId = getProjectId("wt__________" , "1e5e62235d094afd01cd56e65112fc63")
+                                            queue += generateRelation(sourceId,targetId, "isProducedBy" )
+                                            queue += generateRelation(targetId,sourceId, "produces" )
+
          case _ =>                         logger.debug("no match for "+funder.DOI.get )


@ -347,7 +366,9 @@ case object Crossref2Oaf {
          case "The French National Research Agency (ANR)" |
               "The French National Research Agency" => generateSimpleRelationFromAward(funder, "anr_________", a => a)
          case "CONICYT, Programa de Formación de Capital Humano Avanzado" => generateSimpleRelationFromAward(funder, "conicytf____", extractECAward)
-          case "Wellcome Trust Masters Fellowship" => queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "wt__________" )
+          case "Wellcome Trust Masters Fellowship" =>  val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63")
+                                                        queue +=  generateRelation(sourceId, targetId, "isProducedBy" )
+                                                        queue +=  generateRelation(targetId, sourceId, "produces" )
          case _ =>                         logger.debug("no match for "+funder.name )

        }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala
@ -2,6 +2,7 @@ package eu.dnetlib.doiboost.crossref

 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import org.apache.commons.io.IOUtils
+import org.apache.hadoop.io.{IntWritable, Text}
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.expressions.Aggregator
 import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
@ -12,21 +13,23 @@ import org.slf4j.{Logger, LoggerFactory}

 object CrossrefDataset {

+  // Logger bound to this object, so that log lines are attributed to CrossrefDataset
+  val logger: Logger = LoggerFactory.getLogger(CrossrefDataset.getClass)

-  def extractTimestamp(input:String): Long = {
+
+  /** Parses a raw Crossref JSON record into a [[CrossrefDT]].
+    *
+    * The DOI is read from the `DOI` field with `extract`, so a record without it is not tolerated.
+    * The timestamp is read from `indexed.timestamp` and defaults to 0 when the field is absent,
+    * as the previous `extractTimestamp` did: such a record then loses against any record carrying
+    * a positive timestamp when the aggregator compares `timestamp` values.
+    *
+    * @param input the Crossref record as a JSON string
+    * @return the item holding the DOI, the unmodified input JSON and the indexing timestamp
+    */
+  def to_item(input:String):CrossrefDT = {
 
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(input)
-
-    (json\"indexed"\"timestamp").extractOrElse[Long](0)
+    // a missing timestamp must not fail the whole job: fall back to 0
+    val ts:Long = (json \ "indexed" \ "timestamp").extractOrElse[Long](0)
+    val doi:String  = (json \ "DOI").extract[String]
+    CrossrefDT(doi, input, ts)
 
   }

-
  def main(args: Array[String]): Unit = {


-    val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass)
+
    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(IOUtils.toString(CrossrefDataset.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json")))
    parser.parseArgument(args)
@ -49,9 +52,8 @@ object CrossrefDataset {
        if (a == null)
          return b

-        val tb = extractTimestamp(b.json)
-        val ta = extractTimestamp(a.json)
-        if(ta >tb) {
+
+        if(a.timestamp >b.timestamp) {
          return a
        }
        b
@ -63,9 +65,7 @@ object CrossrefDataset {
        if (a == null)
          return b

-        val tb = extractTimestamp(b.json)
-        val ta = extractTimestamp(a.json)
-        if(ta >tb) {
+        if(a.timestamp >b.timestamp) {
          return a
        }
        b
@ -78,15 +78,21 @@ object CrossrefDataset {
      override def finish(reduction: CrossrefDT): CrossrefDT = reduction
    }

-    val sourcePath:String = parser.get("sourcePath")
-    val targetPath:String = parser.get("targetPath")
+    val workingPath:String = parser.get("workingPath")

-    val ds:Dataset[CrossrefDT] = spark.read.load(sourcePath).as[CrossrefDT]

-    ds.groupByKey(_.doi)
+    val main_ds:Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT]
+
+
+    val update =
+      spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update",  classOf[IntWritable], classOf[Text])
+        .map(i =>CrossrefImporter.decompressBlob(i._2.toString))
+        .map(i =>to_item(i)))
+
+    main_ds.union(update).groupByKey(_.doi)
      .agg(crossrefAggregator.toColumn)
      .map(s=>s._2)
-      .write.mode(SaveMode.Overwrite).save(targetPath)
+      .write.mode(SaveMode.Overwrite).save(s"$workingPath/crossref_ds_updated")

  }

--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala
@ -29,69 +29,26 @@ object SparkMapDumpIntoOAF {
        .appName(SparkMapDumpIntoOAF.getClass.getSimpleName)
        .master(parser.get("master")).getOrCreate()

+    implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
    implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
    implicit val mapEncoderRelatons: Encoder[Relation] = Encoders.kryo[Relation]
    implicit val mapEncoderDatasets: Encoder[oaf.Dataset] = Encoders.kryo[OafDataset]

-    val sc = spark.sparkContext
    val targetPath = parser.get("targetPath")
+    import spark.implicits._

+    spark.read.load(parser.get("sourcePath")).as[CrossrefDT]
+      .flatMap(k => Crossref2Oaf.convert(k.json))
+      .filter(o => o != null)
+      .write.mode(SaveMode.Overwrite).save(s"$targetPath/mixObject")

-    sc.sequenceFile(parser.get("sourcePath"), classOf[IntWritable], classOf[Text])
-      .map(k => k._2.toString).map(CrossrefImporter.decompressBlob)
-      .flatMap(k => Crossref2Oaf.convert(k)).saveAsObjectFile(s"${targetPath}/mixObject")
+    val ds:Dataset[Oaf] = spark.read.load(s"$targetPath/mixObject").as[Oaf]

-    val inputRDD = sc.objectFile[Oaf](s"${targetPath}/mixObject").filter(p=> p!= null)
+    ds.filter(o => o.isInstanceOf[Publication]).map(o => o.asInstanceOf[Publication]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefPublication")

-    val distinctPubs:RDD[Publication] = inputRDD.filter(k => k != null && k.isInstanceOf[Publication])
-      .map(k => k.asInstanceOf[Publication]).map { p: Publication => Tuple2(p.getId, p) }.reduceByKey { case (p1: Publication, p2: Publication) =>
-      var r = if (p1 == null) p2 else p1
-      if (p1 != null && p2 != null) {
-        if (p1.getLastupdatetimestamp != null && p2.getLastupdatetimestamp != null) {
-          if (p1.getLastupdatetimestamp < p2.getLastupdatetimestamp)
-            r = p2
-          else
-            r = p1
-        } else {
-          r = if (p1.getLastupdatetimestamp == null) p2 else p1
-        }
-      }
-      r
-    }.map(_._2)
+    ds.filter(o => o.isInstanceOf[Relation]).map(o => o.asInstanceOf[Relation]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefRelation")

-    val pubs:Dataset[Publication] = spark.createDataset(distinctPubs)
-    pubs.write.mode(SaveMode.Overwrite).save(s"${targetPath}/publication")
-
-
-    val distincDatasets:RDD[OafDataset] = inputRDD.filter(k => k != null && k.isInstanceOf[OafDataset])
-      .map(k => k.asInstanceOf[OafDataset]).map(p => Tuple2(p.getId, p)).reduceByKey { case (p1: OafDataset, p2: OafDataset) =>
-      var r = if (p1 == null) p2 else p1
-      if (p1 != null && p2 != null) {
-        if (p1.getLastupdatetimestamp != null && p2.getLastupdatetimestamp != null) {
-          if (p1.getLastupdatetimestamp < p2.getLastupdatetimestamp)
-            r = p2
-          else
-            r = p1
-        } else {
-          r = if (p1.getLastupdatetimestamp == null) p2 else p1
-        }
-      }
-      r
-    }.map(_._2)
-
-    spark.createDataset(distincDatasets).write.mode(SaveMode.Overwrite).save(s"${targetPath}/dataset")
-
-
-
-    val distinctRels =inputRDD.filter(k => k != null && k.isInstanceOf[Relation])
-      .map(k => k.asInstanceOf[Relation]).map(r=> (s"${r.getSource}::${r.getTarget}",r))
-      .reduceByKey { case (p1: Relation, p2: Relation) =>
-        if (p1 == null) p2 else p1
-      }.map(_._2)
-
-    val rels: Dataset[Relation] = spark.createDataset(distinctRels)
-
-    rels.write.mode(SaveMode.Overwrite).save(s"${targetPath}/relations")
+    ds.filter(o => o.isInstanceOf[OafDataset]).map(o => o.asInstanceOf[OafDataset]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefDataset")
  }


--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala
@ -21,15 +21,17 @@ object SparkImportMagIntoDataset {


  val stream = Map(
-    "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
-    "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
-    "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
+    "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Iso3166Code:string", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
+    "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")),
+    "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")),
+    "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
    "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
    "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")),
    "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")),
    "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")),
-    "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
-    "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
+    //                                                         ['FieldOfStudyId:long', 'Rank:uint', 'NormalizedName:string', 'DisplayName:string', 'MainType:string', 'Level:int', 'PaperCount:long', 'PaperFamilyCount:long', 'CitationCount:long', 'CreatedDate:DateTime']
+    "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")),
+    "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "CreatedDate:DateTime")),
    "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")),
    "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")),
    "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")),
@ -39,7 +41,7 @@ object SparkImportMagIntoDataset {
    "PaperReferences" -> Tuple2("mag/PaperReferences.txt", Seq("PaperId:long", "PaperReferenceId:long")),
    "PaperResources" -> Tuple2("mag/PaperResources.txt", Seq("PaperId:long", "ResourceType:int", "ResourceUrl:string", "SourceUrl:string", "RelationshipType:int")),
    "PaperUrls" -> Tuple2("mag/PaperUrls.txt", Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string")),
-    "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "CreatedDate:DateTime")),
+    "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "OnlineDate:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "FamilyRank:uint?", "CreatedDate:DateTime")),
    "RelatedFieldOfStudy" -> Tuple2("advanced/RelatedFieldOfStudy.txt", Seq("FieldOfStudyId1:long", "Type1:string", "FieldOfStudyId2:long", "Type2:string", "Rank:float"))
  )

--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala
@ -26,12 +26,15 @@ object SparkPreProcessMAG {
        .master(parser.get("master")).getOrCreate()

    val sourcePath = parser.get("sourcePath")
+    val workingPath = parser.get("workingPath")
+    val targetPath = parser.get("targetPath")
+
    import spark.implicits._
    implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication]
    implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)

    logger.info("Phase 1) make uninque DOI in Papers:")
-    val d: Dataset[MagPapers] = spark.read.load(s"${parser.get("sourcePath")}/Papers").as[MagPapers]
+    val d: Dataset[MagPapers] = spark.read.load(s"$sourcePath/Papers").as[MagPapers]

    // Filtering Papers with DOI, and since for the same DOI we have multiple version of item with different PapersId we get the last one
    val result: RDD[MagPapers] = d.where(col("Doi").isNotNull)
@ -41,11 +44,12 @@ object SparkPreProcessMAG {
      .map(_._2)

    val distinctPaper: Dataset[MagPapers] = spark.createDataset(result)
-    distinctPaper.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/Papers_distinct")
+
+    distinctPaper.write.mode(SaveMode.Overwrite).save(s"$workingPath/Papers_distinct")

    logger.info("Phase 0) Enrich Publication with description")
-    val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
-    pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract")
+    val pa = spark.read.load(s"$sourcePath/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
+    pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"$workingPath/PaperAbstract")

    logger.info("Phase 3) Group Author by PaperId")
    val authors = spark.read.load(s"$sourcePath/Authors").as[MagAuthor]
@ -64,24 +68,24 @@ object SparkPreProcessMAG {
        } else
          mpa
      }).groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation")).as("authors"))
-      .write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_1_paper_authors")
+      .write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_1_paper_authors")

    logger.info("Phase 4) create First Version of publication Entity with Paper Journal and Authors")

    val journals = spark.read.load(s"$sourcePath/Journals").as[MagJournal]

-    val papers = spark.read.load((s"${parser.get("targetPath")}/Papers_distinct")).as[MagPapers]
+    val papers = spark.read.load((s"$workingPath/Papers_distinct")).as[MagPapers]

-    val paperWithAuthors = spark.read.load(s"${parser.get("targetPath")}/merge_step_1_paper_authors").as[MagPaperWithAuthorList]
+    val paperWithAuthors = spark.read.load(s"$workingPath/merge_step_1_paper_authors").as[MagPaperWithAuthorList]

    val firstJoin = papers.joinWith(journals, papers("JournalId").equalTo(journals("JournalId")), "left")
    firstJoin.joinWith(paperWithAuthors, firstJoin("_1.PaperId").equalTo(paperWithAuthors("PaperId")), "left")
      .map { a => ConversionUtil.createOAFFromJournalAuthorPaper(a) }
-      .write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_2")
+      .write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_2")


    var magPubs: Dataset[(String, Publication)] =
-      spark.read.load(s"${parser.get("targetPath")}/merge_step_2").as[Publication]
+      spark.read.load(s"$workingPath/merge_step_2").as[Publication]
      .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]


@ -95,10 +99,10 @@ object SparkPreProcessMAG {
      .map(item => ConversionUtil.updatePubsWithConferenceInfo(item))
      .write
      .mode(SaveMode.Overwrite)
-      .save(s"${parser.get("targetPath")}/merge_step_2_conference")
+      .save(s"$workingPath/merge_step_2_conference")


-    magPubs= spark.read.load(s"${parser.get("targetPath")}/merge_step_2_conference").as[Publication]
+    magPubs= spark.read.load(s"$workingPath/merge_step_2_conference").as[Publication]
      .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]

    val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl]
@ -108,27 +112,27 @@ object SparkPreProcessMAG {
    magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left")
      .map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) }
      .write.mode(SaveMode.Overwrite)
-      .save(s"${parser.get("targetPath")}/merge_step_3")
+      .save(s"$workingPath/merge_step_3")


 //    logger.info("Phase 6) Enrich Publication with description")
 //    val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
 //    pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract")

-    val paperAbstract = spark.read.load((s"${parser.get("targetPath")}/PaperAbstract")).as[MagPaperAbstract]
+    val paperAbstract = spark.read.load((s"$workingPath/PaperAbstract")).as[MagPaperAbstract]


-    magPubs = spark.read.load(s"${parser.get("targetPath")}/merge_step_3").as[Publication]
+    magPubs = spark.read.load(s"$workingPath/merge_step_3").as[Publication]
      .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]

    magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left")
      .map(item => ConversionUtil.updatePubsWithDescription(item)
-    ).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_4")
+    ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4")


    logger.info("Phase 7) Enrich Publication with FieldOfStudy")

-    magPubs = spark.read.load(s"${parser.get("targetPath")}/merge_step_4").as[Publication]
+    magPubs = spark.read.load(s"$workingPath/merge_step_4").as[Publication]
      .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]

    val fos = spark.read.load(s"$sourcePath/FieldsOfStudy").select($"FieldOfStudyId".alias("fos"), $"DisplayName", $"MainType")
@ -144,14 +148,14 @@ object SparkPreProcessMAG {
      .equalTo(paperField("PaperId")), "left")
      .map(item => ConversionUtil.updatePubsWithSubject(item))
      .write.mode(SaveMode.Overwrite)
-      .save(s"${parser.get("targetPath")}/mag_publication")
+      .save(s"$workingPath/mag_publication")


-    val s:RDD[Publication] = spark.read.load(s"${parser.get("targetPath")}/mag_publication").as[Publication]
+    val s:RDD[Publication] = spark.read.load(s"$workingPath/mag_publication").as[Publication]
      .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b))
    .map(_._2)

-    spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/mag_publication_u")
+    spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication")

  }
 }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java
@ -17,11 +17,12 @@ import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.io.compress.GzipCodec;
 import org.mortbay.log.Log;

-import eu.dnetlib.doiboost.orcid.json.JsonWriter;
 import eu.dnetlib.doiboost.orcid.model.WorkData;
 import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser;
+import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter;

 public class ActivitiesDecompressor {

@ -143,4 +144,64 @@ public class ActivitiesDecompressor {
 		Log.info("Error from Orcid found: " + errorFromOrcidFound);
 		Log.info("Error parsing xml work found: " + xmlParserErrorFound);
 	}
+
+	/**
+	 * Unpacks a compressed tar archive of ORCID activities and stores every "works" entry as raw XML
+	 * in a block-compressed (gzip) sequence file.
+	 * <p>
+	 * For each stored entry the key is the value returned by
+	 * {@code XMLRecordParser.retrieveOrcidIdFromActivity} and the value is the XML of the entry.
+	 * Line terminators are not preserved: the lines of an entry are concatenated as they are read.
+	 * Directories and entries whose name does not contain "works" are skipped.
+	 *
+	 * @param conf       Hadoop configuration used to resolve the file system, the codec and the writer
+	 * @param inputUri   URI of the compressed tar archive; the process exits with status 1 when no
+	 *                   compression codec matches it
+	 * @param outputPath path of the sequence file to create
+	 * @throws Exception if reading the archive, parsing an entry or writing the sequence file fails
+	 */
+	public static void extractXML(Configuration conf, String inputUri, Path outputPath)
+		throws Exception {
+		FileSystem fs = FileSystem.get(URI.create(inputUri), conf);
+		Path inputPath = new Path(inputUri);
+		CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(inputPath);
+		if (codec == null) {
+			System.err.println("No codec found for " + inputUri);
+			System.exit(1);
+		}
+		InputStream gzipInputStream = null;
+		try {
+			gzipInputStream = codec.createInputStream(fs.open(inputPath));
+			int counter = 0;
+			try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
+				TarArchiveEntry entry = null;
+				try (SequenceFile.Writer writer = SequenceFile
+					.createWriter(
+						conf,
+						SequenceFile.Writer.file(outputPath),
+						SequenceFile.Writer.keyClass(Text.class),
+						SequenceFile.Writer.valueClass(Text.class),
+						SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()))) {
+					while ((entry = tais.getNextTarEntry()) != null) {
+						String filename = entry.getName();
+						if (entry.isDirectory() || !filename.contains("works")) {
+							continue;
+						}
+						counter++;
+						// The reader is deliberately not closed: closing it would also close the tar stream.
+						// The charset is given explicitly so the result does not depend on the platform default.
+						BufferedReader br = new BufferedReader(
+							new InputStreamReader(tais, java.nio.charset.StandardCharsets.UTF_8));
+						StringBuilder buffer = new StringBuilder();
+						String line;
+						while ((line = br.readLine()) != null) {
+							buffer.append(line);
+						}
+						String xml = buffer.toString();
+						String[] filenameParts = filename.split("/");
+						final Text key = new Text(
+							XMLRecordParser
+								.retrieveOrcidIdFromActivity(
+									xml.getBytes(java.nio.charset.StandardCharsets.UTF_8),
+									filenameParts[filenameParts.length - 1]));
+						final Text value = new Text(xml);
+						writer.append(key, value);
+						if ((counter % 100000) == 0) {
+							Log.info("Current xml works extracted: " + counter);
+						}
+					}
+				}
+			}
+			Log.info("Activities extraction completed");
+			Log.info("Total XML works parsed: " + counter);
+		} finally {
+			Log.debug("Closing gzip stream");
+			IOUtils.closeStream(gzipInputStream);
+		}
+	}
+
 }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java
@ -0,0 +1,54 @@
+
+package eu.dnetlib.doiboost.orcid;
+
+import java.io.IOException;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.mortbay.log.Log;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork;
+
+/**
+ * Command line job that reads its parameters and hands the activities archive over to
+ * {@code ActivitiesDecompressor.extractXML}.
+ */
+public class ExtractXMLActivitiesData extends OrcidDSManager {
+	private String outputWorksPath;
+	private String activitiesFileNameTarGz;
+
+	/**
+	 * Parses the arguments and runs the extraction.
+	 *
+	 * @param args command line arguments described by the JSON definition loaded in {@code loadArgs}
+	 */
+	public static void main(String[] args) throws IOException, Exception {
+		ExtractXMLActivitiesData job = new ExtractXMLActivitiesData();
+		job.loadArgs(args);
+		job.extractWorks();
+	}
+
+	/** Reads the argument definition from the classpath, parses {@code args} and logs each value. */
+	private void loadArgs(String[] args) throws IOException, Exception {
+		final String argumentsDefinition = IOUtils
+			.toString(
+				GenOrcidAuthorWork.class
+					.getResourceAsStream(
+						"/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json"));
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(argumentsDefinition);
+		parser.parseArgument(args);
+
+		hdfsServerUri = parser.get("hdfsServerUri");
+		Log.info("HDFS URI: " + hdfsServerUri);
+
+		workingPath = parser.get("workingPath");
+		Log.info("Working Path: " + workingPath);
+
+		activitiesFileNameTarGz = parser.get("activitiesFileNameTarGz");
+		Log.info("Activities File Name: " + activitiesFileNameTarGz);
+
+		outputWorksPath = parser.get("outputWorksPath");
+		Log.info("Output Author Work Data: " + outputWorksPath);
+	}
+
+	/** Builds the input URI and the output path below the working path and starts the extraction. */
+	private void extractWorks() throws Exception {
+		final Configuration conf = initConfigurationObject();
+		// The handle is not used afterwards; the call itself is kept unchanged.
+		final FileSystem fs = initFileSystemObject(conf);
+		final String baseUri = hdfsServerUri.concat(workingPath);
+		final String tarGzUri = baseUri.concat(activitiesFileNameTarGz);
+		final Path outputPath = new Path(baseUri.concat(outputWorksPath));
+		ActivitiesDecompressor.extractXML(conf, tarGzUri, outputPath);
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java
@ -0,0 +1,56 @@
+
+package eu.dnetlib.doiboost.orcid;
+
+import java.io.IOException;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.mortbay.log.Log;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork;
+
+/**
+ * Command line job that reads its parameters and hands the summaries archive over to
+ * {@code SummariesDecompressor.extractXML}.
+ */
+public class ExtractXMLSummariesData extends OrcidDSManager {
+
+	private String outputAuthorsPath;
+	private String summariesFileNameTarGz;
+
+	/**
+	 * Parses the arguments and runs the extraction.
+	 *
+	 * @param args command line arguments described by the JSON definition loaded in {@code loadArgs}
+	 */
+	public static void main(String[] args) throws IOException, Exception {
+		ExtractXMLSummariesData job = new ExtractXMLSummariesData();
+		job.loadArgs(args);
+		job.extractAuthors();
+	}
+
+	/** Reads the argument definition from the classpath, parses {@code args} and logs each value. */
+	private void loadArgs(String[] args) throws IOException, Exception {
+		final String argumentsDefinition = IOUtils
+			.toString(
+				GenOrcidAuthorWork.class
+					.getResourceAsStream(
+						"/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json"));
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(argumentsDefinition);
+		parser.parseArgument(args);
+
+		hdfsServerUri = parser.get("hdfsServerUri");
+		Log.info("HDFS URI: " + hdfsServerUri);
+
+		workingPath = parser.get("workingPath");
+		Log.info("Working Path: " + workingPath);
+
+		summariesFileNameTarGz = parser.get("summariesFileNameTarGz");
+		Log.info("Summaries File Name: " + summariesFileNameTarGz);
+
+		outputAuthorsPath = parser.get("outputAuthorsPath");
+		Log.info("Output Authors Data: " + outputAuthorsPath);
+	}
+
+	/**
+	 * Builds the input URI and the output path (file "xml_authors.seq" under the authors output
+	 * path) below the working path and starts the extraction.
+	 */
+	public void extractAuthors() throws Exception {
+		final Configuration conf = initConfigurationObject();
+		// The handle is not used afterwards; the call itself is kept unchanged.
+		final FileSystem fs = initFileSystemObject(conf);
+		final String baseUri = hdfsServerUri.concat(workingPath);
+		final String tarGzUri = baseUri.concat(summariesFileNameTarGz);
+		final Path outputPath = new Path(baseUri.concat(outputAuthorsPath).concat("xml_authors.seq"));
+		SummariesDecompressor.extractXML(conf, tarGzUri, outputPath);
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java
@ -25,8 +25,8 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager {
 	public void generateAuthorsDOIsData() throws Exception {
 		Configuration conf = initConfigurationObject();
 		FileSystem fs = initFileSystemObject(conf);
-		String tarGzUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(activitiesFileNameTarGz);
-		Path outputPath = new Path(hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(outputAuthorsDOIsPath));
+		String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz);
+		Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputAuthorsDOIsPath));
 		ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath);
 	}

@ -41,8 +41,8 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager {

 		hdfsServerUri = parser.get("hdfsServerUri");
 		Log.info("HDFS URI: " + hdfsServerUri);
-		hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath");
-		Log.info("Default Path: " + hdfsOrcidDefaultPath);
+		workingPath = parser.get("workingPath");
+		Log.info("Default Path: " + workingPath);
 		activitiesFileNameTarGz = parser.get("activitiesFileNameTarGz");
 		Log.info("Activities File Name: " + activitiesFileNameTarGz);
 		outputAuthorsDOIsPath = parser.get("outputAuthorsDOIsPath");
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java
@ -15,7 +15,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 public class OrcidDSManager {

 	protected String hdfsServerUri;
-	protected String hdfsOrcidDefaultPath;
+	protected String workingPath;
 	private String summariesFileNameTarGz;
 	private String outputAuthorsPath;

@ -28,10 +28,10 @@ public class OrcidDSManager {
 	public void generateAuthors() throws Exception {
 		Configuration conf = initConfigurationObject();
 		FileSystem fs = initFileSystemObject(conf);
-		String tarGzUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(summariesFileNameTarGz);
+		String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz);
 		Path outputPath = new Path(
 			hdfsServerUri
-				.concat(hdfsOrcidDefaultPath)
+				.concat(workingPath)
 				.concat(outputAuthorsPath)
 				.concat("authors.seq"));
 		SummariesDecompressor.parseGzSummaries(conf, tarGzUri, outputPath);
@ -41,22 +41,18 @@ public class OrcidDSManager {
 		// ====== Init HDFS File System Object
 		Configuration conf = new Configuration();
 		// Set FileSystem URI
-		conf.set("fs.defaultFS", hdfsServerUri.concat(hdfsOrcidDefaultPath));
+		conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath));
 		// Because of Maven
 		conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
 		conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
 		return conf;
 	}

-	protected FileSystem initFileSystemObject(Configuration conf) {
+	protected FileSystem initFileSystemObject(Configuration conf) throws IOException {
 		// Get the filesystem - HDFS
+		// if there is an exception, it will be propagated
 		FileSystem fs = null;
-		try {
-			fs = FileSystem.get(URI.create(hdfsServerUri.concat(hdfsOrcidDefaultPath)), conf);
-		} catch (IOException e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
+		fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf);
 		return fs;
 	}

@ -66,13 +62,13 @@ public class OrcidDSManager {
 				.toString(
 					OrcidDSManager.class
 						.getResourceAsStream(
-							"/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json")));
+							"/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json")));
 		parser.parseArgument(args);

 		hdfsServerUri = parser.get("hdfsServerUri");
 		Log.info("HDFS URI: " + hdfsServerUri);
-		hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath");
-		Log.info("Default Path: " + hdfsOrcidDefaultPath);
+		workingPath = parser.get("workingPath");
+		Log.info("Working Path: " + workingPath);
 		summariesFileNameTarGz = parser.get("summariesFileNameTarGz");
 		Log.info("Summaries File Name: " + summariesFileNameTarGz);
 		outputAuthorsPath = parser.get("outputAuthorsPath");
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java
@ -1,14 +1,15 @@

 package eu.dnetlib.doiboost.orcid;

-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
+import java.io.*;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.Date;
 import java.util.List;

+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
@ -16,6 +17,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.impl.client.CloseableHttpClient;
@ -27,10 +29,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 public class OrcidDownloader extends OrcidDSManager {

 	static final int REQ_LIMIT = 24;
-//	static final int REQ_MAX_TEST = 100;
-	static final int RECORD_PARSED_COUNTER_LOG_INTERVAL = 10000;
+	static final int REQ_MAX_TEST = -1;
+	static final int RECORD_PARSED_COUNTER_LOG_INTERVAL = 500;
 	static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
-	static final String lastUpdate = "2019-09-30 00:00:00";
+	static final String lastUpdate = "2020-09-29 00:00:00";
 	private String lambdaFileName;
 	private String outputPath;
 	private String token;
@ -41,7 +43,7 @@ public class OrcidDownloader extends OrcidDSManager {
 		orcidDownloader.parseLambdaFile();
 	}

-	private String downloadRecord(String orcidId) {
+	private String downloadRecord(String orcidId) throws IOException {
 		try (CloseableHttpClient client = HttpClients.createDefault()) {
 			HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
 			httpGet.addHeader("Accept", "application/vnd.orcid+xml");
@ -49,17 +51,23 @@ public class OrcidDownloader extends OrcidDSManager {
 			CloseableHttpResponse response = client.execute(httpGet);
 			if (response.getStatusLine().getStatusCode() != 200) {
 				Log
-					.warn(
+					.info(
 						"Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
 				return new String("");
 			}
-			return IOUtils.toString(response.getEntity().getContent());
-
-		} catch (Throwable e) {
-			Log.warn("Downloading " + orcidId, e.getMessage());
-
+//			return IOUtils.toString(response.getEntity().getContent());
+			return xmlStreamToString(response.getEntity().getContent());
 		}
-		return new String("");
+	}
+
+	/**
+	 * Reads the whole stream as UTF-8 text and returns it as one string.
+	 * Line terminators are not preserved: the lines are concatenated as they are read.
+	 *
+	 * @param xmlStream stream to consume; it is read to the end and is not closed here
+	 * @return the concatenation of all lines read from the stream
+	 * @throws IOException if reading from the stream fails
+	 */
+	private String xmlStreamToString(InputStream xmlStream) throws IOException {
+		// The charset is given explicitly so decoding does not depend on the platform default.
+		BufferedReader br = new BufferedReader(
+			new InputStreamReader(xmlStream, java.nio.charset.StandardCharsets.UTF_8));
+		StringBuilder buffer = new StringBuilder();
+		String line;
+		while ((line = br.readLine()) != null) {
+			buffer.append(line);
+		}
+		return buffer.toString();
 	}

 	public void parseLambdaFile() throws Exception {
@ -69,97 +77,94 @@ public class OrcidDownloader extends OrcidDSManager {
 		long startDownload = 0;
 		Configuration conf = initConfigurationObject();
 		FileSystem fs = initFileSystemObject(conf);
-		String lambdaFileUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(lambdaFileName);
+		String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName);
 		Path hdfsreadpath = new Path(lambdaFileUri);
 		FSDataInputStream lambdaFileStream = fs.open(hdfsreadpath);
 		Path hdfsoutputPath = new Path(
 			hdfsServerUri
-				.concat(hdfsOrcidDefaultPath)
+				.concat(workingPath)
 				.concat(outputPath)
-				.concat("orcid_records.seq"));
-
-		try (SequenceFile.Writer writer = SequenceFile
-			.createWriter(
-				conf,
-				SequenceFile.Writer.file(hdfsoutputPath),
-				SequenceFile.Writer.keyClass(Text.class),
-				SequenceFile.Writer.valueClass(Text.class))) {
-
-			try (BufferedReader br = new BufferedReader(new InputStreamReader(lambdaFileStream))) {
-				String line;
-				int nReqTmp = 0;
+				.concat("updated_xml_authors.seq"));
+		try (TarArchiveInputStream tais = new TarArchiveInputStream(
+			new GzipCompressorInputStream(lambdaFileStream))) {
+			TarArchiveEntry entry = null;
+			StringBuilder sb = new StringBuilder();
+			try (SequenceFile.Writer writer = SequenceFile
+				.createWriter(
+					conf,
+					SequenceFile.Writer.file(hdfsoutputPath),
+					SequenceFile.Writer.keyClass(Text.class),
+					SequenceFile.Writer.valueClass(Text.class),
+					SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()))) {
 				startDownload = System.currentTimeMillis();
-				long startReqTmp = System.currentTimeMillis();
-				while ((line = br.readLine()) != null) {
-					parsedRecordsCounter++;
-					// skip headers line
-					if (parsedRecordsCounter == 1) {
-						continue;
-					}
-					String[] values = line.split(",");
-					List<String> recordInfo = Arrays.asList(values);
-					String orcidId = recordInfo.get(0);
-					if (isModified(orcidId, recordInfo.get(3))) {
-						String record = downloadRecord(orcidId);
-						downloadedRecordsCounter++;
-						if (!record.isEmpty()) {
-							String compressRecord = ArgumentApplicationParser.compressArgument(record);
-							final Text key = new Text(recordInfo.get(0));
-							final Text value = new Text(compressRecord);
-
-							try {
+				while ((entry = tais.getNextTarEntry()) != null) {
+					BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from tarInput
+					String line;
+					while ((line = br.readLine()) != null) {
+						String[] values = line.split(",");
+						List<String> recordInfo = Arrays.asList(values);
+						int nReqTmp = 0;
+						long startReqTmp = System.currentTimeMillis();
+						// skip headers line
+						if (parsedRecordsCounter == 0) {
+							parsedRecordsCounter++;
+							continue;
+						}
+						parsedRecordsCounter++;
+						String orcidId = recordInfo.get(0);
+						if (isModified(orcidId, recordInfo.get(3))) {
+							String record = downloadRecord(orcidId);
+							downloadedRecordsCounter++;
+							if (!record.isEmpty()) {
+//							String compressRecord = ArgumentApplicationParser.compressArgument(record);
+								final Text key = new Text(recordInfo.get(0));
+								final Text value = new Text(record);
 								writer.append(key, value);
 								savedRecordsCounter++;
-							} catch (IOException e) {
-								Log.warn("Writing to sequence file: " + e.getMessage());
-								Log.warn(e);
-								throw new RuntimeException(e);
+							}
+						} else {
+							break;
+						}
+						long endReq = System.currentTimeMillis();
+						nReqTmp++;
+						if (nReqTmp == REQ_LIMIT) {
+							long reqSessionDuration = endReq - startReqTmp;
+							if (reqSessionDuration <= 1000) {
+								Log
+									.info(
+										"\nreqSessionDuration: "
+											+ reqSessionDuration
+											+ " nReqTmp: "
+											+ nReqTmp
+											+ " wait ....");
+								Thread.sleep(1000 - reqSessionDuration);
+							} else {
+								nReqTmp = 0;
+								startReqTmp = System.currentTimeMillis();
+							}
+						}
+						if ((parsedRecordsCounter % RECORD_PARSED_COUNTER_LOG_INTERVAL) == 0) {
+							Log
+								.info(
+									"Current parsed: "
+										+ parsedRecordsCounter
+										+ " downloaded: "
+										+ downloadedRecordsCounter
+										+ " saved: "
+										+ savedRecordsCounter);
+							if (REQ_MAX_TEST != -1 && parsedRecordsCounter > REQ_MAX_TEST) {
+								break;
 							}
 						}
 					}
-					long endReq = System.currentTimeMillis();
-					nReqTmp++;
-					if (nReqTmp == REQ_LIMIT) {
-						long reqSessionDuration = endReq - startReqTmp;
-						if (reqSessionDuration <= 1000) {
-							Log
-								.warn(
-									"\nreqSessionDuration: "
-										+ reqSessionDuration
-										+ " nReqTmp: "
-										+ nReqTmp
-										+ " wait ....");
-							Thread.sleep(1000 - reqSessionDuration);
-						} else {
-							nReqTmp = 0;
-							startReqTmp = System.currentTimeMillis();
-						}
-					}
-
-//					if (parsedRecordsCounter > REQ_MAX_TEST) {
-//						break;
-//					}
-					if ((parsedRecordsCounter % RECORD_PARSED_COUNTER_LOG_INTERVAL) == 0) {
-						Log
-							.info(
-								"Current parsed: "
-									+ parsedRecordsCounter
-									+ " downloaded: "
-									+ downloadedRecordsCounter
-									+ " saved: "
-									+ savedRecordsCounter);
-//						if (parsedRecordsCounter > REQ_MAX_TEST) {
-//							break;
-//						}
-					}
+					long endDownload = System.currentTimeMillis();
+					long downloadTime = endDownload - startDownload;
+					Log.info("Download time: " + ((downloadTime / 1000) / 60) + " minutes");
 				}
-				long endDownload = System.currentTimeMillis();
-				long downloadTime = endDownload - startDownload;
-				Log.info("Download time: " + ((downloadTime / 1000) / 60) + " minutes");
 			}
 		}
-		lambdaFileStream.close();
 		Log.info("Download started at: " + new Date(startDownload).toString());
+		Log.info("Download ended at: " + new Date(System.currentTimeMillis()).toString());
 		Log.info("Parsed Records Counter: " + parsedRecordsCounter);
 		Log.info("Downloaded Records Counter: " + downloadedRecordsCounter);
 		Log.info("Saved Records Counter: " + savedRecordsCounter);
@ -176,8 +181,8 @@ public class OrcidDownloader extends OrcidDSManager {

 		hdfsServerUri = parser.get("hdfsServerUri");
 		Log.info("HDFS URI: " + hdfsServerUri);
-		hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath");
-		Log.info("Default Path: " + hdfsOrcidDefaultPath);
+		workingPath = parser.get("workingPath");
+		Log.info("Default Path: " + workingPath);
 		lambdaFileName = parser.get("lambdaFileName");
 		Log.info("Lambda File Name: " + lambdaFileName);
 		outputPath = parser.get("outputPath");
@ -185,7 +190,7 @@ public class OrcidDownloader extends OrcidDSManager {
 		token = parser.get("token");
 	}

-	private boolean isModified(String orcidId, String modifiedDate) {
+	public boolean isModified(String orcidId, String modifiedDate) {
 		Date modifiedDateDt = null;
 		Date lastUpdateDt = null;
 		try {
@ -195,7 +200,7 @@ public class OrcidDownloader extends OrcidDSManager {
 			modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate);
 			lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate);
 		} catch (Exception e) {
-			Log.warn("[" + orcidId + "] Parsing date: ", e.getMessage());
+			Log.info("[" + orcidId + "] Parsing date: ", e.getMessage());
 			return true;
 		}
 		return modifiedDateDt.after(lastUpdateDt);
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala
@ -1,21 +1,72 @@
 package eu.dnetlib.doiboost.orcid

+import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.oa.merge.AuthorMerger
 import eu.dnetlib.dhp.schema.oaf.Publication
+import eu.dnetlib.dhp.schema.orcid.OrcidDOI
 import eu.dnetlib.doiboost.mag.ConversionUtil
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.expressions.Aggregator
 import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}

 object SparkConvertORCIDToOAF {
+  val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass)
+
+  /**
+    * Builds the typed aggregator used to collapse all (id, Publication) pairs of a group into a
+    * single Publication: fields are merged with `mergeFrom`, authors with `AuthorMerger.mergeAuthor`,
+    * and the id of the result is taken from the first contribution when the buffer has none.
+    * Buffer and output are encoded with Kryo.
+    */
+  def getPublicationAggregator(): Aggregator[(String, Publication), Publication, Publication] = new Aggregator[(String, Publication), Publication, Publication]{
+
+    override def zero: Publication = new Publication()
+
+    // Folds one input pair into the buffer b; only the Publication part of the pair is used.
+    override def reduce(b: Publication, a: (String, Publication)): Publication = {
+      b.mergeFrom(a._2)
+      b.setAuthor(AuthorMerger.mergeAuthor(a._2.getAuthor, b.getAuthor))
+      if (b.getId == null)
+        b.setId(a._2.getId)
+      b
+    }


+    // Combines two partial buffers into wx.
+    override def merge(wx: Publication, wy: Publication): Publication = {
+      wx.mergeFrom(wy)
+      wx.setAuthor(AuthorMerger.mergeAuthor(wy.getAuthor, wx.getAuthor))
+      // wy.getId may be null as well (reduce above already treats a null id as possible),
+      // so it is checked before nonEmpty is called on it.
+      if (wx.getId == null && wy.getId != null && wy.getId.nonEmpty)
+        wx.setId(wy.getId)
+      wx
+    }
+    override def finish(reduction: Publication): Publication = reduction
+
+    override def bufferEncoder: Encoder[Publication] =
+      Encoders.kryo(classOf[Publication])
+
+    override def outputEncoder: Encoder[Publication] =
+      Encoders.kryo(classOf[Publication])
+  }
+
+/**
+  * Converts ORCID records into OAF publications and writes one merged Publication per id.
+  *
+  * The input is read as text, one JSON document per line, and deserialized into OrcidDOI.
+  * Records that `ORCIDToOAF.convertTOOAF` maps to null are dropped; the remaining publications
+  * are grouped by id and merged with `getPublicationAggregator`.
+  *
+  * @param spark      active Spark session
+  * @param sourcePath path of the text input
+  * @param targetPath path the result is saved to; existing content is overwritten
+  */
+def run(spark:SparkSession,sourcePath:String, targetPath:String):Unit = {
+  implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
+  implicit val mapOrcid: Encoder[OrcidDOI] = Encoders.kryo[OrcidDOI]
+  implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)
+
+  val mapper = new ObjectMapper()
+  // The previous getDeserializationConfig.withFeatures(...) call returned a new configuration that
+  // was discarded, so it never changed the mapper. The feature is now set on the mapper itself,
+  // which keeps the effective behaviour: unknown JSON properties make deserialization fail.
+  // NOTE(review): if unknown properties were meant to be ignored, pass false here - confirm.
+  mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true)
+
+  val dataset:Dataset[OrcidDOI] = spark.createDataset(spark.sparkContext.textFile(sourcePath).map(s => mapper.readValue(s,classOf[OrcidDOI])))
+
+  logger.info("Converting ORCID to OAF")
+  dataset.map(o => ORCIDToOAF.convertTOOAF(o)).filter(p=>p!=null)
+    .map(d => (d.getId, d))
+    .groupByKey(_._1)(Encoders.STRING)
+    .agg(getPublicationAggregator().toColumn)
+    .map(p => p._2)
+    .write.mode(SaveMode.Overwrite).save(targetPath)
+}

  def main(args: Array[String]): Unit = {

-    val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass)
+
    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json")))
    parser.parseArgument(args)
@ -26,19 +77,12 @@ object SparkConvertORCIDToOAF {
        .appName(getClass.getSimpleName)
        .master(parser.get("master")).getOrCreate()

-    implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
-    implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)
-    import spark.implicits._
+
+
    val sourcePath = parser.get("sourcePath")
    val targetPath = parser.get("targetPath")
-    val dataset:Dataset[ORCIDElement] = spark.read.json(sourcePath).as[ORCIDElement]
+    run(spark, sourcePath, targetPath)

-
-    logger.info("Converting ORCID to OAF")
-    val d:RDD[Publication] = dataset.map(o => ORCIDToOAF.convertTOOAF(o)).filter(p=>p!=null).map(p=>(p.getId,p)).rdd.reduceByKey(ConversionUtil.mergePublication)
-      .map(_._2)
-
-    spark.createDataset(d).as[Publication].write.mode(SaveMode.Overwrite).save(targetPath)
  }

 }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java
@ -0,0 +1,188 @@
+
+package eu.dnetlib.doiboost.orcid;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.List;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.util.LongAccumulator;
+import org.mortbay.log.Log;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData;
+import scala.Tuple2;
+
+/**
+ * Spark job that reads (orcid id, last modified date) pairs from a sequence file, keeps the
+ * records modified after {@link #lastUpdate}, downloads each of them from the ORCID API and
+ * stores the outcome of every download in a compressed sequence file.
+ */
+public class SparkDownloadOrcidAuthors {
+
+	static Logger logger = LoggerFactory.getLogger(SparkDownloadOrcidAuthors.class);
+	static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
+	static final String lastUpdate = "2020-09-29 00:00:00";
+
+	/**
+	 * Job entry point.
+	 *
+	 * @param args command line arguments, parsed according to download_orcid_data.json
+	 * @throws IOException when the parameter description cannot be read
+	 * @throws Exception when argument parsing or the Spark job fails
+	 */
+	public static void main(String[] args) throws IOException, Exception {
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					SparkDownloadOrcidAuthors.class
+						.getResourceAsStream(
+							"/eu/dnetlib/dhp/doiboost/download_orcid_data.json")));
+		parser.parseArgument(args);
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+		logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+		final String workingPath = parser.get("workingPath");
+		logger.info("workingPath: {}", workingPath);
+		final String outputPath = parser.get("outputPath");
+		logger.info("outputPath: {}", outputPath);
+		final String token = parser.get("token");
+		final String lambdaFileName = parser.get("lambdaFileName");
+		logger.info("lambdaFileName: {}", lambdaFileName);
+
+		SparkConf conf = new SparkConf();
+		runWithSparkSession(
+			conf,
+			isSparkSessionManaged,
+			spark -> {
+				JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+				LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsed_records");
+				LongAccumulator modifiedRecordsAcc = spark.sparkContext().longAccumulator("to_download_records");
+				LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records");
+				LongAccumulator errorHTTP403Acc = spark.sparkContext().longAccumulator("error_HTTP_403");
+				LongAccumulator errorHTTP409Acc = spark.sparkContext().longAccumulator("error_HTTP_409");
+				LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503");
+				LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525");
+				LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic");
+
+				logger.info("Retrieving data from lamda sequence file");
+				JavaPairRDD<Text, Text> lamdaFileRDD = sc
+					.sequenceFile(workingPath + lambdaFileName, Text.class, Text.class);
+				logger.info("Data retrieved: " + lamdaFileRDD.count());
+
+				// Keeps only the records whose last modified date is after lastUpdate.
+				Function<Tuple2<Text, Text>, Boolean> isModifiedAfterFilter = data -> {
+					String orcidId = data._1().toString();
+					String lastModifiedDate = data._2().toString();
+					parsedRecordsAcc.add(1);
+					if (isModified(orcidId, lastModifiedDate)) {
+						modifiedRecordsAcc.add(1);
+						return true;
+					}
+					return false;
+				};
+
+				// Downloads one ORCID record; failures are recorded in the returned tuple, never rethrown.
+				Function<Tuple2<Text, Text>, Tuple2<String, String>> downloadRecordFunction = data -> {
+					String orcidId = data._1().toString();
+					String lastModifiedDate = data._2().toString();
+					final DownloadedRecordData downloaded = new DownloadedRecordData();
+					downloaded.setOrcidId(orcidId);
+					downloaded.setLastModifiedDate(lastModifiedDate);
+					try (CloseableHttpClient client = HttpClients.createDefault()) {
+						HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
+						httpGet.addHeader("Accept", "application/vnd.orcid+xml");
+						httpGet.addHeader("Authorization", String.format("Bearer %s", token));
+						long startReq = System.currentTimeMillis();
+						// The response is closed explicitly so that its connection is always released.
+						try (CloseableHttpResponse response = client.execute(httpGet)) {
+							long endReq = System.currentTimeMillis();
+							long reqTime = endReq - startReq;
+							// Throttling: every request is padded to last at least one second.
+							if (reqTime < 1000) {
+								Thread.sleep(1000 - reqTime);
+							}
+							int statusCode = response.getStatusLine().getStatusCode();
+							downloaded.setStatusCode(statusCode);
+							if (statusCode != 200) {
+								// Each status code feeds its own counter only: no fall-through between cases.
+								switch (statusCode) {
+									case 403:
+										errorHTTP403Acc.add(1);
+										break;
+									case 409:
+										errorHTTP409Acc.add(1);
+										break;
+									case 503:
+										errorHTTP503Acc.add(1);
+										throw new RuntimeException("Orcid request rate limit reached (HTTP 503)");
+									case 525:
+										errorHTTP525Acc.add(1);
+										break;
+									default:
+										errorHTTPGenericAcc.add(1);
+								}
+								logger.info("Downloading {} status code: {}", orcidId, statusCode);
+								return downloaded.toTuple2();
+							}
+							downloadedRecordsAcc.add(1);
+							downloaded
+								.setCompressedData(
+									ArgumentApplicationParser
+										.compressArgument(IOUtils.toString(response.getEntity().getContent())));
+						}
+					} catch (Throwable e) {
+						if (e instanceof InterruptedException) {
+							// Restore the interrupt flag so that a cancellation request is not lost.
+							Thread.currentThread().interrupt();
+						}
+						logger.info("Downloading {}: {}", orcidId, e.getMessage());
+						downloaded.setErrorMessage(e.getMessage());
+						return downloaded.toTuple2();
+					}
+					return downloaded.toTuple2();
+				};
+
+				sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true");
+
+				logger.info("Start execution ...");
+				JavaPairRDD<Text, Text> authorsModifiedRDD = lamdaFileRDD.filter(isModifiedAfterFilter);
+				// NOTE: the filter is evaluated by this count() and again by the save below, so the
+				// parsed_records and to_download_records accumulators are incremented by both actions.
+				logger.info("Authors modified count: " + authorsModifiedRDD.count());
+				logger.info("Start downloading ...");
+				authorsModifiedRDD
+					.repartition(10)
+					.map(downloadRecordFunction)
+					.mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2())))
+					.saveAsNewAPIHadoopFile(
+						workingPath.concat(outputPath),
+						Text.class,
+						Text.class,
+						SequenceFileOutputFormat.class,
+						sc.hadoopConfiguration());
+				logger.info("parsedRecordsAcc: " + parsedRecordsAcc.value().toString());
+				logger.info("modifiedRecordsAcc: " + modifiedRecordsAcc.value().toString());
+				logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString());
+				logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString());
+				logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString());
+				logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString());
+				logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString());
+				logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString());
+			});
+
+	}
+
+	/**
+	 * Tells whether a record has been modified after {@link #lastUpdate}.
+	 *
+	 * @param orcidId identifier of the record, used for logging only
+	 * @param modifiedDate last modified date; only its first 19 characters are parsed with DATE_FORMAT
+	 * @return true when the date is after lastUpdate, or when the date cannot be parsed
+	 */
+	private static boolean isModified(String orcidId, String modifiedDate) {
+		Date modifiedDateDt = null;
+		Date lastUpdateDt = null;
+		try {
+			if (modifiedDate.length() != 19) {
+				modifiedDate = modifiedDate.substring(0, 19);
+			}
+			modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate);
+			lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate);
+		} catch (Exception e) {
+			// An unparsable date is treated as "modified", so the record is downloaded anyway.
+			logger.info("[{}] Parsing date: {}", orcidId, e.getMessage());
+			return true;
+		}
+		return modifiedDateDt.after(lastUpdateDt);
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java
@ -0,0 +1,99 @@
+
+package eu.dnetlib.doiboost.orcid;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.GzipCodec;
+import org.apache.spark.SparkConf;
+import org.mortbay.log.Log;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+public class SparkGenLastModifiedSeq {
+	private static String hdfsServerUri;
+	private static String workingPath;
+	private static String outputPath;
+	private static String lambdaFileName;
+
+	public static void main(String[] args) throws IOException, Exception {
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					SparkGenLastModifiedSeq.class
+						.getResourceAsStream(
+							"/eu/dnetlib/dhp/doiboost/download_orcid_data.json")));
+		parser.parseArgument(args);
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+		hdfsServerUri = parser.get("hdfsServerUri");
+		workingPath = parser.get("workingPath");
+		outputPath = parser.get("outputPath");
+		lambdaFileName = parser.get("lambdaFileName");
+		String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName);
+
+		SparkConf sparkConf = new SparkConf();
+		runWithSparkSession(
+			sparkConf,
+			isSparkSessionManaged,
+			spark -> {
+				int rowsNum = 0;
+				Path output = new Path(
+					hdfsServerUri
+						.concat(workingPath)
+						.concat(outputPath));
+				Path hdfsreadpath = new Path(lambdaFileUri);
+				Configuration conf = new Configuration();
+				conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath));
+				conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
+				conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
+				FileSystem fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf);
+				FSDataInputStream lambdaFileStream = fs.open(hdfsreadpath);
+				try (TarArchiveInputStream tais = new TarArchiveInputStream(
+					new GzipCompressorInputStream(lambdaFileStream))) {
+					TarArchiveEntry entry = null;
+					try (SequenceFile.Writer writer = SequenceFile
+						.createWriter(
+							conf,
+							SequenceFile.Writer.file(output),
+							SequenceFile.Writer.keyClass(Text.class),
+							SequenceFile.Writer.valueClass(Text.class),
+							SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()))) {
+						while ((entry = tais.getNextTarEntry()) != null) {
+							BufferedReader br = new BufferedReader(new InputStreamReader(tais));
+							String line;
+							while ((line = br.readLine()) != null) {
+								String[] values = line.split(",");
+								List<String> recordInfo = Arrays.asList(values);
+								String orcidId = recordInfo.get(0);
+								final Text key = new Text(orcidId);
+								final Text value = new Text(recordInfo.get(3));
+								writer.append(key, value);
+								rowsNum++;
+							}
+						}
+					}
+				}
+				Log.info("Saved rows from lamda csv tar file: " + rowsNum);
+			});
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java
@ -13,9 +13,6 @@ import java.util.stream.Stream;

 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.compress.GzipCodec;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
@ -33,7 +30,7 @@ import com.google.gson.JsonElement;
 import com.google.gson.JsonParser;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.doiboost.orcid.model.AuthorData;
+import eu.dnetlib.dhp.schema.orcid.AuthorData;
 import eu.dnetlib.doiboost.orcid.model.WorkData;
 import scala.Tuple2;

--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkOrcidGenerateAuthors.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkOrcidGenerateAuthors.java
@ -1,165 +0,0 @@
-
-package eu.dnetlib.doiboost.orcid;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-
-import java.io.IOException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.List;
-import java.util.Optional;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.io.Text;
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClients;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.Function;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SaveMode;
-import org.apache.spark.util.LongAccumulator;
-import org.mortbay.log.Log;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData;
-import scala.Tuple2;
-
-public class SparkOrcidGenerateAuthors {
-
-	static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
-	static final String lastUpdate = "2019-09-30 00:00:00";
-
-	public static void main(String[] args) throws IOException, Exception {
-		Logger logger = LoggerFactory.getLogger(SparkOrcidGenerateAuthors.class);
-		logger.info("[ SparkOrcidGenerateAuthors STARTED]");
-
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
-			IOUtils
-				.toString(
-					SparkOrcidGenerateAuthors.class
-						.getResourceAsStream(
-							"/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json")));
-		parser.parseArgument(args);
-		Boolean isSparkSessionManaged = Optional
-			.ofNullable(parser.get("isSparkSessionManaged"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-		logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-		final String workingPath = parser.get("workingPath");
-		logger.info("workingPath: ", workingPath);
-		final String outputAuthorsPath = parser.get("outputAuthorsPath");
-		logger.info("outputAuthorsPath: ", outputAuthorsPath);
-		final String token = parser.get("token");
-
-		SparkConf conf = new SparkConf();
-		runWithSparkSession(
-			conf,
-			isSparkSessionManaged,
-			spark -> {
-				JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-				LongAccumulator parsedRecordsAcc = sc.sc().longAccumulator("parsedRecords");
-				LongAccumulator modifiedRecordsAcc = sc.sc().longAccumulator("modifiedRecords");
-				LongAccumulator downloadedRecordsAcc = sc.sc().longAccumulator("downloadedRecords");
-				LongAccumulator alreadyDownloadedRecords = sc.sc().longAccumulator("alreadyDownloadedRecords");
-				JavaRDD<String> lamdaFileRDD = sc.textFile(workingPath + "lamdafiles");
-
-				JavaRDD<String> downloadedRDD = sc.textFile(workingPath + "downloaded");
-				Function<String, String> getOrcidIdFunction = line -> {
-					try {
-						String[] values = line.split(",");
-						return values[0].substring(1);
-					} catch (Exception e) {
-						return new String("");
-					}
-				};
-				List<String> downloadedRecords = downloadedRDD.map(getOrcidIdFunction).collect();
-
-				Function<String, Boolean> isModifiedAfterFilter = line -> {
-					String[] values = line.split(",");
-					String orcidId = values[0];
-					parsedRecordsAcc.add(1);
-					if (isModified(orcidId, values[3])) {
-						modifiedRecordsAcc.add(1);
-						return true;
-					}
-					return false;
-				};
-				Function<String, Boolean> isNotDownloadedFilter = line -> {
-					String[] values = line.split(",");
-					String orcidId = values[0];
-					if (downloadedRecords.contains(orcidId)) {
-						alreadyDownloadedRecords.add(1);
-						return false;
-					}
-					return true;
-				};
-				Function<String, Tuple2<String, String>> downloadRecordFunction = line -> {
-					String[] values = line.split(",");
-					String orcidId = values[0];
-					String modifiedDate = values[3];
-					return downloadRecord(orcidId, modifiedDate, token, downloadedRecordsAcc);
-				};
-
-				lamdaFileRDD
-					.filter(isModifiedAfterFilter)
-					.filter(isNotDownloadedFilter)
-					.map(downloadRecordFunction)
-					.rdd()
-					.saveAsTextFile(workingPath.concat(outputAuthorsPath));
-			});
-
-	}
-
-	private static boolean isModified(String orcidId, String modifiedDate) {
-		Date modifiedDateDt = null;
-		Date lastUpdateDt = null;
-		try {
-			if (modifiedDate.length() != 19) {
-				modifiedDate = modifiedDate.substring(0, 19);
-			}
-			modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate);
-			lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate);
-		} catch (Exception e) {
-			Log.warn("[" + orcidId + "] Parsing date: ", e.getMessage());
-			return true;
-		}
-		return modifiedDateDt.after(lastUpdateDt);
-	}
-
-	private static Tuple2<String, String> downloadRecord(String orcidId, String modifiedDate, String token,
-		LongAccumulator downloadedRecordsAcc) {
-		final DownloadedRecordData data = new DownloadedRecordData();
-		data.setOrcidId(orcidId);
-		data.setModifiedDate(modifiedDate);
-		try (CloseableHttpClient client = HttpClients.createDefault()) {
-			HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
-			httpGet.addHeader("Accept", "application/vnd.orcid+xml");
-			httpGet.addHeader("Authorization", String.format("Bearer %s", token));
-			CloseableHttpResponse response = client.execute(httpGet);
-			int statusCode = response.getStatusLine().getStatusCode();
-			data.setStatusCode(statusCode);
-			if (statusCode != 200) {
-				Log
-					.warn(
-						"Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
-				return data.toTuple2();
-			}
-			downloadedRecordsAcc.add(1);
-			data
-				.setCompressedData(
-					ArgumentApplicationParser.compressArgument(IOUtils.toString(response.getEntity().getContent())));
-		} catch (Throwable e) {
-			Log.warn("Downloading " + orcidId, e.getMessage());
-			data.setErrorMessage(e.getMessage());
-			return data.toTuple2();
-		}
-		return data.toTuple2();
-	}
-}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPartitionLambdaFile.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPartitionLambdaFile.java
@ -1,50 +0,0 @@
-
-package eu.dnetlib.doiboost.orcid;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-
-import java.io.IOException;
-import java.util.Optional;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-
-public class SparkPartitionLambdaFile {
-
-	public static void main(String[] args) throws IOException, Exception {
-		Logger logger = LoggerFactory.getLogger(SparkOrcidGenerateAuthors.class);
-
-		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
-			IOUtils
-				.toString(
-					SparkOrcidGenerateAuthors.class
-						.getResourceAsStream(
-							"/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json")));
-		parser.parseArgument(args);
-		Boolean isSparkSessionManaged = Optional
-			.ofNullable(parser.get("isSparkSessionManaged"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-		final String workingPath = parser.get("workingPath");
-
-		SparkConf conf = new SparkConf();
-		runWithSparkSession(
-			conf,
-			isSparkSessionManaged,
-			spark -> {
-				JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-				JavaRDD<String> lamdaFileRDD = sc.textFile(workingPath + "last_modified.csv");
-
-				lamdaFileRDD
-					.repartition(20)
-					.saveAsTextFile(workingPath.concat("lamdafiles"));
-			});
-	}
-
-}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java
@ -17,11 +17,12 @@ import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.io.compress.GzipCodec;
 import org.mortbay.log.Log;

-import eu.dnetlib.doiboost.orcid.json.JsonWriter;
-import eu.dnetlib.doiboost.orcid.model.AuthorData;
+import eu.dnetlib.dhp.schema.orcid.AuthorData;
 import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser;
+import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter;

 public class SummariesDecompressor {

@ -56,6 +57,7 @@ public class SummariesDecompressor {
 		int nameFound = 0;
 		int surnameFound = 0;
 		int creditNameFound = 0;
+		int otherNamesFound = 0;
 		int errorFromOrcidFound = 0;
 		int xmlParserErrorFound = 0;
 		try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
@ -117,6 +119,9 @@ public class SummariesDecompressor {
 								if (authorData.getCreditName() != null) {
 									creditNameFound += 1;
 								}
+								if (authorData.getOtherNames() != null && authorData.getOtherNames().size() > 1) {
+									otherNamesFound += authorData.getOtherNames().size();
+								}

 							} else {
 								Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString());
@ -152,7 +157,71 @@ public class SummariesDecompressor {
 		Log.info("Name found: " + nameFound);
 		Log.info("Surname found: " + surnameFound);
 		Log.info("Credit name found: " + creditNameFound);
+		Log.info("Other names found: " + otherNamesFound);
 		Log.info("Error from Orcid found: " + errorFromOrcidFound);
 		Log.info("Error parsing xml record found: " + xmlParserErrorFound);
 	}
+
+	public static void extractXML(Configuration conf, String inputUri, Path outputPath)
+		throws Exception {
+		String uri = inputUri;
+		FileSystem fs = FileSystem.get(URI.create(uri), conf);
+		Path inputPath = new Path(uri);
+		CompressionCodecFactory factory = new CompressionCodecFactory(conf);
+		CompressionCodec codec = factory.getCodec(inputPath);
+		if (codec == null) {
+			System.err.println("No codec found for " + uri);
+			System.exit(1);
+		}
+		CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
+		InputStream gzipInputStream = null;
+		try {
+			gzipInputStream = codec.createInputStream(fs.open(inputPath));
+			int counter = 0;
+			try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
+				TarArchiveEntry entry = null;
+				CompressionCodec Codec = new GzipCodec();
+				org.apache.hadoop.io.SequenceFile.Writer.Option optCom = SequenceFile.Writer
+					.compression(SequenceFile.CompressionType.RECORD, Codec);
+				try (SequenceFile.Writer writer = SequenceFile
+					.createWriter(
+						conf,
+						SequenceFile.Writer.file(outputPath),
+						SequenceFile.Writer.keyClass(Text.class),
+						SequenceFile.Writer.valueClass(Text.class), optCom)) {
+					while ((entry = tais.getNextTarEntry()) != null) {
+						String filename = entry.getName();
+						if (entry.isDirectory()) {
+							Log.debug("Directory entry name: " + entry.getName());
+						} else {
+							Log.debug("XML record entry name: " + entry.getName());
+							counter++;
+							BufferedReader br = new BufferedReader(new InputStreamReader(tais));
+							String line;
+							StringBuffer buffer = new StringBuffer();
+							while ((line = br.readLine()) != null) {
+								buffer.append(line);
+							}
+							String xml = buffer.toString();
+							final Text key = new Text(
+								XMLRecordParser
+									.retrieveOrcidIdFromSummary(
+										xml.getBytes(), filename.split("/")[2].substring(0, 19)));
+							final Text value = new Text(xml);
+							writer.append(key, value);
+						}
+						if ((counter % 100000) == 0) {
+							Log.info("Current xml records extracted: " + counter);
+						}
+					}
+				}
+			}
+			Log.info("Summaries extract completed");
+			Log.info("Total XML records parsed: " + counter);
+
+		} finally {
+			Log.debug("Closing gzip stream");
+			IOUtils.closeStream(gzipInputStream);
+		}
+	}
 }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java
@ -0,0 +1,13 @@
+
+package eu.dnetlib.doiboost.orcid.json;
+
+import com.google.gson.Gson;
+
+import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;
+
+/**
+ * Helper that serializes orcid work data to json.
+ */
+public class JsonHelper {
+
+	// Gson instances are thread-safe: a single shared instance avoids building a new
+	// serializer on every call.
+	private static final Gson GSON = new Gson();
+
+	/**
+	 * Serializes the given work to its json representation.
+	 *
+	 * @param workData the work to serialize
+	 * @return the json string produced by Gson
+	 */
+	public static String createOidWork(WorkDataNoDoi workData) {
+		return GSON.toJson(workData);
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonWriter.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonWriter.java
@ -1,28 +0,0 @@
-
-package eu.dnetlib.doiboost.orcid.json;
-
-import com.google.gson.JsonObject;
-
-import eu.dnetlib.doiboost.orcid.model.AuthorData;
-import eu.dnetlib.doiboost.orcid.model.WorkData;
-
-public class JsonWriter {
-
-	public static String create(AuthorData authorData) {
-		JsonObject author = new JsonObject();
-		author.addProperty("oid", authorData.getOid());
-		author.addProperty("name", authorData.getName());
-		author.addProperty("surname", authorData.getSurname());
-		if (authorData.getCreditName() != null) {
-			author.addProperty("creditname", authorData.getCreditName());
-		}
-		return author.toString();
-	}
-
-	public static String create(WorkData workData) {
-		JsonObject work = new JsonObject();
-		work.addProperty("oid", workData.getOid());
-		work.addProperty("doi", workData.getDoi());
-		return work.toString();
-	}
-}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/DownloadedRecordData.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/DownloadedRecordData.java
@ -3,8 +3,6 @@ package eu.dnetlib.doiboost.orcid.model;

 import java.io.Serializable;

-import org.apache.hadoop.io.Text;
-
 import com.google.gson.JsonObject;

 import scala.Tuple2;
@ -12,7 +10,7 @@ import scala.Tuple2;
 public class DownloadedRecordData implements Serializable {

 	private String orcidId;
-	private String modifiedDate;
+	private String lastModifiedDate;
 	private String statusCode;
 	private String compressedData;
 	private String errorMessage;
@ -20,7 +18,7 @@ public class DownloadedRecordData implements Serializable {
 	public Tuple2<String, String> toTuple2() {
 		JsonObject data = new JsonObject();
 		data.addProperty("statusCode", getStatusCode());
-		data.addProperty("modifiedDate", getModifiedDate());
+		data.addProperty("lastModifiedDate", getLastModifiedDate());
 		if (getCompressedData() != null) {
 			data.addProperty("compressedData", getCompressedData());
 		}
@ -66,11 +64,11 @@ public class DownloadedRecordData implements Serializable {
 		this.compressedData = compressedData;
 	}

-	public String getModifiedDate() {
-		return modifiedDate;
+	public String getLastModifiedDate() {
+		return lastModifiedDate;
 	}

-	public void setModifiedDate(String modifiedDate) {
-		this.modifiedDate = modifiedDate;
+	public void setLastModifiedDate(String lastModifiedDate) {
+		this.lastModifiedDate = lastModifiedDate;
 	}
 }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java
@ -4,6 +4,8 @@ package eu.dnetlib.doiboost.orcid.xml;
 import java.util.Arrays;
 import java.util.List;

+import org.mortbay.log.Log;
+
 import com.ximpleware.AutoPilot;
 import com.ximpleware.EOFException;
 import com.ximpleware.EncodingException;
@ -14,7 +16,7 @@ import com.ximpleware.VTDNav;

 import eu.dnetlib.dhp.parser.utility.VtdException;
 import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
-import eu.dnetlib.doiboost.orcid.model.AuthorData;
+import eu.dnetlib.dhp.schema.orcid.AuthorData;
 import eu.dnetlib.doiboost.orcid.model.WorkData;

 public class XMLRecordParser {
@ -81,6 +83,12 @@ public class XMLRecordParser {
 		if (!creditNames.isEmpty()) {
 			authorData.setCreditName(creditNames.get(0));
 		}
+
+		final List<String> otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content");
+		if (!otherNames.isEmpty()) {
+			authorData.setOtherNames(otherNames);
+		}
+
 		return authorData;
 	}

@ -120,4 +128,33 @@ public class XMLRecordParser {
 		}
 		return workData;
 	}
+
+	/**
+	 * Retrieves the orcid id of a summary record from the "path" attribute of its
+	 * record:record element.
+	 *
+	 * @param bytes the XML of the summary record
+	 * @param defaultValue value returned when no record:record element is found
+	 * @return the orcid id without the leading "/", or defaultValue unchanged
+	 * @throws VtdException when the XML cannot be navigated
+	 * @throws ParseException when the XML cannot be parsed
+	 */
+	public static String retrieveOrcidIdFromSummary(byte[] bytes, String defaultValue)
+		throws VtdException, ParseException {
+		final String id = retrieveOrcidId(
+			bytes, defaultValue, NS_RECORD, NS_RECORD_URL, "//record:record", "path");
+		// Only a value that starts with "/" (the form of the path attribute) is trimmed: the
+		// fallback value must not lose its first character.
+		return id.startsWith("/") ? id.substring(1) : id;
+	}
+
+	/**
+	 * Retrieves the identifier of an activity record by reading the "put-code" attribute of
+	 * its work:work element.
+	 *
+	 * @param bytes the XML of the activity record
+	 * @param defaultValue value returned when no work:work element is found
+	 * @return the value of the put-code attribute, or defaultValue
+	 */
+	public static String retrieveOrcidIdFromActivity(byte[] bytes, String defaultValue)
+		throws VtdException, ParseException {
+		return retrieveOrcidId(bytes, defaultValue, NS_WORK, NS_WORK_URL, "//work:work", "put-code");
+	}
+
+	private static String retrieveOrcidId(byte[] bytes, String defaultValue, String ns, String nsUrl, String xpath,
+		String idAttributeName)
+		throws VtdException, ParseException {
+		final VTDGen vg = new VTDGen();
+		vg.setDoc(bytes);
+		vg.parse(true);
+		final VTDNav vn = vg.getNav();
+		final AutoPilot ap = new AutoPilot(vn);
+		ap.declareXPathNameSpace(ns, nsUrl);
+		List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
+			.getTextValuesWithAttributes(
+				ap, vn, xpath, Arrays.asList(idAttributeName));
+		if (!recordNodes.isEmpty()) {
+			return (recordNodes.get(0).getAttributes().get(idAttributeName));
+		}
+		Log.info("id not found - default: " + defaultValue);
+		return defaultValue;
+	}
 }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java
@ -0,0 +1,154 @@
+
+package eu.dnetlib.doiboost.orcidnodoi;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URI;
+
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.mortbay.log.Log;
+
+import eu.dnetlib.doiboost.orcid.json.JsonHelper;
+import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;
+import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi;
+
+/**
+ * This class writes on hdfs one sequence file: the key is an orcid identifier and the
+ * value is an orcid publication, without doi, in json format
+ */
+
+public class ActivitiesDumpReader {
+
+	// Maximum number of work entries to parse; the limit is applied only when it is greater than -1
+	private static final int MAX_XML_WORKS_PARSED = -1;
+	// A progress message is logged each time this number of work entries has been parsed
+	private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000;
+
+	/**
+	 * Opens the compressed activities archive and writes the works found in it to a sequence file.
+	 *
+	 * @param conf the hadoop configuration used to access the file system and to pick the compression codec
+	 * @param inputUri the uri of the compressed tar archive
+	 * @param outputPath the path of the sequence file to write
+	 * @throws Exception if the archive cannot be opened or parsed
+	 */
+	public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath)
+		throws Exception {
+		String uri = inputUri;
+		FileSystem fs = FileSystem.get(URI.create(uri), conf);
+		Path inputPath = new Path(uri);
+		CompressionCodecFactory factory = new CompressionCodecFactory(conf);
+		CompressionCodec codec = factory.getCodec(inputPath);
+		if (codec == null) {
+			System.err.println("No codec found for " + uri);
+			System.exit(1);
+		}
+		InputStream gzipInputStream = null;
+		try {
+			gzipInputStream = codec.createInputStream(fs.open(inputPath));
+			parseTarActivities(fs, conf, gzipInputStream, outputPath);
+
+		} finally {
+			Log.debug("Closing gzip stream");
+			IOUtils.closeStream(gzipInputStream);
+		}
+	}
+
+	/**
+	 * Iterates over the entries of the tar stream and, for every entry whose name contains "works", parses the
+	 * xml and appends the work to the sequence file when none of its external ids is of type "doi".
+	 * Any failure is rethrown as a {@link RuntimeException}.
+	 */
+	private static void parseTarActivities(
+		FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) {
+		int counter = 0;
+		int noDoiFound = 0;
+		int errorFromOrcidFound = 0;
+		int xmlParserErrorFound = 0;
+		try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
+			TarArchiveEntry entry = null;
+
+			try (SequenceFile.Writer writer = SequenceFile
+				.createWriter(
+					conf,
+					SequenceFile.Writer.file(outputPath),
+					SequenceFile.Writer.keyClass(Text.class),
+					SequenceFile.Writer.valueClass(Text.class))) {
+				while ((entry = tais.getNextTarEntry()) != null) {
+					String filename = entry.getName();
+					// true only when this entry incremented the counter: avoids logging the same progress
+					// message for every skipped entry while the counter sits on a multiple of the interval
+					boolean workParsed = false;
+					try {
+						if (!entry.isDirectory() && filename.contains("works")) {
+							Log.debug("XML work entry name: " + entry.getName());
+							counter++;
+							workParsed = true;
+							// Read directly from the tar stream; the reader is deliberately not closed because
+							// closing it would close the whole archive. The charset is explicit (the xml is
+							// expected to be UTF-8) instead of depending on the platform default.
+							BufferedReader br = new BufferedReader(
+								new InputStreamReader(tais, java.nio.charset.StandardCharsets.UTF_8));
+							String line;
+							StringBuilder buffer = new StringBuilder();
+							while ((line = br.readLine()) != null) {
+								buffer.append(line);
+							}
+							WorkDataNoDoi workDataNoDoi = XMLRecordParserNoDoi
+								.VTDParseWorkData(
+									buffer.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
+							if (workDataNoDoi != null) {
+								if (workDataNoDoi.getErrorCode() != null) {
+									errorFromOrcidFound += 1;
+									Log
+										.debug(
+											"error from Orcid with code "
+												+ workDataNoDoi.getErrorCode()
+												+ " for entry "
+												+ entry.getName());
+									continue;
+								}
+								boolean isDoiFound = workDataNoDoi
+									.getExtIds()
+									.stream()
+									.filter(e -> e.getType() != null)
+									.anyMatch(e -> e.getType().equals("doi"));
+								if (!isDoiFound) {
+									String jsonData = JsonHelper.createOidWork(workDataNoDoi);
+									Log.debug("oid: " + workDataNoDoi.getOid() + " data: " + jsonData);
+
+									final Text key = new Text(workDataNoDoi.getOid());
+									final Text value = new Text(jsonData);
+
+									try {
+										writer.append(key, value);
+									} catch (IOException e) {
+										Log.debug("Writing to sequence file: " + e.getMessage());
+										Log.debug(e);
+										throw new RuntimeException(e);
+									}
+									noDoiFound += 1;
+								}
+
+							} else {
+								Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString());
+								xmlParserErrorFound += 1;
+							}
+						}
+					} catch (Exception e) {
+						// the entry name is attached to the failure; the outer handler rethrows it unchecked
+						throw new Exception(filename, e);
+					}
+
+					if (workParsed && (counter % XML_WORKS_PARSED_COUNTER_LOG_INTERVAL) == 0) {
+						Log.info("Current xml works parsed: " + counter);
+					}
+
+					if ((MAX_XML_WORKS_PARSED > -1) && (counter > MAX_XML_WORKS_PARSED)) {
+						break;
+					}
+				}
+			}
+		} catch (Exception e) {
+			Log.warn("Parsing work from gzip archive: " + e.getMessage());
+			Log.warn(e);
+			throw new RuntimeException(e);
+		}
+		Log.info("Activities parse completed");
+		Log.info("Total XML works parsed: " + counter);
+		Log.info("Total no doi work found: " + noDoiFound);
+		Log.info("Error from Orcid found: " + errorFromOrcidFound);
+		Log.info("Error parsing xml work found: " + xmlParserErrorFound);
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java
@ -0,0 +1,57 @@
+
+package eu.dnetlib.doiboost.orcidnodoi;
+
+import java.io.IOException;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.mortbay.log.Log;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.doiboost.orcid.OrcidDSManager;
+
+/**
+ * This job reads the orcid activities archive and generates one sequence file: the key is an orcid
+ * identifier and the value is an orcid publication in json format
+ */
+
+public class GenOrcidAuthorWork extends OrcidDSManager {
+
+	private String activitiesFileNameTarGz;
+	private String outputWorksPath;
+
+	public static void main(String[] args) throws IOException, Exception {
+		final GenOrcidAuthorWork job = new GenOrcidAuthorWork();
+		job.loadArgs(args);
+		job.generateAuthorsDOIsData();
+	}
+
+	/**
+	 * Resolves the archive uri and the output path against the hdfs server uri and the working path, then
+	 * delegates the parsing to {@link ActivitiesDumpReader}.
+	 */
+	public void generateAuthorsDOIsData() throws Exception {
+		final Configuration configuration = initConfigurationObject();
+		// the returned file system is not used here; the call is kept as in the original flow
+		initFileSystemObject(configuration);
+		final String workingUri = hdfsServerUri.concat(workingPath);
+		final String tarGzUri = workingUri.concat(activitiesFileNameTarGz);
+		final Path outputPath = new Path(workingUri.concat(outputWorksPath));
+		ActivitiesDumpReader.parseGzActivities(configuration, tarGzUri, outputPath);
+	}
+
+	/**
+	 * Parses the command line according to the bundled parameters specification and stores (and logs) the
+	 * values needed by the job.
+	 */
+	private void loadArgs(String[] args) throws IOException, Exception {
+		final String argumentsSpec = IOUtils
+			.toString(
+				GenOrcidAuthorWork.class
+					.getResourceAsStream(
+						"/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json"));
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(argumentsSpec);
+		parser.parseArgument(args);
+
+		hdfsServerUri = parser.get("hdfsServerUri");
+		Log.info("HDFS URI: " + hdfsServerUri);
+
+		workingPath = parser.get("workingPath");
+		Log.info("Working Path: " + workingPath);
+
+		activitiesFileNameTarGz = parser.get("activitiesFileNameTarGz");
+		Log.info("Activities File Name: " + activitiesFileNameTarGz);
+
+		outputWorksPath = parser.get("outputWorksPath");
+		Log.info("Output Author Work Data: " + outputWorksPath);
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java
@ -0,0 +1,180 @@
+
+package eu.dnetlib.doiboost.orcidnodoi;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.IOException;
+import java.util.Objects;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.util.LongAccumulator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.gson.Gson;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonParser;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.oaf.Publication;
+import eu.dnetlib.dhp.schema.orcid.AuthorData;
+import eu.dnetlib.doiboost.orcid.json.JsonHelper;
+import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;
+import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf;
+import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher;
+import scala.Tuple2;
+
+/**
+ * This spark job joins the orcid works with the data of their orcid authors, converts the enriched works to
+ * OAF publications and stores them, wrapped in atomic actions, in a compressed sequence file
+ */
+
+public class SparkGenEnrichedOrcidWorks {
+
+	static Logger logger = LoggerFactory.getLogger(SparkGenEnrichedOrcidWorks.class);
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	/**
+	 * Entry point of the job. The arguments are parsed according to the bundled parameters specification;
+	 * isSparkSessionManaged defaults to true when it is not provided.
+	 */
+	public static void main(String[] args) throws IOException, Exception {
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					SparkGenEnrichedOrcidWorks.class
+						.getResourceAsStream(
+							"/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json")));
+		parser.parseArgument(args);
+		Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+		final String workingPath = parser.get("workingPath");
+		final String outputEnrichedWorksPath = parser.get("outputEnrichedWorksPath");
+		final String outputWorksPath = parser.get("outputWorksPath");
+		// NOTE(review): hdfsServerUri is read but never used in this method
+		final String hdfsServerUri = parser.get("hdfsServerUri");
+
+		SparkConf conf = new SparkConf();
+		runWithSparkSession(
+			conf,
+			isSparkSessionManaged,
+			spark -> {
+				JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+				// authors: sequence file having the orcid id as key and the author json as value
+				JavaPairRDD<Text, Text> summariesRDD = sc
+					.sequenceFile(workingPath + "authors/authors.seq", Text.class, Text.class);
+				Dataset<AuthorData> summariesDataset = spark
+					.createDataset(
+						summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(),
+						Encoders.bean(AuthorData.class));
+				// count() is a spark action: this log line triggers the evaluation of the dataset
+				logger.info("Authors data loaded: " + summariesDataset.count());
+
+				// works: every sequence file matching *.seq under workingPath + outputWorksPath
+				JavaPairRDD<Text, Text> activitiesRDD = sc
+					.sequenceFile(workingPath + outputWorksPath + "*.seq", Text.class, Text.class);
+				Dataset<WorkDataNoDoi> activitiesDataset = spark
+					.createDataset(
+						activitiesRDD.map(seq -> loadWorkFromJson(seq._1(), seq._2())).rdd(),
+						Encoders.bean(WorkDataNoDoi.class));
+				// count() is a spark action: this log line triggers the evaluation of the dataset
+				logger.info("Works data loaded: " + activitiesDataset.count());
+
+				// inner join on the orcid id: works whose author is not among the summaries are dropped
+				JavaRDD<Tuple2<String, String>> enrichedWorksRDD = activitiesDataset
+					.joinWith(
+						summariesDataset,
+						activitiesDataset.col("oid").equalTo(summariesDataset.col("oid")), "inner")
+					.map(
+						(MapFunction<Tuple2<WorkDataNoDoi, AuthorData>, Tuple2<String, String>>) value -> {
+							WorkDataNoDoi w = value._1;
+							AuthorData a = value._2;
+							// presumably updates the contributors of the work in place, since the work is
+							// serialized right after the call - TODO confirm against AuthorMatcher
+							AuthorMatcher.match(a, w.getContributors());
+							return new Tuple2<>(a.getOid(), JsonHelper.createOidWork(w));
+						},
+						Encoders.tuple(Encoders.STRING(), Encoders.STRING()))
+					.filter(Objects::nonNull)
+					.toJavaRDD();
+				logger.info("Enriched works RDD ready.");
+
+				// accumulators handed to the converter and logged at the end of the job
+				final LongAccumulator parsedPublications = spark.sparkContext().longAccumulator("parsedPublications");
+				final LongAccumulator enrichedPublications = spark
+					.sparkContext()
+					.longAccumulator("enrichedPublications");
+				final LongAccumulator errorsGeneric = spark.sparkContext().longAccumulator("errorsGeneric");
+				final LongAccumulator errorsInvalidTitle = spark.sparkContext().longAccumulator("errorsInvalidTitle");
+				final LongAccumulator errorsNotFoundAuthors = spark
+					.sparkContext()
+					.longAccumulator("errorsNotFoundAuthors");
+				final LongAccumulator errorsInvalidType = spark.sparkContext().longAccumulator("errorsInvalidType");
+				final PublicationToOaf publicationToOaf = new PublicationToOaf(
+					parsedPublications,
+					enrichedPublications,
+					errorsGeneric,
+					errorsInvalidTitle,
+					errorsNotFoundAuthors,
+					errorsInvalidType);
+				// works that cannot be converted yield null and are filtered out
+				JavaRDD<Publication> oafPublicationRDD = enrichedWorksRDD
+					.map(
+						e -> {
+							return (Publication) publicationToOaf
+								.generatePublicationActionsFromJson(e._2());
+						})
+					.filter(p -> p != null);
+
+				sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true");
+
+				// output: key = class name of the payload, value = the atomic action serialized as json
+				oafPublicationRDD
+					.mapToPair(
+						p -> new Tuple2<>(p.getClass().toString(),
+							OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, (Publication) p))))
+					.mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2())))
+					.saveAsNewAPIHadoopFile(
+						workingPath.concat(outputEnrichedWorksPath),
+						Text.class,
+						Text.class,
+						SequenceFileOutputFormat.class,
+						sc.hadoopConfiguration());
+
+				// the accumulators are read only after the save action has completed
+				logger.info("parsedPublications: " + parsedPublications.value().toString());
+				logger.info("enrichedPublications: " + enrichedPublications.value().toString());
+				logger.info("errorsGeneric: " + errorsGeneric.value().toString());
+				logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString());
+				logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString());
+				logger.info("errorsInvalidType: " + errorsInvalidType.value().toString());
+			});
+	}
+
+	/**
+	 * Builds an author from a record of the authors sequence file: the key is used as orcid id, while name,
+	 * surname and credit name are read from the json properties "name", "surname" and "creditname".
+	 */
+	private static AuthorData loadAuthorFromJson(Text orcidId, Text json) {
+		AuthorData authorData = new AuthorData();
+		authorData.setOid(orcidId.toString());
+		JsonElement jElement = new JsonParser().parse(json.toString());
+		authorData.setName(getJsonValue(jElement, "name"));
+		authorData.setSurname(getJsonValue(jElement, "surname"));
+		authorData.setCreditName(getJsonValue(jElement, "creditname"));
+		return authorData;
+	}
+
+	/**
+	 * Deserializes a work from the json value of a record; the key (orcidId) is not used.
+	 */
+	private static WorkDataNoDoi loadWorkFromJson(Text orcidId, Text json) {
+
+		WorkDataNoDoi workData = new Gson().fromJson(json.toString(), WorkDataNoDoi.class);
+		return workData;
+	}
+
+	/**
+	 * Returns the string value of the given property, or an empty string when the property is missing or is
+	 * a json null.
+	 */
+	private static String getJsonValue(JsonElement jElement, String property) {
+		if (jElement.getAsJsonObject().has(property)) {
+			JsonElement name = null;
+			name = jElement.getAsJsonObject().get(property);
+			if (name != null && !name.isJsonNull()) {
+				return name.getAsString();
+			}
+		}
+		return new String("");
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java
@ -0,0 +1,31 @@
+
+package eu.dnetlib.doiboost.orcidnodoi.json;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.gson.JsonObject;
+
+import eu.dnetlib.dhp.schema.orcid.AuthorData;
+import eu.dnetlib.doiboost.orcid.model.WorkData;
+
+/**
+ * This class serializes author and work data to json
+ */
+
+public class JsonWriter {
+
+	// shared mapper: properties having a null value are left out of the generated json
+	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
+		.setSerializationInclusion(JsonInclude.Include.NON_NULL);
+
+	/**
+	 * Serializes the author with the shared mapper.
+	 */
+	public static String create(AuthorData authorData) throws JsonProcessingException {
+		return OBJECT_MAPPER.writeValueAsString(authorData);
+	}
+
+	/**
+	 * Serializes the work as a json object holding only the "oid" and "doi" properties.
+	 */
+	public static String create(WorkData workData) {
+		final JsonObject json = new JsonObject();
+		json.addProperty("oid", workData.getOid());
+		json.addProperty("doi", workData.getDoi());
+		return json.toString();
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java
@ -0,0 +1,58 @@
+
+package eu.dnetlib.doiboost.orcidnodoi.model;
+
+import java.io.Serializable;
+
+import eu.dnetlib.dhp.schema.orcid.AuthorData;
+
+/**
+ * This class models a contributor of an orcid publication: the author data plus the sequence and the role
+ * declared in the publication
+ */
+
+public class Contributor extends AuthorData implements Serializable {
+
+	// data read from the publication
+	private String role;
+	private String sequence;
+
+	// matching state: declared transient, hence skipped by java serialization and by the default
+	// configuration of gson
+	private transient boolean bestMatch = false;
+	private transient boolean simpleMatch = false;
+	private transient Double score = 0.0;
+
+	public String getRole() {
+		return role;
+	}
+
+	public void setRole(String role) {
+		this.role = role;
+	}
+
+	public String getSequence() {
+		return sequence;
+	}
+
+	public void setSequence(String sequence) {
+		this.sequence = sequence;
+	}
+
+	public boolean isBestMatch() {
+		return bestMatch;
+	}
+
+	public void setBestMatch(boolean bestMatch) {
+		this.bestMatch = bestMatch;
+	}
+
+	public boolean isSimpleMatch() {
+		return simpleMatch;
+	}
+
+	public void setSimpleMatch(boolean simpleMatch) {
+		this.simpleMatch = simpleMatch;
+	}
+
+	public Double getScore() {
+		return score;
+	}
+
+	public void setScore(Double score) {
+		this.score = score;
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java
@ -0,0 +1,36 @@
+
+package eu.dnetlib.doiboost.orcidnodoi.model;
+
+/**
+ * This class models the data related to an external id, that are retrieved from an orcid publication.
+ * It is serializable because WorkDataNoDoi, which is declared Serializable, holds a list of external ids.
+ */
+
+public class ExternalId implements java.io.Serializable {
+	private String type;
+	private String value;
+	private String relationShip;
+
+	public String getType() {
+		return type;
+	}
+
+	public void setType(String type) {
+		this.type = type;
+	}
+
+	public String getValue() {
+		return value;
+	}
+
+	public void setValue(String value) {
+		this.value = value;
+	}
+
+	public String getRelationShip() {
+		return relationShip;
+	}
+
+	public void setRelationShip(String relationShip) {
+		this.relationShip = relationShip;
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java
@ -0,0 +1,36 @@
+
+package eu.dnetlib.doiboost.orcidnodoi.model;
+
+/**
+ * This class models the data related to a publication date, that are retrieved from an orcid publication.
+ * It is serializable because WorkDataNoDoi, which is declared Serializable, holds a list of publication dates.
+ */
+
+public class PublicationDate implements java.io.Serializable {
+	private String year;
+	private String month;
+	private String day;
+
+	public String getYear() {
+		return year;
+	}
+
+	public void setYear(String year) {
+		this.year = year;
+	}
+
+	public String getMonth() {
+		return month;
+	}
+
+	public void setMonth(String month) {
+		this.month = month;
+	}
+
+	public String getDay() {
+		return day;
+	}
+
+	public void setDay(String day) {
+		this.day = day;
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java
@ -0,0 +1,104 @@
+
+package eu.dnetlib.doiboost.orcidnodoi.model;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * This class models the data that are retrieved from an orcid publication
+ */
+
+public class WorkDataNoDoi implements Serializable {
+
+	private String oid;
+	private String id;
+	private String sourceName;
+	private String type;
+	private String errorCode;
+	private List<String> titles;
+	private List<String> urls;
+	// the three lists below keep their original package visibility
+	List<ExternalId> extIds;
+	List<PublicationDate> publicationDates;
+	List<Contributor> contributors;
+
+	public String getOid() {
+		return oid;
+	}
+
+	public void setOid(String oid) {
+		this.oid = oid;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	public String getSourceName() {
+		return sourceName;
+	}
+
+	public void setSourceName(String sourceName) {
+		this.sourceName = sourceName;
+	}
+
+	public String getType() {
+		return type;
+	}
+
+	public void setType(String type) {
+		this.type = type;
+	}
+
+	public String getErrorCode() {
+		return errorCode;
+	}
+
+	public void setErrorCode(String errorCode) {
+		this.errorCode = errorCode;
+	}
+
+	public List<String> getTitles() {
+		return titles;
+	}
+
+	public void setTitles(List<String> titles) {
+		this.titles = titles;
+	}
+
+	public List<String> getUrls() {
+		return urls;
+	}
+
+	public void setUrls(List<String> urls) {
+		this.urls = urls;
+	}
+
+	public List<ExternalId> getExtIds() {
+		return extIds;
+	}
+
+	public void setExtIds(List<ExternalId> extIds) {
+		this.extIds = extIds;
+	}
+
+	public List<PublicationDate> getPublicationDates() {
+		return publicationDates;
+	}
+
+	public void setPublicationDates(List<PublicationDate> publicationDates) {
+		this.publicationDates = publicationDates;
+	}
+
+	public List<Contributor> getContributors() {
+		return contributors;
+	}
+
+	public void setContributors(List<Contributor> contributors) {
+		this.contributors = contributors;
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java
@ -0,0 +1,543 @@
+
+package eu.dnetlib.doiboost.orcidnodoi.oaf;
+
+import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*;
+
+import java.io.Serializable;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.spark.util.LongAccumulator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.*;
+
+import eu.dnetlib.dhp.common.PacePerson;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.utils.DHPUtils;
+import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility;
+import eu.dnetlib.doiboost.orcidnodoi.util.Pair;
+
+/**
+ * This class converts an orcid publication from json format to oaf
+ */
+
+public class PublicationToOaf implements Serializable {
+
+	static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class);
+
+	// constants used to build the identifiers of the datasource and of the publications
+	public static final String ORCID = "ORCID";
+	public final static String orcidPREFIX = "orcid_______";
+	public static final String OPENAIRE_PREFIX = "openaire____";
+	public static final String SEPARATOR = "::";
+
+	// counters updated during the conversion; every use in this class is guarded by a null check
+	private final LongAccumulator parsedPublications;
+	private final LongAccumulator enrichedPublications;
+	private final LongAccumulator errorsGeneric;
+	private final LongAccumulator errorsInvalidTitle;
+	private final LongAccumulator errorsNotFoundAuthors;
+	private final LongAccumulator errorsInvalidType;
+
+	/**
+	 * Creates a converter that reports its progress and its errors to the given accumulators.
+	 */
+	public PublicationToOaf(
+		LongAccumulator parsedPublications,
+		LongAccumulator enrichedPublications,
+		LongAccumulator errorsGeneric,
+		LongAccumulator errorsInvalidTitle,
+		LongAccumulator errorsNotFoundAuthors,
+		LongAccumulator errorsInvalidType) {
+		// successful outcomes
+		this.parsedPublications = parsedPublications;
+		this.enrichedPublications = enrichedPublications;
+		// failures, one counter for each cause
+		this.errorsGeneric = errorsGeneric;
+		this.errorsInvalidTitle = errorsInvalidTitle;
+		this.errorsNotFoundAuthors = errorsNotFoundAuthors;
+		this.errorsInvalidType = errorsInvalidType;
+	}
+
+	/**
+	 * Creates a converter without accumulators: all the counters are left null.
+	 */
+	public PublicationToOaf() {
+		this(null, null, null, null, null, null);
+	}
+
+	// datasources known to the converter, indexed by lower case name
+	private static Map<String, Pair<String, String>> datasources = new HashMap<>();
+
+	static {
+		datasources.put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
+	}
+
+	// maps the name of a json external id to the pair of values used for oaf:pid/@classid and
+	// oaf:pid/@classname
+	private static Map<String, Pair<String, String>> externalIds = new HashMap<>();
+
+	static {
+		// the keys are already lower case literals
+		externalIds.put("ark", new Pair<>("ark", "ark"));
+		externalIds.put("arxiv", new Pair<>("arxiv", "arXiv"));
+		externalIds.put("pmc", new Pair<>("pmc", "pmc"));
+		externalIds.put("pmid", new Pair<>("pmid", "pmid"));
+		externalIds.put("source-work-id", new Pair<>("orcidworkid", "orcidworkid"));
+		externalIds.put("urn", new Pair<>("urn", "urn"));
+	}
+
+	// mapping, loaded once from the bundled typologies.json, from an orcid work type to its properties
+	// (the "value" and "cobj" entries are read during the conversion)
+	static Map<String, Map<String, String>> typologiesMapping;
+
+	static {
+		// the stream is closed by the try-with-resources and the json is decoded with an explicit charset
+		// instead of the platform default
+		try (java.io.InputStream typologiesStream = PublicationToOaf.class
+			.getResourceAsStream(
+				"/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json")) {
+			final String tt = IOUtils.toString(typologiesStream, "UTF-8");
+			typologiesMapping = new Gson().fromJson(tt, Map.class);
+		} catch (Exception e) {
+			// also reached when the resource is missing
+			throw new RuntimeException("loading typologies", e);
+		}
+	}
+
+	public static final String PID_TYPES = "dnet:pid_types";
+
+	/**
+	 * Converts an orcid publication from its json representation.
+	 *
+	 * @param json the json of the orcid publication
+	 * @return the result of the conversion, or null when the conversion fails for any reason; in that case the
+	 *         failure is logged and counted as a generic error
+	 */
+	public Oaf generatePublicationActionsFromJson(final String json) {
+		try {
+			if (parsedPublications != null) {
+				parsedPublications.add(1);
+			}
+			JsonElement jElement = new JsonParser().parse(json);
+			JsonObject jObject = jElement.getAsJsonObject();
+			return generatePublicationActionsFromDump(jObject);
+		} catch (Throwable t) {
+			// the throwable is passed to the logger so that the stack trace is not lost
+			logger.error("creating publication: " + t.getMessage(), t);
+			if (errorsGeneric != null) {
+				errorsGeneric.add(1);
+			}
+			return null;
+		}
+	}
+
+	/**
+	 * Builds a publication from the json object of an orcid work.
+	 *
+	 * @param rootElement the json object of the orcid work
+	 * @return the publication, or null when the element is not valid, when no title is available, when the
+	 *         type is blank or when no author can be created; the last three cases increment the related
+	 *         error accumulator, when present
+	 */
+	public Oaf generatePublicationActionsFromDump(final JsonObject rootElement) {
+
+		if (!isValid(rootElement)) {
+			return null;
+		}
+
+		Publication publication = new Publication();
+
+		final DataInfo dataInfo = new DataInfo();
+		dataInfo.setDeletedbyinference(false);
+		dataInfo.setInferred(false);
+		dataInfo.setTrust("0.9");
+		dataInfo
+			.setProvenanceaction(
+				mapQualifier(
+					"sysimport:actionset:orcidworks-no-doi",
+					"sysimport:actionset:orcidworks-no-doi",
+					"dnet:provenanceActions",
+					"dnet:provenanceActions"));
+		publication.setDataInfo(dataInfo);
+
+		publication.setLastupdatetimestamp(new Date().getTime());
+
+		// NOTE(review): the date of collection is a hard-coded literal
+		publication.setDateofcollection("2020-10-14");
+		publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601());
+
+		// Adding external ids
+		// NOTE(review): classid is taken from Pair#getValue and classname from Pair#getKey - confirm that
+		// this order matches the way the pairs of the externalIds map are built
+		// NOTE(review): relies on publication.getExternalReference() returning a non-null list - confirm
+		externalIds
+			.keySet()
+			.stream()
+			.forEach(jsonExtId -> {
+				final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue();
+				final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey();
+				final String extId = getStringValue(rootElement, jsonExtId);
+				if (StringUtils.isNotBlank(extId)) {
+					publication
+						.getExternalReference()
+						.add(
+							convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types"));
+				}
+			});
+
+		// Adding source
+		final String source = getStringValue(rootElement, "sourceName");
+		if (StringUtils.isNotBlank(source)) {
+			Field<String> sourceField = mapStringField(source, null);
+			if (sourceField == null) {
+				publication.setSource(null);
+			} else {
+				publication.setSource(Arrays.asList(sourceField));
+			}
+		}
+
+		// Adding titles: a publication without titles is discarded
+		final List<String> titles = createRepeatedField(rootElement, "titles");
+		if (titles == null || titles.isEmpty()) {
+			if (errorsInvalidTitle != null) {
+				errorsInvalidTitle.add(1);
+			}
+			return null;
+		}
+		Qualifier q = mapQualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
+		publication
+			.setTitle(
+				titles
+					.stream()
+					.map(t -> {
+						return mapStructuredProperty(t, q, null);
+					})
+					.filter(s -> s != null)
+					.collect(Collectors.toList()));
+		// Adding identifier: "50|" + orcid prefix + separator + md5 of the lower case original id; when the
+		// original id is missing the md5 is computed on the comma separated titles
+		final String id = getStringValue(rootElement, "id");
+		String sourceId = null;
+		if (id != null) {
+			publication.setOriginalId(Arrays.asList(id));
+			sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase()));
+		} else {
+			String mergedTitle = titles.stream().map(Object::toString).collect(Collectors.joining(","));
+			sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase()));
+		}
+		publication.setId(sourceId);
+
+		// Adding relevant date
+		settingRelevantDate(rootElement, publication, "publication_date", "issued", true);
+
+		// Adding collectedfrom
+		publication.setCollectedfrom(Arrays.asList(createCollectedFrom()));
+
+		// Adding type: a publication with a blank type is discarded
+		final String type = getStringValue(rootElement, "type");
+		String cobjValue = "";
+		if (StringUtils.isNotBlank(type)) {
+			publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource"));
+
+			// NOTE(review): assumes that typologiesMapping contains the type, otherwise a
+			// NullPointerException is thrown here - presumably guaranteed by isValid, confirm
+			final String typeValue = typologiesMapping.get(type).get("value");
+			cobjValue = typologiesMapping.get(type).get("cobj");
+			final Instance instance = new Instance();
+
+			// Adding hostedby
+			instance.setHostedby(createHostedBy());
+
+			// Adding url: without urls the record is flagged as invisible
+			final List<String> urls = createRepeatedField(rootElement, "urls");
+			if (urls != null && !urls.isEmpty()) {
+				instance.setUrl(urls);
+			} else {
+				dataInfo.setInvisible(true);
+			}
+
+			final String pubDate = getPublicationDate(rootElement, "publicationDates");
+			if (StringUtils.isNotBlank(pubDate)) {
+				instance.setDateofacceptance(mapStringField(pubDate, null));
+			}
+
+			instance.setCollectedfrom(createCollectedFrom());
+
+			// Adding accessright
+			instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes"));
+
+			// Adding type
+			instance
+				.setInstancetype(
+					mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource"));
+
+			publication.setInstance(Arrays.asList(instance));
+		} else {
+			if (errorsInvalidType != null) {
+				errorsInvalidType.add(1);
+			}
+			return null;
+		}
+
+		// Adding authors: a publication without authors is discarded
+		final List<Author> authors = createAuthors(rootElement);
+		if (authors != null && authors.size() > 0) {
+			publication.setAuthor(authors);
+		} else {
+			if (errorsNotFoundAuthors != null) {
+				errorsNotFoundAuthors.add(1);
+			}
+			return null;
+		}
+		String classValue = getDefaultResulttype(cobjValue);
+		publication
+			.setResulttype(mapQualifier(classValue, classValue, "dnet:result_typologies", "dnet:result_typologies"));
+		// reached only when the publication has been completely built
+		if (enrichedPublications != null) {
+			enrichedPublications.add(1);
+		}
+		return publication;
+	}
+
+	/**
+	 * Creates the authors from the "contributors" array of the work.
+	 *
+	 * @param root the json object of the work
+	 * @return one author for each element of the array, or null when the "contributors" property is missing
+	 *         or is not an array
+	 */
+	public List<Author> createAuthors(final JsonObject root) {
+
+		final String authorsJSONFieldName = "contributors";
+
+		if (!root.has(authorsJSONFieldName) || !root.get(authorsJSONFieldName).isJsonArray()) {
+			return null;
+		}
+
+		final List<Author> result = new ArrayList<>();
+		int firstCount = 0;
+		int otherCount = 0;
+		int rank = 1;
+		int previousRank = 0;
+
+		for (final JsonElement element : root.getAsJsonArray(authorsJSONFieldName)) {
+			// getAsJsonObject fails with an exception on elements that are not json objects, hence no
+			// further check on the kind of element is needed below
+			final JsonObject contributor = element.getAsJsonObject();
+			final Author author = new Author();
+
+			final String creditname = getStringValue(contributor, "creditName");
+			final String surname = getStringValue(contributor, "surname");
+			final String name = getStringValue(contributor, "name");
+			final String oid = getStringValue(contributor, "oid");
+			final String seq = getStringValue(contributor, "sequence");
+
+			// a blank sequence leaves the rank of the previous contributor unchanged
+			if (StringUtils.isNotBlank(seq)) {
+				switch (seq) {
+					case "first":
+						firstCount += 1;
+						rank = firstCount;
+						break;
+					case "additional":
+						rank = previousRank + 1;
+						break;
+					default:
+						otherCount += 1;
+						rank = otherCount;
+						break;
+				}
+			}
+
+			if (StringUtils.isBlank(oid)) {
+				// no orcid id: the names are derived from the credit name
+				final PacePerson person = new PacePerson(creditname, false);
+				if (person.isAccurate()) {
+					author.setName(person.getNormalisedFirstName());
+					author.setSurname(person.getNormalisedSurname());
+					author.setFullname(person.getNormalisedFullname());
+				} else {
+					author.setFullname(creditname);
+				}
+			} else {
+				author.setPid(Arrays.asList(mapAuthorId(oid)));
+				author.setFullname(name + " " + surname);
+				if (StringUtils.isNotBlank(name)) {
+					author.setName(name);
+				}
+				if (StringUtils.isNotBlank(surname)) {
+					author.setSurname(surname);
+				}
+			}
+
+			author.setRank(rank);
+			result.add(author);
+			previousRank = rank;
+		}
+		return result;
+	}
+
+	private List<String> createRepeatedField(final JsonObject rootElement, final String fieldName) {
+		if (!rootElement.has(fieldName)) {
+			return null;
+		}
+		if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) {
+			return null;
+		}
+		if (rootElement.get(fieldName).isJsonArray()) {
+			if (!isValidJsonArray(rootElement, fieldName)) {
+				return null;
+			}
+			return getArrayValues(rootElement, fieldName);
+		} else {
+			String field = getStringValue(rootElement, fieldName);
+			return Arrays.asList(cleanField(field));
+		}
+	}
+
+	/**
+	 * Removes one pair of enclosing double quotes from the given value.
+	 *
+	 * @param value the value to clean, may be {@code null}
+	 * @return the value without its enclosing quotes, or the value itself when it is {@code null} or
+	 *         is not enclosed in double quotes
+	 */
+	private String cleanField(String value) {
+		// at least two characters are needed to have both an opening and a closing quote:
+		// a lone '"' would otherwise lead to substring(1, 0)
+		if (value != null && value.length() >= 2 && value.charAt(0) == '"'
+			&& value.charAt(value.length() - 1) == '"') {
+			return value.substring(1, value.length() - 1);
+		}
+		return value;
+	}
+
+	/**
+	 * Reads a date from the record and stores it among the relevant dates of the publication.
+	 * Nothing is set when no valid date can be read.
+	 *
+	 * @param rootElement the JSON record
+	 * @param publication the publication to update
+	 * @param jsonKey the name of the JSON field holding the date
+	 * @param dictionaryKey used as class id and class name of the {@code dnet:dataCite_date} qualifier
+	 * @param addToDateOfAcceptance when {@code true} the date is also set as date of acceptance
+	 */
+	private void settingRelevantDate(final JsonObject rootElement,
+		final Publication publication,
+		final String jsonKey,
+		final String dictionaryKey,
+		final boolean addToDateOfAcceptance) {
+
+		// honour the field requested by the caller (the key used to be hard-coded to "publication_date")
+		final String pubDate = getPublicationDate(rootElement, jsonKey);
+		if (StringUtils.isBlank(pubDate)) {
+			return;
+		}
+		if (addToDateOfAcceptance) {
+			publication.setDateofacceptance(mapStringField(pubDate, null));
+		}
+		final Qualifier q = mapQualifier(dictionaryKey, dictionaryKey, "dnet:dataCite_date", "dnet:dataCite_date");
+		final List<StructuredProperty> relevantDates = new ArrayList<>();
+		final StructuredProperty relevantDate = mapStructuredProperty(pubDate, q, null);
+		if (relevantDate != null) {
+			relevantDates.add(relevantDate);
+		}
+		publication.setRelevantdate(relevantDates);
+	}
+
+	/**
+	 * Builds a {@code year-month-day} date from the {@code year}, {@code month} and {@code day}
+	 * members of the JSON object stored under the given key. A missing day is replaced by
+	 * {@code 01}; when the month is missing both month and day are replaced by {@code 01}.
+	 *
+	 * @param rootElement the JSON record
+	 * @param jsonKey the name of the field holding the date object
+	 * @return the date, or {@code null} when the field is missing, is not a JSON object, has no
+	 *         year or the resulting string is rejected by {@code isValidDate}
+	 */
+	private String getPublicationDate(final JsonObject rootElement,
+		final String jsonKey) {
+
+		// explicit checks instead of catching whatever the cast to JsonObject may throw
+		if (rootElement == null || !rootElement.has(jsonKey)) {
+			return null;
+		}
+		final JsonElement pubDateElement = rootElement.get(jsonKey);
+		if (!pubDateElement.isJsonObject()) {
+			return null;
+		}
+		final JsonObject pubDateJson = pubDateElement.getAsJsonObject();
+
+		final String year = getStringValue(pubDateJson, "year");
+		final String month = getStringValue(pubDateJson, "month");
+		final String day = getStringValue(pubDateJson, "day");
+
+		if (StringUtils.isBlank(year)) {
+			return null;
+		}
+		final StringBuilder pubDate = new StringBuilder(year);
+		if (StringUtils.isNotBlank(month)) {
+			pubDate.append('-').append(month);
+			pubDate.append('-').append(StringUtils.isNotBlank(day) ? day : "01");
+		} else {
+			// without a month the day, if any, is not used
+			pubDate.append("-01-01");
+		}
+		final String result = pubDate.toString();
+		return isValidDate(result) ? result : null;
+	}
+
+	/**
+	 * Checks that the record has a known type and a usable {@code titles} field; each failure is
+	 * counted on the corresponding accumulator, when one is set.
+	 *
+	 * @param rootElement the JSON record
+	 * @return {@code true} when the record passes both checks
+	 */
+	protected boolean isValid(final JsonObject rootElement/* , final Reporter context */) {
+
+		final String type = getStringValue(rootElement, "type");
+		if (typologiesMapping.containsKey(type)) {
+			if (isValidJsonArray(rootElement, "titles")) {
+				return true;
+			}
+			if (errorsInvalidTitle != null) {
+				errorsInvalidTitle.add(1);
+			}
+			return false;
+		}
+
+		// type without a mapping
+		logger.error("unknowntype_" + type);
+		if (errorsInvalidType != null) {
+			errorsInvalidType.add(1);
+		}
+		return false;
+	}
+
+	/**
+	 * Checks that a field is present and usable.
+	 *
+	 * @param rootElement the JSON record
+	 * @param fieldName the name of the field to check
+	 * @return {@code false} when the field is missing, is a JSON null, or is an array that is empty
+	 *         or starts with a JSON null; {@code true} otherwise (including non-array values)
+	 */
+	private boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) {
+		if (!rootElement.has(fieldName)) {
+			return false;
+		}
+		final JsonElement jsonElement = rootElement.get(fieldName);
+		if (jsonElement.isJsonNull()) {
+			return false;
+		}
+		if (jsonElement.isJsonArray()) {
+			final JsonArray jsonArray = jsonElement.getAsJsonArray();
+			// an empty array has no first element to inspect: get(0) would throw
+			if (jsonArray.size() == 0) {
+				return false;
+			}
+			if (jsonArray.get(0).isJsonNull()) {
+				return false;
+			}
+		}
+		return true;
+	}
+
+	/**
+	 * Creates a qualifier from its four components.
+	 *
+	 * @param classId the class id
+	 * @param className the class name
+	 * @param schemeId the scheme id
+	 * @param schemeName the scheme name
+	 * @return the new qualifier
+	 */
+	private Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) {
+		final Qualifier q = new Qualifier();
+		q.setSchemeid(schemeId);
+		q.setSchemename(schemeName);
+		q.setClassid(classId);
+		q.setClassname(className);
+		return q;
+	}
+
+	private ExternalReference convertExtRef(String extId, String classId, String className, String schemeId,
+		String schemeName) {
+		ExternalReference ex = new ExternalReference();
+		ex.setRefidentifier(extId);
+		ex.setQualifier(mapQualifier(classId, className, schemeId, schemeName));
+		return ex;
+	}
+
+	/**
+	 * Wraps a value into a structured property.
+	 *
+	 * @param value the value to wrap
+	 * @param qualifier the qualifier of the property
+	 * @param dataInfo the data info of the property
+	 * @return the new property, or {@code null} when the value is {@code null} or blank
+	 */
+	private StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) {
+		// isBlank already covers null; the former "value == null | ..." used the
+		// non-short-circuit operator by mistake
+		if (StringUtils.isBlank(value)) {
+			return null;
+		}
+
+		final StructuredProperty structuredProperty = new StructuredProperty();
+		structuredProperty.setValue(value);
+		structuredProperty.setQualifier(qualifier);
+		structuredProperty.setDataInfo(dataInfo);
+		return structuredProperty;
+	}
+
+	/**
+	 * Wraps a value into a string field.
+	 *
+	 * @param value the value to wrap
+	 * @param dataInfo the data info of the field
+	 * @return the new field, or {@code null} when the value is {@code null} or blank
+	 */
+	private Field<String> mapStringField(String value, DataInfo dataInfo) {
+		// a null value is blank too
+		if (StringUtils.isBlank(value)) {
+			return null;
+		}
+
+		final Field<String> field = new Field<>();
+		field.setDataInfo(dataInfo);
+		field.setValue(value);
+		return field;
+	}
+
+	private KeyValue createCollectedFrom() {
+		KeyValue cf = new KeyValue();
+		cf.setValue(ORCID);
+		cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a");
+		return cf;
+	}
+
+	private KeyValue createHostedBy() {
+		KeyValue hb = new KeyValue();
+		hb.setValue("Unknown Repository");
+		hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c");
+		return hb;
+	}
+
+	/**
+	 * Wraps an ORCID identifier into a pid: the qualifier uses the lower-cased {@code ORCID}
+	 * constant as class and {@link ModelConstants#DNET_PID_TYPES} as scheme; the data info marks
+	 * the pid as not inferred, with trust "0.9".
+	 *
+	 * @param orcidId the ORCID identifier
+	 * @return the pid
+	 */
+	private StructuredProperty mapAuthorId(String orcidId) {
+		final DataInfo provenance = new DataInfo();
+		provenance.setDeletedbyinference(false);
+		provenance.setInferred(false);
+		provenance.setTrust("0.9");
+		provenance
+			.setProvenanceaction(
+				mapQualifier(
+					"sysimport:crosswalk:entityregistry",
+					"Harvested",
+					"dnet:provenanceActions",
+					"dnet:provenanceActions"));
+
+		final String pidType = ORCID.toLowerCase();
+		final StructuredProperty pid = new StructuredProperty();
+		pid.setValue(orcidId);
+		pid
+			.setQualifier(
+				mapQualifier(pidType, pidType, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES));
+		pid.setDataInfo(provenance);
+		return pid;
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java
@ -0,0 +1,217 @@
+
+package eu.dnetlib.doiboost.orcidnodoi.similarity;
+
+import java.io.IOException;
+import java.text.Normalizer;
+import java.util.*;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.text.similarity.JaroWinklerSimilarity;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.ximpleware.NavException;
+import com.ximpleware.ParseException;
+import com.ximpleware.XPathEvalException;
+import com.ximpleware.XPathParseException;
+
+import eu.dnetlib.dhp.parser.utility.VtdException;
+import eu.dnetlib.dhp.schema.orcid.AuthorData;
+import eu.dnetlib.doiboost.orcidnodoi.model.Contributor;
+import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;
+
+/**
+ * This class searches a list of publication contributors for a specific
+ * author, checking the similarity of both the name and the surname of the
+ * author against the credit name of each contributor in the list; as soon as
+ * a match is found (if one exists), the author information is used to enrich
+ * the matched contributor inside the contributors list
+ */
+
+public class AuthorMatcher {
+
+	private static final Logger logger = LoggerFactory.getLogger(AuthorMatcher.class);
+	public static final Double threshold = 0.8;
+
+	/**
+	 * Looks for the given author among the contributors and enriches the matching contributor
+	 * with the name, surname and oid of the author.
+	 * <p>
+	 * Contributors with a blank credit name are ignored. A contributor is a "simple match" when its
+	 * credit name contains the name, the surname or one of the other names of the author:
+	 * <ul>
+	 * <li>exactly one simple match: that contributor is enriched;</li>
+	 * <li>no simple match: the contributor with the highest similarity score, provided it reaches
+	 * {@link #threshold}, is enriched;</li>
+	 * <li>several simple matches: same as above, restricted to the simple matches.</li>
+	 * </ul>
+	 * NOTE(review): {@code bestMatch} declares its parameters as (surname, name, contributor) but it
+	 * is called here with (name, surname, creditName); the call is kept as it was - confirm the
+	 * intended order.
+	 *
+	 * @param author the author to search for
+	 * @param contributors the contributors of the publication, updated in place
+	 */
+	public static void match(AuthorData author, List<Contributor> contributors)
+		throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
+
+		int simpleMatches = 0;
+		for (final Contributor c : contributors) {
+			if (StringUtils.isBlank(c.getCreditName())) {
+				continue;
+			}
+			if (simpleMatch(c.getCreditName(), author.getName()) ||
+				simpleMatch(c.getCreditName(), author.getSurname()) ||
+				simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) {
+				simpleMatches++;
+				c.setSimpleMatch(true);
+			}
+		}
+
+		if (simpleMatches == 1) {
+			updateAuthorsSimpleMatch(contributors, author);
+			return;
+		}
+
+		// zero or several simple matches: fall back to the similarity score; with several
+		// simple matches only those contributors are candidates
+		final boolean onlySimpleMatches = simpleMatches > 1;
+		final Optional<Contributor> optCon = contributors
+			.stream()
+			.filter(c -> !onlySimpleMatches || c.isSimpleMatch())
+			.filter(c -> !StringUtils.isBlank(c.getCreditName()))
+			.map(c -> {
+				// the score is stored on every candidate, not only on the best one
+				c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName()));
+				return c;
+			})
+			.filter(c -> c.getScore() >= threshold)
+			.max(Comparator.comparing(c -> c.getScore()));
+		if (optCon.isPresent()) {
+			optCon.get().setBestMatch(true);
+			updateAuthorsSimilarityMatch(contributors, author);
+		}
+	}
+
+	/**
+	 * Tells whether the given name matches, according to {@code simpleMatch}, at least one of the
+	 * other names.
+	 *
+	 * @param name the name to check
+	 * @param otherNames the alternative names, may be {@code null} or empty
+	 * @return {@code true} when at least one of the other names matches
+	 */
+	public static boolean simpleMatchOnOtherNames(String name, List<String> otherNames) {
+		if (otherNames == null) {
+			return false;
+		}
+		// an empty list never matches
+		return otherNames.stream().anyMatch(other -> simpleMatch(name, other));
+	}
+
+	/**
+	 * Tells whether the normalized name contains the normalized search value.
+	 *
+	 * @param name the name to search in
+	 * @param searchValue the value to search for
+	 * @return {@code true} when the search value is found; {@code false} when it is not found or
+	 *         when the search value is {@code null} or normalizes to the empty string
+	 */
+	public static boolean simpleMatch(String name, String searchValue) {
+		if (searchValue == null) {
+			return false;
+		}
+		final String normalizedSearchValue = normalize(searchValue);
+		// every string contains the empty string: without this guard a blank search value
+		// (or one made only of characters dropped by normalize) would match any name
+		if (normalizedSearchValue.isEmpty()) {
+			return false;
+		}
+		return normalize(name).contains(normalizedSearchValue);
+	}
+
+	/**
+	 * Scores the similarity between an author and the credit name of a contributor.
+	 * <p>
+	 * The credit name is split on spaces: the last token is taken as contributor name, the
+	 * preceding tokens (joined by a space) as contributor surname. The author is compared against
+	 * both orderings of the two parts and the higher score is returned.
+	 * <p>
+	 * NOTE(review): the first parameter is declared as the surname and the second as the name;
+	 * verify that callers pass them in this order.
+	 *
+	 * @param authorSurname the surname of the author
+	 * @param authorName the name of the author
+	 * @param contributor the credit name of the contributor
+	 * @return the higher of the two scores, {@code 0.0} when the credit name yields no token
+	 */
+	public static Double bestMatch(String authorSurname, String authorName, String contributor) {
+		final String[] tokens = contributor.split(" ");
+		if (tokens.length == 0) {
+			return 0.0;
+		}
+		final int last = tokens.length - 1;
+		// everything before the last token, empty when there is a single token
+		final String leadingTokens = String.join(" ", Arrays.copyOfRange(tokens, 0, last));
+
+		final String authorNameNrm = normalize(authorName);
+		final String authorSurnameNrm = normalize(authorSurname);
+		final String contributorNameNrm = normalize(tokens[last]);
+		final String contributorSurnameNrm = normalize(leadingTokens);
+
+		final Double straight = similarity(authorNameNrm, authorSurnameNrm, contributorNameNrm, contributorSurnameNrm);
+		final Double swapped = similarity(authorNameNrm, authorSurnameNrm, contributorSurnameNrm, contributorNameNrm);
+		return straight.compareTo(swapped) >= 0 ? straight : swapped;
+	}
+
+	/**
+	 * Scores the similarity between two persons, each given as name and surname; delegates to the
+	 * Jaro-Winkler based implementation.
+	 *
+	 * @param nameA the name of the first person
+	 * @param surnameA the surname of the first person
+	 * @param nameB the name of the second person
+	 * @param surnameB the surname of the second person
+	 * @return the similarity score
+	 */
+	public static Double similarity(String nameA, String surnameA, String nameB, String surnameB) {
+		return similarityJaroWinkler(nameA, surnameA, nameB, surnameB);
+	}
+
+	private static Double similarityJaroWinkler(String nameA, String surnameA, String nameB, String surnameB) {
+		return new JaroWinklerSimilarity().apply(normalize(parse(nameA, surnameA)), normalize(parse(nameB, surnameB)));
+	}
+
+	/**
+	 * Normalizes a string for comparison: the string is decomposed (NFD) and lower-cased, the
+	 * combining diacritical marks are removed so that accented letters become their base letters,
+	 * and every run of non-word characters, punctuation, digits or newlines is replaced by a
+	 * single space; the result is trimmed.
+	 *
+	 * @param s the string to normalize, may be {@code null}
+	 * @return the normalized string, the empty string when {@code s} is {@code null}
+	 */
+	public static String normalize(final String s) {
+		if (s == null) {
+			return "";
+		}
+		return nfd(s)
+			// locale-independent lower-casing
+			.toLowerCase(Locale.ROOT)
+			// do not compact the regexes in a single expression, would cause StackOverflowError
+			// in case
+			// of large input strings
+			//
+			// the combining marks must be dropped before the \W step: they are non-word
+			// characters, so the \W step would turn them into spaces and split the word
+			// (e.g. "mu ller" instead of "muller")
+			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
+			.replaceAll("(\\W)+", " ")
+			.replaceAll("(\\p{Punct})+", " ")
+			.replaceAll("(\\d)+", " ")
+			.replaceAll("(\\n)+", " ")
+			.trim();
+	}
+
+	/**
+	 * Applies the canonical decomposition (NFD) to the given string.
+	 */
+	private static String nfd(final String s) {
+		return Normalizer.normalize(s, Normalizer.Form.NFD);
+	}
+
+	/**
+	 * Renders a person as "surname name".
+	 */
+	private static String parse(String name, String surname) {
+		return String.join(" ", surname, name);
+	}
+
+	/**
+	 * Copies name, surname and oid of the author onto every contributor flagged as simple match,
+	 * then updates the ranks.
+	 *
+	 * @param contributors the contributors, updated in place
+	 * @param author the author providing the values
+	 */
+	public static void updateAuthorsSimpleMatch(List<Contributor> contributors, AuthorData author) {
+		for (final Contributor candidate : contributors) {
+			if (!candidate.isSimpleMatch()) {
+				continue;
+			}
+			candidate.setName(author.getName());
+			candidate.setSurname(author.getSurname());
+			candidate.setOid(author.getOid());
+		}
+		updateRanks(contributors);
+	}
+
+	/**
+	 * Copies name, surname and oid of the author onto every contributor flagged as best match,
+	 * then updates the ranks.
+	 *
+	 * @param contributors the contributors, updated in place
+	 * @param author the author providing the values
+	 */
+	public static void updateAuthorsSimilarityMatch(List<Contributor> contributors, AuthorData author) {
+		contributors.forEach(candidate -> {
+			if (candidate.isBestMatch()) {
+				candidate.setName(author.getName());
+				candidate.setSurname(author.getSurname());
+				candidate.setOid(author.getOid());
+			}
+		});
+		updateRanks(contributors);
+	}
+
+	private static void updateRanks(List<Contributor> contributors) {
+		boolean seqFound = false;
+		if (contributors
+			.stream()
+			.filter(
+				c -> c.getRole() != null && c.getSequence() != null &&
+					c.getRole().equals("author") && (c.getSequence().equals("first") ||
+						c.getSequence().equals("additional")))
+			.count() > 0) {
+			seqFound = true;
+		}
+		if (!seqFound) {
+			List<Integer> seqIds = Arrays.asList(0);
+			contributors.forEach(c -> {
+				int currentSeq = seqIds.get(0) + 1;
+				seqIds.set(0, currentSeq);
+				c.setSequence(Integer.toString(seqIds.get(0)));
+			});
+		}
+	}
+
+	/**
+	 * Serializes the given work to JSON with a default Gson instance.
+	 */
+	private static String toJson(WorkDataNoDoi work) {
+		return new GsonBuilder().create().toJson(work);
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java
@ -0,0 +1,113 @@
+
+package eu.dnetlib.doiboost.orcidnodoi.util;
+
+import java.text.SimpleDateFormat;
+import java.util.*;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonObject;
+
+/**
+ * Utility class
+ */
+
+public class DumpToActionsUtility {
+
+	private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US);
+
+	/**
+	 * Reads a member of the given JSON object as string.
+	 *
+	 * @param root the JSON object
+	 * @param key the name of the member
+	 * @return the string value of the member, the empty string when the member is missing or is a
+	 *         JSON null
+	 */
+	public static String getStringValue(final JsonObject root, final String key) {
+		if (!root.has(key) || root.get(key).isJsonNull()) {
+			// the literal is enough: there is no need to allocate a new empty string
+			return "";
+		}
+		return root.get(key).getAsString();
+	}
+
+	/**
+	 * Reads the non-blank string values of an array member of the given JSON object.
+	 *
+	 * @param root the JSON object
+	 * @param key the name of the member
+	 * @return the non-blank values, in array order; an empty list when the member is missing or is
+	 *         not an array. JSON null and JSON object elements are skipped.
+	 */
+	public static List<String> getArrayValues(final JsonObject root, final String key) {
+		final List<String> result = new ArrayList<>();
+		if (root.has(key) && root.get(key).isJsonArray()) {
+			final JsonArray asJsonArray = root.get(key).getAsJsonArray();
+			asJsonArray.forEach(it -> {
+				// getAsString() is not supported by JSON nulls and JSON objects:
+				// skip them instead of failing on the whole array
+				if (it.isJsonNull() || it.isJsonObject()) {
+					return;
+				}
+				final String value = it.getAsString();
+				if (StringUtils.isNotBlank(value)) {
+					result.add(value);
+				}
+			});
+		}
+		return result;
+	}
+
+	/**
+	 * Reads the JSON objects contained in an array member of the given JSON object.
+	 *
+	 * @param root the JSON object
+	 * @param key the name of the member
+	 * @return the JSON objects, in array order; an empty list when the member is missing or is not
+	 *         an array. Elements that are not JSON objects are skipped.
+	 */
+	public static List<JsonObject> getArrayObjects(final JsonObject root, final String key) {
+		final List<JsonObject> result = new ArrayList<>();
+		if (root.has(key) && root.get(key).isJsonArray()) {
+			final JsonArray asJsonArray = root.get(key).getAsJsonArray();
+			asJsonArray.forEach(it -> {
+				// getAsJsonObject() never returns null, it throws on elements of another
+				// type: the type has to be checked beforehand
+				if (it.isJsonObject()) {
+					result.add(it.getAsJsonObject());
+				}
+			});
+		}
+		return result;
+	}
+
+	/**
+	 * Tells whether the given string is a calendar date in the {@code yyyy-MM-dd} format.
+	 *
+	 * @param date the string to check, may be {@code null}
+	 * @return {@code true} when the string has the expected format and denotes an existing day
+	 */
+	public static boolean isValidDate(final String date) {
+		if (date == null || !date.matches("\\d{4}-\\d{2}-\\d{2}")) {
+			return false;
+		}
+		try {
+			// the pattern alone accepts impossible dates such as 2020-13-45 or 2021-02-30
+			java.time.LocalDate.parse(date);
+			return true;
+		} catch (java.time.format.DateTimeParseException e) {
+			return false;
+		}
+	}
+
+	/**
+	 * Formats the current time with {@code ISO8601FORMAT}, adding a colon between the hours and
+	 * the minutes of the time zone offset.
+	 *
+	 * @return the formatted current time
+	 */
+	public static String now_ISO8601() { // NOPMD
+		final StringBuilder timestamp = new StringBuilder();
+		// the access to the shared formatter is serialized
+		synchronized (ISO8601FORMAT) {
+			timestamp.append(ISO8601FORMAT.format(new Date()));
+		}
+		// convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00
+		// - note the added colon for the Timezone
+		timestamp.insert(timestamp.length() - 2, ":");
+		return timestamp.toString();
+	}
+
+	/**
+	 * Maps a cobj category code to the default result type.
+	 *
+	 * @param cobjcategory the four-digit category code
+	 * @return "software", "dataset", "other" or "publication"; unknown codes map to "publication"
+	 */
+	public static String getDefaultResulttype(final String cobjcategory) {
+		final String resulttype;
+		switch (cobjcategory) {
+			case "0029":
+				resulttype = "software";
+				break;
+			case "0021":
+			case "0024":
+			case "0025":
+			case "0030":
+				resulttype = "dataset";
+				break;
+			case "0000":
+			case "0010":
+			case "0018":
+			case "0020":
+			case "0022":
+			case "0023":
+			case "0026":
+			case "0027":
+			case "0028":
+			case "0037":
+				resulttype = "other";
+				break;
+			default:
+				// includes the publication codes 0001, 0002, 0004-0009, 0011-0017, 0019,
+				// 0031 and 0032, as well as any code not listed above
+				resulttype = "publication";
+				break;
+		}
+		return resulttype;
+	}
+
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java
@ -0,0 +1,32 @@
+
+package eu.dnetlib.doiboost.orcidnodoi.util;
+
+/**
+ * Immutable pair of a key and a value; both components may be {@code null}.
+ * Two pairs are equal when their keys are equal and their values are equal.
+ *
+ * @param <K> the type of the key
+ * @param <V> the type of the value
+ */
+public class Pair<K, V> {
+
+	private final K k;
+
+	private final V v;
+
+	public Pair(K k, V v) {
+		this.k = k;
+		this.v = v;
+	}
+
+	public K getKey() {
+		return k;
+	}
+
+	public V getValue() {
+		return v;
+	}
+
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj) {
+			return true;
+		}
+		if (!(obj instanceof Pair<?, ?>)) {
+			return false;
+		}
+		final Pair<?, ?> tmp = (Pair<?, ?>) obj;
+		// null-safe comparison of both components
+		return java.util.Objects.equals(k, tmp.getKey()) && java.util.Objects.equals(v, tmp.getValue());
+	}
+
+	// equal pairs must have the same hash code, otherwise hash-based collections misbehave
+	@Override
+	public int hashCode() {
+		return java.util.Objects.hash(k, v);
+	}
+
+}
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java
@ -0,0 +1,217 @@
+
+package eu.dnetlib.doiboost.orcidnodoi.xml;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.ximpleware.*;
+
+import eu.dnetlib.dhp.parser.utility.VtdException;
+import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
+import eu.dnetlib.doiboost.orcidnodoi.model.Contributor;
+import eu.dnetlib.doiboost.orcidnodoi.model.ExternalId;
+import eu.dnetlib.doiboost.orcidnodoi.model.PublicationDate;
+import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;
+
+/**
+ * This class is used for parsing xml data with vtd parser
+ */
+
+public class XMLRecordParserNoDoi {
+
+	private static final Logger logger = LoggerFactory.getLogger(XMLRecordParserNoDoi.class);
+
+	private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
+	private static final String NS_COMMON = "common";
+	private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
+	private static final String NS_PERSON = "person";
+	private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
+	private static final String NS_DETAILS = "personal-details";
+	private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
+	private static final String NS_OTHER = "other-name";
+	private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
+	private static final String NS_RECORD = "record";
+	private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
+
+	private static final String NS_WORK = "work";
+	private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
+
+	private static final String NS_ERROR = "error";
+
+	public static WorkDataNoDoi VTDParseWorkData(byte[] bytes)
+		throws VtdException, EncodingException, EOFException, EntityException, ParseException, XPathParseException,
+		NavException, XPathEvalException {
+		final VTDGen vg = new VTDGen();
+		vg.setDoc(bytes);
+		vg.parse(true);
+		final VTDNav vn = vg.getNav();
+		final AutoPilot ap = new AutoPilot(vn);
+		ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
+		ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
+		ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
+
+		WorkDataNoDoi workData = new WorkDataNoDoi();
+		final List<String> errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code");
+		if (!errors.isEmpty()) {
+			workData.setErrorCode(errors.get(0));
+			return workData;
+		}
+
+		List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
+			.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "put-code"));
+		if (!workNodes.isEmpty()) {
+			final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
+			workData.setOid(oid);
+			final String id = (workNodes.get(0).getAttributes().get("put-code"));
+			workData.setId(id);
+		} else {
+			return null;
+		}
+
+		final List<String> titles = VtdUtilityParser
+			.getTextValue(
+				ap, vn, "//common:title");
+		if (!titles.isEmpty()) {
+			workData.setTitles(titles);
+		}
+
+		final List<String> sourceNames = VtdUtilityParser
+			.getTextValue(
+				ap, vn, "//common:source-name");
+		if (!sourceNames.isEmpty()) {
+			workData.setSourceName(sourceNames.get(0));
+		}
+
+		final List<String> types = VtdUtilityParser
+			.getTextValue(
+				ap, vn, "//work:type");
+		if (!types.isEmpty()) {
+			workData.setType(types.get(0));
+		}
+
+		final List<String> urls = VtdUtilityParser
+			.getTextValue(
+				ap, vn, "//common:url");
+		if (!urls.isEmpty()) {
+			workData.setUrls(urls);
+		}
+
+		workData.setPublicationDates(getPublicationDates(vg, vn, ap));
+		workData.setExtIds(getExternalIds(vg, vn, ap));
+		workData.setContributors(getContributors(vg, vn, ap));
+		return workData;
+
+	}
+
+	private static List<PublicationDate> getPublicationDates(VTDGen vg, VTDNav vn, AutoPilot ap)
+		throws XPathParseException, NavException, XPathEvalException {
+		List<PublicationDate> publicationDates = new ArrayList<PublicationDate>();
+		int yearIndex = 0;
+		ap.selectXPath("//common:publication-date/common:year");
+		while (ap.evalXPath() != -1) {
+			PublicationDate publicationDate = new PublicationDate();
+			int t = vn.getText();
+			if (t >= 0) {
+				publicationDate.setYear(vn.toNormalizedString(t));
+				publicationDates.add(yearIndex, publicationDate);
+				yearIndex++;
+			}
+		}
+		int monthIndex = 0;
+		ap.selectXPath("//common:publication-date/common:month");
+		while (ap.evalXPath() != -1) {
+			int t = vn.getText();
+			if (t >= 0) {
+				publicationDates.get(monthIndex).setMonth(vn.toNormalizedString(t));
+				monthIndex++;
+			}
+		}
+		int dayIndex = 0;
+		ap.selectXPath("//common:publication-date/common:day");
+		while (ap.evalXPath() != -1) {
+			int t = vn.getText();
+			if (t >= 0) {
+				publicationDates.get(dayIndex).setDay(vn.toNormalizedString(t));
+				dayIndex++;
+			}
+		}
+		return publicationDates;
+	}
+
+	private static List<ExternalId> getExternalIds(VTDGen vg, VTDNav vn, AutoPilot ap)
+		throws XPathParseException, NavException, XPathEvalException {
+		List<ExternalId> extIds = new ArrayList<ExternalId>();
+		int typeIndex = 0;
+		ap.selectXPath("//common:external-id/common:external-id-type");
+		while (ap.evalXPath() != -1) {
+			ExternalId extId = new ExternalId();
+			int t = vn.getText();
+			if (t >= 0) {
+				extId.setType(vn.toNormalizedString(t));
+				extIds.add(typeIndex, extId);
+				typeIndex++;
+			}
+		}
+		int valueIndex = 0;
+		ap.selectXPath("//common:external-id/common:external-id-value");
+		while (ap.evalXPath() != -1) {
+			int t = vn.getText();
+			if (t >= 0) {
+				extIds.get(valueIndex).setValue(vn.toNormalizedString(t));
+				valueIndex++;
+			}
+		}
+		int relationshipIndex = 0;
+		ap.selectXPath("//common:external-id/common:external-id-relationship");
+		while (ap.evalXPath() != -1) {
+			int t = vn.getText();
+			if (t >= 0) {
+				extIds.get(relationshipIndex).setRelationShip(vn.toNormalizedString(t));
+				relationshipIndex++;
+			}
+		}
+		if (typeIndex == valueIndex) {
+			return extIds;
+		}
+		return new ArrayList<ExternalId>();
+	}
+
+	private static List<Contributor> getContributors(VTDGen vg, VTDNav vn, AutoPilot ap)
+		throws XPathParseException, NavException, XPathEvalException {
+		List<Contributor> contributors = new ArrayList<Contributor>();
+		ap.selectXPath("//work:contributors/work:contributor");
+		while (ap.evalXPath() != -1) {
+			Contributor contributor = new Contributor();
+			if (vn.toElement(VTDNav.FIRST_CHILD, "work:credit-name")) {
+				int val = vn.getText();
+				if (val != -1) {
+					contributor.setCreditName(vn.toNormalizedString(val));
+				}
+				vn.toElement(VTDNav.PARENT);
+			}
+			if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-attributes")) {
+				if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-sequence")) {
+					int val = vn.getText();
+					if (val != -1) {
+						contributor.setSequence(vn.toNormalizedString(val));
+					}
+					vn.toElement(VTDNav.PARENT);
+				}
+				if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-role")) {
+					int val = vn.getText();
+					if (val != -1) {
+						contributor.setRole(vn.toNormalizedString(val));
+					}
+					vn.toElement(VTDNav.PARENT);
+				}
+				vn.toElement(VTDNav.PARENT);
+			}
+			contributors.add(contributor);
+		}
+		return contributors;
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_dois_data.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_dois_data.json
@ -1,6 +1,6 @@
 [
  {"paramName":"n",   "paramLongName":"hdfsServerUri",	"paramDescription": "the server uri",   "paramRequired": true},
-  {"paramName":"d",   "paramLongName":"hdfsOrcidDefaultPath",	"paramDescription": "the default work path",	"paramRequired": true},
+  {"paramName":"w",   "paramLongName":"workingPath",	"paramDescription": "the default work path",	"paramRequired": true},
  {"paramName":"f",   "paramLongName":"activitiesFileNameTarGz",	"paramDescription": "the name of the activities orcid file",	"paramRequired": true},
  {"paramName":"o",   "paramLongName":"outputAuthorsDOIsPath",	"paramDescription": "the relative folder of the sequencial file to write",	"paramRequired": true}
 ]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml
@ -23,76 +23,74 @@

    </parameters>

-    <start to="ExtractCrossrefToOAF"/>
+    <start to="ImportCrossRef"/>


    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

-    <action name="ResetWorkingPath">
-        <fs>
-            <delete path='${workingPath}/input/crossref/index_dump'/>
-<!--            <mkdir path='${workingPath}/input/crossref'/>-->
-        </fs>
-        <ok to="ImportCrossRef"/>
-        <error to="Kill"/>
-    </action>
-
-
-
    <action name="ImportCrossRef">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <main-class>eu.dnetlib.doiboost.crossref.CrossrefImporter</main-class>
-            <arg>-t</arg><arg>${workingPath}/input/crossref/index_dump_1</arg>
+            <arg>-t</arg><arg>${workingPath}/input/crossref/index_update</arg>
            <arg>-n</arg><arg>${nameNode}</arg>
            <arg>-ts</arg><arg>${timestamp}</arg>
        </java>
-        <ok to="End"/>
+        <ok to="GenerateDataset"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="GenerateDataset">
+            <spark xmlns="uri:oozie:spark-action:0.2">
+                <master>yarn-cluster</master>
+                <mode>cluster</mode>
+                <name>ExtractCrossrefToOAF</name>
+                <class>eu.dnetlib.doiboost.crossref.CrossrefDataset</class>
+                <jar>dhp-doiboost-${projectVersion}.jar</jar>
+                <spark-opts>
+                    --executor-memory=${sparkExecutorMemory}
+                    --executor-cores=${sparkExecutorCores}
+                    --driver-memory=${sparkDriverMemory}
+                    --conf spark.sql.shuffle.partitions=3840
+                    ${sparkExtraOPT}
+                </spark-opts>
+                <arg>--workingPath</arg><arg>/data/doiboost/input/crossref</arg>
+                <arg>--master</arg><arg>yarn-cluster</arg>
+            </spark>
+            <ok to="RenameDataset"/>
+            <error to="Kill"/>
+    </action>
+
+    <action name="RenameDataset">
+        <fs>
+            <delete path='${workingPath}/input/crossref/crossref_ds'/>
+            <move source="${workingPath}/input/crossref/crossref_ds_updated"
+                  target="${workingPath}/input/crossref/crossref_ds"/>
+        </fs>
+        <ok to="ConvertCrossrefToOAF"/>
        <error to="Kill"/>
    </action>


-    <action name="ExtractCrossrefToOAF">
+    <action name="ConvertCrossrefToOAF">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
            <mode>cluster</mode>
-            <name>ExtractCrossrefToOAF</name>
+            <name>ConvertCrossrefToOAF</name>
            <class>eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF</class>
            <jar>dhp-doiboost-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
                ${sparkExtraOPT}
            </spark-opts>
-            <arg>--sourcePath</arg><arg>${workingPath}/input/crossref/index_dump,${workingPath}/input/crossref/index_dump_1,${workingPath}/crossref/index_dump</arg>
-            <arg>--targetPath</arg><arg>${workingPath}/input/crossref</arg>
-            <arg>--master</arg><arg>yarn-cluster</arg>
-        </spark>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-
-
-
-    <action name="GenerateDataset">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn-cluster</master>
-            <mode>cluster</mode>
-            <name>ExtractCrossrefToOAF</name>
-            <class>eu.dnetlib.doiboost.crossref.CrossrefDataset</class>
-            <jar>dhp-doiboost-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                ${sparkExtraOPT}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>/data/doiboost/crossref/cr_dataset</arg>
-            <arg>--targetPath</arg><arg>/data/doiboost/crossref/crossrefDataset</arg>
+            <arg>--sourcePath</arg><arg>${workingPath}/input/crossref/crossref_ds</arg>
+            <arg>--targetPath</arg><arg>${workingPath}/process/</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json
@ -1,6 +1,5 @@
 [
-  {"paramName":"s",   "paramLongName":"sourcePath", "paramDescription": "the path of the sequencial file to read",  "paramRequired": true},
-  {"paramName":"t",   "paramLongName":"targetPath", "paramDescription": "the working dir path",                      "paramRequired": true},
+  {"paramName":"w",   "paramLongName":"workingPath", "paramDescription": "the working dir path",                      "paramRequired": true},
  {"paramName":"m",   "paramLongName":"master",     "paramDescription": "the master name",                          "paramRequired": true}

 ]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_orcid_data.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_orcid_data.json
@ -1,6 +1,6 @@
 [
  {"paramName":"n",   "paramLongName":"hdfsServerUri",	"paramDescription": "the server uri",   "paramRequired": true},
-  {"paramName":"d",   "paramLongName":"hdfsOrcidDefaultPath",	"paramDescription": "the default work path",	"paramRequired": true},
+  {"paramName":"w",   "paramLongName":"workingPath",	"paramDescription": "the default work path",	"paramRequired": true},
  {"paramName":"f",   "paramLongName":"lambdaFileName",	"paramDescription": "the name of the lambda file",	"paramRequired": true},
  {"paramName":"o",   "paramLongName":"outputPath",	"paramDescription": "the relative folder of the sequencial file to write",	"paramRequired": true},
  {"paramName":"t",   "paramLongName":"token",	"paramDescription": "token to grant access",	"paramRequired": true}
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json
@ -0,0 +1,7 @@
+[
+ {"paramName":"n",   "paramLongName":"hdfsServerUri",	"paramDescription": "the server uri",   "paramRequired": true},
+ {"paramName":"w",   "paramLongName":"workingPath",	"paramDescription": "the default work path",	"paramRequired": true},
+ {"paramName":"f",   "paramLongName":"activitiesFileNameTarGz",	"paramDescription": "the name of the activities orcid file",	"paramRequired": true},
+ {"paramName":"ow",   "paramLongName":"outputWorksPath",	"paramDescription": "the relative folder of the sequential file to write",	"paramRequired": true},
+ {"paramName":"oew",   "paramLongName":"outputEnrichedWorksPath",	"paramDescription": "the relative folder of the sequential file to write the data",	"paramRequired": true}
+]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json
@ -1,6 +1,6 @@
 [
  {"paramName":"n",   "paramLongName":"hdfsServerUri",	"paramDescription": "the server uri",   "paramRequired": true},
-  {"paramName":"d",   "paramLongName":"hdfsOrcidDefaultPath",	"paramDescription": "the default work path",	"paramRequired": true},
+  {"paramName":"w",   "paramLongName":"workingPath",	"paramDescription": "the default work path",	"paramRequired": true},
  {"paramName":"f",   "paramLongName":"summariesFileNameTarGz",	"paramDescription": "the name of the summaries orcid file",	"paramRequired": true},
  {"paramName":"o",   "paramLongName":"outputAuthorsPath",	"paramDescription": "the relative folder of the sequencial file to write",	"paramRequired": true}
 ]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json
@ -1,4 +0,0 @@
-[{"paramName":"w",   "paramLongName":"workingPath",	"paramDescription": "the working path",	"paramRequired": true},
- {"paramName":"t",   "paramLongName":"token",	"paramDescription": "token to grant access",	"paramRequired": true},
- {"paramName":"o",   "paramLongName":"outputAuthorsPath",	"paramDescription": "the relative folder of the sequencial file to write the authors data",	"paramRequired": true}
-]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json
@ -0,0 +1,7 @@
+[
+ {"paramName":"n",   "paramLongName":"hdfsServerUri",	"paramDescription": "the server uri",   "paramRequired": true},
+ {"paramName":"w",   "paramLongName":"workingPath",	"paramDescription": "the default work path",	"paramRequired": true},
+ {"paramName":"f",   "paramLongName":"activitiesFileNameTarGz",	"paramDescription": "the name of the activities orcid file",	"paramRequired": true},
+ {"paramName":"ow",   "paramLongName":"outputWorksPath",	"paramDescription": "the relative folder of the sequential file to write",	"paramRequired": true},
+ {"paramName":"oew",   "paramLongName":"outputEnrichedWorksPath",	"paramDescription": "the relative folder of the sequential file to write the data",	"paramRequired": true}
+]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml
@ -39,14 +39,7 @@
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

-    <action name="ResetWorkingPath">
-        <fs>
-            <delete path='${workingDirPath}'/>
-            <mkdir path='${workingDirPath}'/>
-        </fs>
-        <ok to="CreateDOIBoost"/>
-        <error to="Kill"/>
-    </action>
+

    <action name="CreateDOIBoost">
        <spark xmlns="uri:oozie:spark-action:0.2">
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml
@ -8,6 +8,10 @@
            <name>targetPath</name>
            <description>the working dir base path</description>
        </property>
+        <property>
+            <name>workingPath</name>
+            <description>the working dir base path</description>
+        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -31,10 +35,10 @@

    <action name="ResetWorkingPath">
        <fs>
-            <delete path='${targetPath}'/>
-            <mkdir path='${targetPath}'/>
+            <delete path='${workingPath}'/>
+            <mkdir path='${workingPath}'/>
        </fs>
-        <ok to="PreprocessMag"/>
+        <ok to="ConvertMagToDataset"/>
        <error to="Kill"/>
    </action>

@ -52,10 +56,10 @@
                ${sparkExtraOPT}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
-            <arg>--targetPath</arg><arg>${targetPath}</arg>
+            <arg>--targetPath</arg><arg>${workingPath}</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
-        <ok to="End"/>
+        <ok to="PreprocessMag"/>
        <error to="Kill"/>
    </action>

@ -65,7 +69,7 @@
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
            <mode>cluster</mode>
-            <name>Convert Mag to Dataset</name>
+            <name>Convert Mag to OAF Dataset</name>
            <class>eu.dnetlib.doiboost.mag.SparkPreProcessMAG</class>
            <jar>dhp-doiboost-${projectVersion}.jar</jar>
            <spark-opts>
@ -75,7 +79,8 @@
                --conf spark.sql.shuffle.partitions=3840
                ${sparkExtraOPT}
            </spark-opts>
-            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
+            <arg>--sourcePath</arg><arg>${workingPath}</arg>
+            <arg>--workingPath</arg><arg>${workingPath}/process</arg>
            <arg>--targetPath</arg><arg>${targetPath}</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json
@ -1,6 +1,7 @@
 [
  {"paramName":"s",   "paramLongName":"sourcePath", "paramDescription": "the base path of MAG input",  "paramRequired": true},
-  {"paramName":"t",   "paramLongName":"targetPath", "paramDescription": "the working dir path",                      "paramRequired": true},
+  {"paramName":"t",   "paramLongName":"targetPath", "paramDescription": "the target dir path",                      "paramRequired": true},
+  {"paramName":"w",   "paramLongName":"workingPath", "paramDescription": "the working dir path",                      "paramRequired": true},
  {"paramName":"m",   "paramLongName":"master",     "paramDescription": "the master name",                          "paramRequired": true}

 ]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml
@ -0,0 +1,31 @@
+<configuration>
+    <property>
+        <name>oozie.action.sharelib.for.java</name>
+        <value>spark2</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.map.java.opts</name>
+        <value>-Xmx2g</value>
+    </property>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml
@ -0,0 +1,542 @@
+<workflow-app name="Gen Orcid Works-no-doi From Activities" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>workingPath</name>
+            <description>the working dir base path</description>
+        </property>
+        <property>
+            <name>shell_cmd_0</name>
+            <value>set -e ; trap 'rm -f /tmp/ORCID_2020_10_activites_0.tar.gz' EXIT ; wget -O /tmp/ORCID_2020_10_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/25002232 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_0.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_0.tar.gz
+            </value>
+            <description>the shell command that downloads orcid activity file 0 and puts it to hdfs under the hard-coded dir /data/orcid_activities_2020 (the existence check uses workingPath, keep them aligned); it stops at the first failing step so the action fails, and always removes the local temp file</description>
+        </property>
+        <property>
+            <name>shell_cmd_1</name>
+            <value>set -e ; trap 'rm -f /tmp/ORCID_2020_10_activites_1.tar.gz' EXIT ; wget -O /tmp/ORCID_2020_10_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/25002088 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_1.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_1.tar.gz
+            </value>
+            <description>the shell command that downloads orcid activity file 1 and puts it to hdfs under the hard-coded dir /data/orcid_activities_2020 (the existence check uses workingPath, keep them aligned); it stops at the first failing step so the action fails, and always removes the local temp file</description>
+        </property>
+        <property>
+            <name>shell_cmd_2</name>
+            <value>set -e ; trap 'rm -f /tmp/ORCID_2020_10_activites_2.tar.gz' EXIT ; wget -O /tmp/ORCID_2020_10_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/25000596 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_2.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_2.tar.gz
+            </value>
+            <description>the shell command that downloads orcid activity file 2 and puts it to hdfs under the hard-coded dir /data/orcid_activities_2020 (the existence check uses workingPath, keep them aligned); it stops at the first failing step so the action fails, and always removes the local temp file</description>
+        </property>
+        <property>
+            <name>shell_cmd_3</name>
+            <value>set -e ; trap 'rm -f /tmp/ORCID_2020_10_activites_3.tar.gz' EXIT ; wget -O /tmp/ORCID_2020_10_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/25015150 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_3.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_3.tar.gz
+            </value>
+            <description>the shell command that downloads orcid activity file 3 and puts it to hdfs under the hard-coded dir /data/orcid_activities_2020 (the existence check uses workingPath, keep them aligned); it stops at the first failing step so the action fails, and always removes the local temp file</description>
+        </property>
+        <property>
+            <name>shell_cmd_4</name>
+            <value>set -e ; trap 'rm -f /tmp/ORCID_2020_10_activites_4.tar.gz' EXIT ; wget -O /tmp/ORCID_2020_10_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/25033643 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_4.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_4.tar.gz
+            </value>
+            <description>the shell command that downloads orcid activity file 4 and puts it to hdfs under the hard-coded dir /data/orcid_activities_2020 (the existence check uses workingPath, keep them aligned); it stops at the first failing step so the action fails, and always removes the local temp file</description>
+        </property>
+        <property>
+            <name>shell_cmd_5</name>
+            <value>set -e ; trap 'rm -f /tmp/ORCID_2020_10_activites_5.tar.gz' EXIT ; wget -O /tmp/ORCID_2020_10_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/25005483 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_5.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_5.tar.gz
+            </value>
+            <description>the shell command that downloads orcid activity file 5 and puts it to hdfs under the hard-coded dir /data/orcid_activities_2020 (the existence check uses workingPath, keep them aligned); it stops at the first failing step so the action fails, and always removes the local temp file</description>
+        </property>
+        <property>
+            <name>shell_cmd_6</name>
+            <value>set -e ; trap 'rm -f /tmp/ORCID_2020_10_activites_6.tar.gz' EXIT ; wget -O /tmp/ORCID_2020_10_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/25005425 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_6.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_6.tar.gz
+            </value>
+            <description>the shell command that downloads orcid activity file 6 and puts it to hdfs under the hard-coded dir /data/orcid_activities_2020 (the existence check uses workingPath, keep them aligned); it stops at the first failing step so the action fails, and always removes the local temp file</description>
+        </property>
+        <property>
+            <name>shell_cmd_7</name>
+            <value>set -e ; trap 'rm -f /tmp/ORCID_2020_10_activites_7.tar.gz' EXIT ; wget -O /tmp/ORCID_2020_10_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/25012016 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_7.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_7.tar.gz
+            </value>
+            <description>the shell command that downloads orcid activity file 7 and puts it to hdfs under the hard-coded dir /data/orcid_activities_2020 (the existence check uses workingPath, keep them aligned); it stops at the first failing step so the action fails, and always removes the local temp file</description>
+        </property>
+        <property>
+            <name>shell_cmd_8</name>
+            <value>set -e ; trap 'rm -f /tmp/ORCID_2020_10_activites_8.tar.gz' EXIT ; wget -O /tmp/ORCID_2020_10_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/25012079 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_8.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_8.tar.gz
+            </value>
+            <description>the shell command that downloads orcid activity file 8 and puts it to hdfs under the hard-coded dir /data/orcid_activities_2020 (the existence check uses workingPath, keep them aligned); it stops at the first failing step so the action fails, and always removes the local temp file</description>
+        </property>
+        <property>
+            <name>shell_cmd_9</name>
+            <value>set -e ; trap 'rm -f /tmp/ORCID_2020_10_activites_9.tar.gz' EXIT ; wget -O /tmp/ORCID_2020_10_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/25010727 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_9.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_9.tar.gz
+            </value>
+            <description>the shell command that downloads orcid activity file 9 and puts it to hdfs under the hard-coded dir /data/orcid_activities_2020 (the existence check uses workingPath, keep them aligned); it stops at the first failing step so the action fails, and always removes the local temp file</description>
+        </property>
+        <property>
+            <name>shell_cmd_X</name>
+            <value>set -e ; trap 'rm -f /tmp/ORCID_2020_10_activites_X.tar.gz' EXIT ; wget -O /tmp/ORCID_2020_10_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/25011025 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_X.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_X.tar.gz
+            </value>
+            <description>the shell command that downloads orcid activity file X and puts it to hdfs under the hard-coded dir /data/orcid_activities_2020 (the existence check uses workingPath, keep them aligned); it stops at the first failing step so the action fails, and always removes the local temp file</description>
+        </property>
+    </parameters>
+
+    <start to="ResetWorkingPath"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    
+    <action name="ResetWorkingPath"> <!-- first node after start: removes earlier works output, then enters the download-check fork -->
+        <fs>
+            <delete path='${workingPath}/no_doi_works/*'/> <!-- NOTE(review): only no_doi_works/ is emptied; no_doi_enriched_works/ (passed as -oew to the GenOrcidAuthorWork actions) is not cleared here - confirm this is intended -->
+        </fs>
+        <ok to="fork_check_download_files"/>
+        <error to="Kill"/>
+    </action>
+    
+    <fork name = "fork_check_download_files">
+      <path start = "check_exist_on_hdfs_activities_0"/>
+      <path start = "check_exist_on_hdfs_activities_1"/>
+      <path start = "check_exist_on_hdfs_activities_2"/>
+	  <path start = "check_exist_on_hdfs_activities_3"/>
+	  <path start = "check_exist_on_hdfs_activities_4"/>
+	  <path start = "check_exist_on_hdfs_activities_5"/>
+	  <path start = "check_exist_on_hdfs_activities_6"/>
+	  <path start = "check_exist_on_hdfs_activities_7"/>
+	  <path start = "check_exist_on_hdfs_activities_8"/>
+	  <path start = "check_exist_on_hdfs_activities_9"/>
+	  <path start = "check_exist_on_hdfs_activities_X"/>
+   	</fork>
+   	
+    <decision name="check_exist_on_hdfs_activities_0">
+         <switch>
+            <case to="wait_download_phase_node">
+              ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_0.tar.gz'))}
+            </case>
+            <default to="Download_0" />
+         </switch>
+	</decision>
+	
+    <action name="Download_0">
+		<shell xmlns="uri:oozie:shell-action:0.1">
+		<job-tracker>${jobTracker}</job-tracker>
+		<name-node>${nameNode}</name-node>
+		<exec>bash</exec>
+	    <argument>-c</argument>
+	    <argument>${shell_cmd_0}</argument>
+		<capture-output/>
+		</shell>
+	<ok to="wait_download_phase_node"/>
+	<error to="Kill"/>
+	</action>
+	
+	<action name="GenOrcidAuthorWork_0">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_0.tar.gz</arg>
+            <arg>-ow</arg><arg>no_doi_works/works_0.seq</arg>
+            <arg>-oew</arg><arg>no_doi_enriched_works/</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+    
+    <decision name="check_exist_on_hdfs_activities_1">
+         <switch>
+            <case to="wait_download_phase_node">
+              ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_1.tar.gz'))}
+            </case>
+            <default to="Download_1" />
+         </switch>
+	</decision>
+	
+    <action name="Download_1">
+		<shell xmlns="uri:oozie:shell-action:0.1">
+		<job-tracker>${jobTracker}</job-tracker>
+		<name-node>${nameNode}</name-node>
+		<exec>bash</exec>
+	    <argument>-c</argument>
+	    <argument>${shell_cmd_1}</argument>
+		<capture-output/>
+		</shell>
+	<ok to="wait_download_phase_node"/>
+	<error to="Kill"/>
+	</action>
+	
+	<action name="GenOrcidAuthorWork_1">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_1.tar.gz</arg>
+            <arg>-ow</arg><arg>no_doi_works/works_1.seq</arg>
+            <arg>-oew</arg><arg>no_doi_enriched_works/</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+    
+    <decision name="check_exist_on_hdfs_activities_2">
+         <switch>
+            <case to="wait_download_phase_node">
+              ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_2.tar.gz'))}
+            </case>
+            <default to="Download_2" />
+         </switch>
+	</decision>
+	
+    <action name="Download_2">
+		<shell xmlns="uri:oozie:shell-action:0.1">
+		<job-tracker>${jobTracker}</job-tracker>
+		<name-node>${nameNode}</name-node>
+		<exec>bash</exec>
+	    <argument>-c</argument>
+	    <argument>${shell_cmd_2}</argument>
+		<capture-output/>
+		</shell>
+	<ok to="wait_download_phase_node"/>
+	<error to="Kill"/>
+	</action>
+	
+	<action name="GenOrcidAuthorWork_2">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_2.tar.gz</arg>
+            <arg>-ow</arg><arg>no_doi_works/works_2.seq</arg>
+            <arg>-oew</arg><arg>no_doi_enriched_works/</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+    
+    <decision name="check_exist_on_hdfs_activities_3">
+         <switch>
+            <case to="wait_download_phase_node">
+              ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_3.tar.gz'))}
+            </case>
+            <default to="Download_3" />
+         </switch>
+	</decision>
+	
+    <action name="Download_3">
+		<shell xmlns="uri:oozie:shell-action:0.1">
+		<job-tracker>${jobTracker}</job-tracker>
+		<name-node>${nameNode}</name-node>
+		<exec>bash</exec>
+	    <argument>-c</argument>
+	    <argument>${shell_cmd_3}</argument>
+		<capture-output/>
+		</shell>
+	<ok to="wait_download_phase_node"/>
+	<error to="Kill"/>
+	</action>
+	
+	<action name="GenOrcidAuthorWork_3">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_3.tar.gz</arg>
+            <arg>-ow</arg><arg>no_doi_works/works_3.seq</arg>
+            <arg>-oew</arg><arg>no_doi_enriched_works/</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+    
+    <decision name="check_exist_on_hdfs_activities_4">
+         <switch>
+            <case to="wait_download_phase_node">
+              ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_4.tar.gz'))}
+            </case>
+            <default to="Download_4" />
+         </switch>
+	</decision>
+	
+    <action name="Download_4">
+		<shell xmlns="uri:oozie:shell-action:0.1">
+		<job-tracker>${jobTracker}</job-tracker>
+		<name-node>${nameNode}</name-node>
+		<exec>bash</exec>
+	    <argument>-c</argument>
+	    <argument>${shell_cmd_4}</argument>
+		<capture-output/>
+		</shell>
+	<ok to="wait_download_phase_node"/>
+	<error to="Kill"/>
+	</action>
+	
+	<action name="GenOrcidAuthorWork_4">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_4.tar.gz</arg>
+            <arg>-ow</arg><arg>no_doi_works/works_4.seq</arg>
+            <arg>-oew</arg><arg>no_doi_enriched_works/</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+    
+    <decision name="check_exist_on_hdfs_activities_5">
+         <switch>
+            <case to="wait_download_phase_node">
+              ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_5.tar.gz'))}
+            </case>
+            <default to="Download_5" />
+         </switch>
+	</decision>
+	
+    <action name="Download_5">
+		<shell xmlns="uri:oozie:shell-action:0.1">
+		<job-tracker>${jobTracker}</job-tracker>
+		<name-node>${nameNode}</name-node>
+		<exec>bash</exec>
+	    <argument>-c</argument>
+	    <argument>${shell_cmd_5}</argument>
+		<capture-output/>
+		</shell>
+	<ok to="wait_download_phase_node"/>
+	<error to="Kill"/>
+	</action>
+	
+	<action name="GenOrcidAuthorWork_5">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_5.tar.gz</arg>
+            <arg>-ow</arg><arg>no_doi_works/works_5.seq</arg>
+            <arg>-oew</arg><arg>no_doi_enriched_works/</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+    
+    <decision name="check_exist_on_hdfs_activities_6">
+         <switch>
+            <case to="wait_download_phase_node">
+              ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_6.tar.gz'))}
+            </case>
+            <default to="Download_6" />
+         </switch>
+	</decision>
+	
+    <action name="Download_6">
+		<shell xmlns="uri:oozie:shell-action:0.1">
+		<job-tracker>${jobTracker}</job-tracker>
+		<name-node>${nameNode}</name-node>
+		<exec>bash</exec>
+	    <argument>-c</argument>
+	    <argument>${shell_cmd_6}</argument>
+		<capture-output/>
+		</shell>
+	<ok to="wait_download_phase_node"/>
+	<error to="Kill"/>
+	</action>
+	
+	<action name="GenOrcidAuthorWork_6">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_6.tar.gz</arg>
+            <arg>-ow</arg><arg>no_doi_works/works_6.seq</arg>
+            <arg>-oew</arg><arg>no_doi_enriched_works/</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+    
+    
+    <decision name="check_exist_on_hdfs_activities_7">
+         <switch>
+            <case to="wait_download_phase_node">
+              ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_7.tar.gz'))}
+            </case>
+            <default to="Download_7" />
+         </switch>
+	</decision>
+	
+    <action name="Download_7">
+		<shell xmlns="uri:oozie:shell-action:0.1">
+		<job-tracker>${jobTracker}</job-tracker>
+		<name-node>${nameNode}</name-node>
+		<exec>bash</exec>
+	    <argument>-c</argument>
+	    <argument>${shell_cmd_7}</argument>
+		<capture-output/>
+		</shell>
+	<ok to="wait_download_phase_node"/>
+	<error to="Kill"/>
+	</action>
+	
+	<action name="GenOrcidAuthorWork_7">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_7.tar.gz</arg>
+            <arg>-ow</arg><arg>no_doi_works/works_7.seq</arg>
+            <arg>-oew</arg><arg>no_doi_enriched_works/</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+    
+    <decision name="check_exist_on_hdfs_activities_8">
+         <switch>
+            <case to="wait_download_phase_node">
+              ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_8.tar.gz'))}
+            </case>
+            <default to="Download_8" />
+         </switch>
+	</decision>
+	
+    <action name="Download_8">
+		<shell xmlns="uri:oozie:shell-action:0.1">
+		<job-tracker>${jobTracker}</job-tracker>
+		<name-node>${nameNode}</name-node>
+		<exec>bash</exec>
+	    <argument>-c</argument>
+	    <argument>${shell_cmd_8}</argument>
+		<capture-output/>
+		</shell>
+	<ok to="wait_download_phase_node"/>
+	<error to="Kill"/>
+	</action>
+	
+	<action name="GenOrcidAuthorWork_8">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_8.tar.gz</arg>
+            <arg>-ow</arg><arg>no_doi_works/works_8.seq</arg>
+            <arg>-oew</arg><arg>no_doi_enriched_works/</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+    
+    <decision name="check_exist_on_hdfs_activities_9">
+         <switch>
+            <case to="wait_download_phase_node">
+              ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_9.tar.gz'))}
+            </case>
+            <default to="Download_9" />
+         </switch>
+	</decision>
+	
+    <action name="Download_9">
+		<shell xmlns="uri:oozie:shell-action:0.1">
+		<job-tracker>${jobTracker}</job-tracker>
+		<name-node>${nameNode}</name-node>
+		<exec>bash</exec>
+	    <argument>-c</argument>
+	    <argument>${shell_cmd_9}</argument>
+		<capture-output/>
+		</shell>
+	<ok to="wait_download_phase_node"/>
+	<error to="Kill"/>
+	</action>
+	
+	<action name="GenOrcidAuthorWork_9">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_9.tar.gz</arg>
+            <arg>-ow</arg><arg>no_doi_works/works_9.seq</arg>
+            <arg>-oew</arg><arg>no_doi_enriched_works/</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+    
+    <decision name="check_exist_on_hdfs_activities_X">
+         <switch>
+            <case to="wait_download_phase_node">
+              ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_X.tar.gz'))}
+            </case>
+            <default to="Download_X" />
+         </switch>
+	</decision>
+	
+    <action name="Download_X">
+		<shell xmlns="uri:oozie:shell-action:0.1">
+		<job-tracker>${jobTracker}</job-tracker>
+		<name-node>${nameNode}</name-node>
+		<exec>bash</exec>
+	    <argument>-c</argument>
+	    <argument>${shell_cmd_X}</argument>
+		<capture-output/>
+		</shell>
+	<ok to="wait_download_phase_node"/>
+	<error to="Kill"/>
+	</action>
+	
+	<action name="GenOrcidAuthorWork_X">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_X.tar.gz</arg>
+            <arg>-ow</arg><arg>no_doi_works/works_X.seq</arg>
+            <arg>-oew</arg><arg>no_doi_enriched_works/</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+    <join name = "wait_download_phase_node" to = "fork_gen_orcid_author_work"/>
+
+    <fork name = "fork_gen_orcid_author_work">
+        <path start = "GenOrcidAuthorWork_0"/>
+        <path start = "GenOrcidAuthorWork_1"/>
+        <path start = "GenOrcidAuthorWork_2"/>
+        <path start = "GenOrcidAuthorWork_3"/>
+        <path start = "GenOrcidAuthorWork_4"/>
+        <path start = "GenOrcidAuthorWork_5"/>
+        <path start = "GenOrcidAuthorWork_6"/>
+        <path start = "GenOrcidAuthorWork_7"/>
+        <path start = "GenOrcidAuthorWork_8"/>
+        <path start = "GenOrcidAuthorWork_9"/>
+        <path start = "GenOrcidAuthorWork_X"/>
+    </fork>
+
+    <join name = "join_node" to = "End"/>
+
+<!--    <join name = "join_node" to = "fork_gen_orcid_author_work_2"/>-->
+
+<!--    <fork name = "fork_gen_orcid_author_work_2">-->
+<!--        <path start = "GenOrcidAuthorWork_6"/>-->
+<!--        <path start = "GenOrcidAuthorWork_7"/>-->
+<!--        <path start = "GenOrcidAuthorWork_8"/>-->
+<!--        <path start = "GenOrcidAuthorWork_9"/>-->
+<!--        <path start = "GenOrcidAuthorWork_X"/>-->
+<!--    </fork>-->
+
+<!--    <join name = "join_node_2" to = "End"/>-->
+
+   <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_download/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_download/oozie_app/workflow.xml
@ -1,45 +0,0 @@
-<workflow-app name="Orcid Download" xmlns="uri:oozie:workflow:0.5">
-    <parameters>
-        <property>
-            <name>workingPathOrcid</name>
-            <description>the working dir base path</description>
-        </property>
-        <property>
-            <name>token</name>
-            <description>access token</description>
-        </property>
-    </parameters>
-    
-    <start to="ResetWorkingPath"/>
-    
-    
-    <kill name="Kill">
-        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-    </kill>
-    
-    <action name="ResetWorkingPath">
-        <fs>
-            <delete path='${workingPathOrcid}/download'/>
-            <mkdir path='${workingPathOrcid}/download'/>
-        </fs>
-        <ok to="DownloadOrcidData"/>
-        <error to="Kill"/>
-    </action>
-	
-	<action name="DownloadOrcidData">
-        <java>
-            <job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <main-class>eu.dnetlib.doiboost.orcid.OrcidDownloader</main-class>
-            <arg>-d</arg><arg>${workingPathOrcid}/</arg>
-            <arg>-n</arg><arg>${nameNode}</arg>
-            <arg>-f</arg><arg>last_modified.csv</arg>
-            <arg>-o</arg><arg>download/</arg>
-            <arg>-t</arg><arg>${token}</arg>
-        </java>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-    
-   <end name="End"/>
-</workflow-app>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_activities/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_activities/oozie_app/workflow.xml
@ -0,0 +1,232 @@
+<workflow-app name="Extract Orcid XML Works From Activities" xmlns="uri:oozie:workflow:0.5">
+    <parameters> <!-- Only workingPath is declared, and without a default value. jobTracker, nameNode and oozieActionShareLibForSpark2 are referenced further down but are not declared here. -->
+        <property>
+            <name>workingPath</name>
+            <description>the working dir base path</description>
+        </property>
+    </parameters>
+
+    <global> <!-- Settings shared by the actions of this workflow. -->
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property> <!-- Java actions use the sharelib named by oozieActionShareLibForSpark2. -->
+                <name>oozie.action.sharelib.for.java</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+            <property>
+                <name>oozie.launcher.mapreduce.user.classpath.first</name>
+                <value>true</value>
+            </property>
+            <property> <!-- JVM options for the launcher map task: 2 GB maximum heap. -->
+                <name>oozie.launcher.mapreduce.map.java.opts</name>
+                <value>-Xmx2g</value>
+            </property>
+            <property>
+                <name>oozie.use.system.libpath</name>
+                <value>true</value>
+            </property>
+        </configuration>
+    </global>
+
+    <start to="ResetWorkingPath"/> <!-- Entry point: the run begins with the ResetWorkingPath action. -->
+
+
+    <kill name="Kill"> <!-- Target of every error transition in this workflow; the message embeds the error of the last failed node. -->
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <action name="ResetWorkingPath"> <!-- Deletes and recreates xml/works under workingPath, then hands over to fork_node. -->
+        <fs>
+            <delete path='${workingPath}/xml/works'/>
+            <mkdir path='${workingPath}/xml/works'/>
+        </fs>
+        <ok to="fork_node"/>
+        <error to="Kill"/>
+    </action>
+
+    <fork name = "fork_node"> <!-- Starts eleven parallel paths, one per ExtractXMLWorkActivities_* action (suffixes 0-9 and X). -->
+        <path start = "ExtractXMLWorkActivities_0"/>
+        <path start = "ExtractXMLWorkActivities_1"/>
+        <path start = "ExtractXMLWorkActivities_2"/>
+        <path start = "ExtractXMLWorkActivities_3"/>
+        <path start = "ExtractXMLWorkActivities_4"/>
+        <path start = "ExtractXMLWorkActivities_5"/>
+        <path start = "ExtractXMLWorkActivities_6"/>
+        <path start = "ExtractXMLWorkActivities_7"/>
+        <path start = "ExtractXMLWorkActivities_8"/>
+        <path start = "ExtractXMLWorkActivities_9"/>
+        <path start = "ExtractXMLWorkActivities_X"/>
+    </fork>
+    
+    <action name="ExtractXMLWorkActivities_0"> <!-- Suffix 0: runs ExtractXMLActivitiesData with ORCID_2020_10_activites_0.tar.gz as the -f value and xml/works/xml_works_0.seq as the -ow value; -oew only receives a placeholder of dashes. NOTE(review): the spelling "activites" is used for every suffix and presumably mirrors the archive file names; confirm before correcting it. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_0.tar.gz</arg>
+            <arg>-ow</arg><arg>xml/works/xml_works_0.seq</arg>
+            <arg>-oew</arg><arg>---</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="ExtractXMLWorkActivities_1"> <!-- Suffix 1: runs ExtractXMLActivitiesData with ORCID_2020_10_activites_1.tar.gz as the -f value and xml/works/xml_works_1.seq as the -ow value; success leads to join_node. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_1.tar.gz</arg>
+            <arg>-ow</arg><arg>xml/works/xml_works_1.seq</arg>
+            <arg>-oew</arg><arg>---</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="ExtractXMLWorkActivities_2"> <!-- Suffix 2: runs ExtractXMLActivitiesData with ORCID_2020_10_activites_2.tar.gz as the -f value and xml/works/xml_works_2.seq as the -ow value; success leads to join_node. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_2.tar.gz</arg>
+            <arg>-ow</arg><arg>xml/works/xml_works_2.seq</arg>
+            <arg>-oew</arg><arg>---</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="ExtractXMLWorkActivities_3"> <!-- Suffix 3: runs ExtractXMLActivitiesData with ORCID_2020_10_activites_3.tar.gz as the -f value and xml/works/xml_works_3.seq as the -ow value; success leads to join_node. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_3.tar.gz</arg>
+            <arg>-ow</arg><arg>xml/works/xml_works_3.seq</arg>
+            <arg>-oew</arg><arg>---</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="ExtractXMLWorkActivities_4"> <!-- Suffix 4: runs ExtractXMLActivitiesData with ORCID_2020_10_activites_4.tar.gz as the -f value and xml/works/xml_works_4.seq as the -ow value; success leads to join_node. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_4.tar.gz</arg>
+            <arg>-ow</arg><arg>xml/works/xml_works_4.seq</arg>
+            <arg>-oew</arg><arg>---</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="ExtractXMLWorkActivities_5"> <!-- Suffix 5: runs ExtractXMLActivitiesData with ORCID_2020_10_activites_5.tar.gz as the -f value and xml/works/xml_works_5.seq as the -ow value; success leads to join_node. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_5.tar.gz</arg>
+            <arg>-ow</arg><arg>xml/works/xml_works_5.seq</arg>
+            <arg>-oew</arg><arg>---</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+
+    <action name="ExtractXMLWorkActivities_6"> <!-- Suffix 6: runs ExtractXMLActivitiesData with ORCID_2020_10_activites_6.tar.gz as the -f value and xml/works/xml_works_6.seq as the -ow value; success leads to join_node. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_6.tar.gz</arg>
+            <arg>-ow</arg><arg>xml/works/xml_works_6.seq</arg>
+            <arg>-oew</arg><arg>---</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="ExtractXMLWorkActivities_7"> <!-- Suffix 7: runs ExtractXMLActivitiesData with ORCID_2020_10_activites_7.tar.gz as the -f value and xml/works/xml_works_7.seq as the -ow value; success leads to join_node. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_7.tar.gz</arg>
+            <arg>-ow</arg><arg>xml/works/xml_works_7.seq</arg>
+            <arg>-oew</arg><arg>---</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+
+    <action name="ExtractXMLWorkActivities_8"> <!-- Suffix 8: runs ExtractXMLActivitiesData with ORCID_2020_10_activites_8.tar.gz as the -f value and xml/works/xml_works_8.seq as the -ow value; success leads to join_node. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_8.tar.gz</arg>
+            <arg>-ow</arg><arg>xml/works/xml_works_8.seq</arg>
+            <arg>-oew</arg><arg>---</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="ExtractXMLWorkActivities_9"> <!-- Suffix 9: runs ExtractXMLActivitiesData with ORCID_2020_10_activites_9.tar.gz as the -f value and xml/works/xml_works_9.seq as the -ow value; success leads to join_node. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_9.tar.gz</arg>
+            <arg>-ow</arg><arg>xml/works/xml_works_9.seq</arg>
+            <arg>-oew</arg><arg>---</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="ExtractXMLWorkActivities_X"> <!-- Suffix X: runs ExtractXMLActivitiesData with ORCID_2020_10_activites_X.tar.gz as the -f value and xml/works/xml_works_X.seq as the -ow value; success leads to join_node. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_activites_X.tar.gz</arg>
+            <arg>-ow</arg><arg>xml/works/xml_works_X.seq</arg>
+            <arg>-oew</arg><arg>---</arg>
+        </java>
+        <ok to="join_node"/>
+        <error to="Kill"/>
+    </action>
+
+    <join name = "join_node" to = "End"/> <!-- All eleven ExtractXMLWorkActivities_* paths converge here before the workflow ends. -->
+
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/config-default.xml
@ -0,0 +1,26 @@
+<configuration> <!-- Default job properties packaged with the orcid_extract_xml_summaries workflow. -->
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+    <property> <!-- JVM options for the launcher map task: 8 GB maximum heap. -->
+        <name>oozie.launcher.mapreduce.map.java.opts</name>
+        <value>-Xmx8g</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/workflow.xml
@ -1,41 +1,40 @@
-<workflow-app name="import Orcid" xmlns="uri:oozie:workflow:0.5">
+<workflow-app name="Extract Orcid XML Authors From Summaries" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>workingPath</name>
            <description>the working dir base path</description>
        </property>
    </parameters>
-    
+
    <start to="ResetWorkingPath"/>
-    
-    
+
+
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
-    
+
    <action name="ResetWorkingPath">
        <fs>
-            <delete path='${workingPath}/output'/>
-            <mkdir path='${workingPath}/output'/>
+            <delete path='${workingPath}/xml/authors'/>
+            <mkdir path='${workingPath}/xml/authors'/>
        </fs>
-        <ok to="ImportOrcidSummary"/>
+        <ok to="ExtractXMLAuthorsSummaries"/>
        <error to="Kill"/>
    </action>
-    
-    
-    
-    <action name="ImportOrcidSummary">
+
+    <action name="ExtractXMLAuthorsSummaries">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
-            <main-class>eu.dnetlib.doiboost.orcid.OrcidDSManager</main-class>
-            <arg>-d</arg><arg>${workingPath}/</arg>
+            <main-class>eu.dnetlib.doiboost.orcid.ExtractXMLSummariesData</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
            <arg>-n</arg><arg>${nameNode}</arg>
-            <arg>-f</arg><arg>ORCID_2019_summaries.tar.gz</arg>
-            <arg>-o</arg><arg>output/</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_summaries.tar.gz</arg>
+            <arg>-o</arg><arg>xml/authors/</arg>
        </java>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
+
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/config-default.xml
@ -1,22 +0,0 @@
-<configuration>
-    <property>
-            <name>jobTracker</name>
-            <value>hadoop-rm3.garr-pa1.d4science.org:8032</value>
-    </property>
-    <property>
-            <name>nameNode</name>
-            <value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
-    </property>
-    <property>
-            <name>queueName</name>
-            <value>default</value>
-    </property>
-    <property>
-        <name>oozie.use.system.libpath</name>
-        <value>true</value>
-    </property>
-    <property>
-        <name>oozie.action.sharelib.for.spark</name>
-        <value>spark2</value>
-    </property>
-</configuration>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/workflow.xml
@ -1,83 +0,0 @@
-<workflow-app name="Gen Orcid Authors" xmlns="uri:oozie:workflow:0.5">
-    <parameters>
-        <property>
-            <name>workingPath</name>
-            <description>the working dir base path</description>
-        </property>
-        <property>
-            <name>token</name>
-            <description>access token</description>
-        </property>
-        <property>
-            <name>sparkDriverMemory</name>
-            <description>memory for driver process</description>
-        </property>
-        <property>
-            <name>sparkExecutorMemory</name>
-            <description>memory for individual executor</description>
-        </property>
-        <property>
-            <name>sparkExecutorCores</name>
-            <description>number of cores used by single executor</description>
-        </property>
-        <property>
-            <name>outputPath</name>
-            <description>the working dir base path</description>
-        </property>
-    </parameters>
-    
-    <start to="ResetWorkingPath"/>
-    
-    
-    <kill name="Kill">
-        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-    </kill>
-    
-    <action name="ResetWorkingPath">
-        <fs>
-            <delete path='${workingPath_activities}/authors'/>
-        </fs>
-        <ok to="Gen_Orcid_Authors"/>
-        <error to="Kill"/>
-    </action>
-	
-	<action name="Split_Lambda_File">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-        	<job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Split_Lambda_File</name>
-            <class>eu.dnetlib.doiboost.orcid.SparkPartitionLambdaFile</class>
-            <jar>dhp-doiboost-1.2.1-SNAPSHOT.jar</jar>
-            <spark-opts>--num-executors 24 --conf spark.yarn.jars=&quot;hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2&quot; --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory}
-            </spark-opts>
-            <arg>-w</arg><arg>${workingPath}/</arg>
-            <arg>-o</arg><arg>authors/</arg>
-            <arg>-t</arg><arg>${token}</arg>
-        </spark>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-    
-	<action name="Gen_Orcid_Authors">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-        	<job-tracker>${jobTracker}</job-tracker>
-            <name-node>${nameNode}</name-node>
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Gen_Orcid_Authors</name>
-            <class>eu.dnetlib.doiboost.orcid.SparkOrcidGenerateAuthors</class>
-            <jar>dhp-doiboost-1.2.1-SNAPSHOT.jar</jar>
-            <spark-opts>--num-executors 20 --conf spark.yarn.jars=&quot;hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2&quot; --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory}
-            </spark-opts>
-            <arg>-w</arg><arg>${workingPath}/</arg>
-            <arg>-o</arg><arg>authors/</arg>
-            <arg>-t</arg><arg>${token}</arg>
-        </spark>
-        <ok to="End"/>
-        <error to="Kill"/>
-    </action>
-    
-   <end name="End"/>
-</workflow-app>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml
@ -0,0 +1,26 @@
+<configuration> <!-- Default job properties packaged with the orcid_summaries workflow. -->
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+    <property> <!-- JVM options for the launcher map task: 8 GB maximum heap. -->
+        <name>oozie.launcher.mapreduce.map.java.opts</name>
+        <value>-Xmx8g</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml
@ -0,0 +1,68 @@
+<workflow-app name="Gen Orcid Authors From Summaries" xmlns="uri:oozie:workflow:0.5">
+    <parameters> <!-- workingPath has no default value; shell_cmd_0 defaults to the download command below. -->
+        <property>
+            <name>workingPath</name>
+            <description>the working dir base path</description>
+        </property>
+        <property> <!-- The command starts with set -e so that bash stops at the first failing step: a failed download or HDFS copy now fails the action instead of being hidden by the exit status of the final rm -f. The HDFS target directory is fixed in the command and does not follow workingPath. -->
+            <name>shell_cmd_0</name>
+            <value>set -e ; wget -O /tmp/ORCID_2020_10_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/25032905 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_summaries.tar.gz /data/orcid_activities_2020/ORCID_2020_10_summaries.tar.gz ; rm -f /tmp/ORCID_2020_10_summaries.tar.gz
+            </value>
+            <description>the shell command that downloads and puts to hdfs orcid summaries</description>
+        </property>
+    </parameters>
+    
+    <start to="ResetWorkingPath"/> <!-- Entry point: the run begins with the ResetWorkingPath action. -->
+    
+    
+    <kill name="Kill"> <!-- Target of every error transition in this workflow; the message embeds the error of the last failed node. -->
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    
+    <action name="ResetWorkingPath"> <!-- Deletes and recreates the authors directory under workingPath, then moves on to the archive existence check. -->
+        <fs>
+            <delete path='${workingPath}/authors'/>
+            <mkdir path='${workingPath}/authors'/>
+        </fs>
+        <ok to="check_exist_on_hdfs_summaries"/>
+        <error to="Kill"/>
+    </action>
+
+    <decision name="check_exist_on_hdfs_summaries"> <!-- Goes straight to ImportOrcidSummaries when the summaries archive already exists under workingPath, otherwise to DownloadSummaries. NOTE(review): the default shell_cmd_0 uploads to /data/orcid_activities_2020, so this check and the download only agree when workingPath is that directory; confirm. -->
+        <switch>
+            <case to="ImportOrcidSummaries">
+                ${fs:exists(concat(workingPath,'/ORCID_2020_10_summaries.tar.gz'))}
+            </case>
+            <default to="DownloadSummaries" />
+        </switch>
+    </decision>
+
+    <action name="DownloadSummaries"> <!-- Runs the shell_cmd_0 parameter through bash -c, then continues with ImportOrcidSummaries. -->
+        <shell xmlns="uri:oozie:shell-action:0.1">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <exec>bash</exec>
+            <argument>-c</argument>
+            <argument>${shell_cmd_0}</argument>
+            <capture-output/>
+        </shell>
+        <ok to="ImportOrcidSummaries"/>
+        <error to="Kill"/>
+    </action>
+    
+    <action name="ImportOrcidSummaries"> <!-- Runs OrcidDSManager with ORCID_2020_10_summaries.tar.gz as the -f value and authors/ as the -o value; this is the last step before End. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.OrcidDSManager</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>ORCID_2020_10_summaries.tar.gz</arg>
+            <arg>-o</arg><arg>authors/</arg>
+        </java>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/config-default.xml
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml
@ -0,0 +1,168 @@
+<workflow-app name="Orcid Updates Download" xmlns="uri:oozie:workflow:0.5">
+    <parameters> <!-- workingPath, token, oozieActionShareLibForSpark2, spark2YarnHistoryServerAddress and spark2EventLogDir have no default value; the remaining parameters default to the values below. -->
+        <property>
+            <name>workingPath</name>
+            <description>the working dir base path</description>
+        </property>
+        <property>
+            <name>token</name>
+            <description>access token</description>
+        </property>
+        <property> <!-- The command starts with set -e so that bash stops at the first failing step: a failed download or HDFS copy now fails the action instead of being hidden by the exit status of the final rm -f. The HDFS target directory is fixed in the command and does not follow workingPath. -->
+            <name>shell_cmd</name>
+            <value>set -e ; wget -O /tmp/last_modified.csv.tar http://74804fb637bd8e2fba5b-e0a029c2f87486cddec3b416996a6057.r3.cf1.rackcdn.com/last_modified.csv.tar ; hdfs dfs -copyFromLocal /tmp/last_modified.csv.tar /data/orcid_activities_2020/last_modified.csv.tar ; rm -f /tmp/last_modified.csv.tar
+            </value>
+            <description>the shell command that downloads the lambda file from orcid containing last orcid update information</description>
+        </property>
+        <property>
+            <name>sparkDriverMemory</name>
+            <value>7G</value>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <value>2G</value>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorCores</name>
+            <value>1</value>
+            <description>number of cores used by single executor</description>
+        </property>
+        <property>
+            <name>spark2MaxExecutors</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>oozieActionShareLibForSpark2</name>
+            <description>oozie action sharelib for spark 2.*</description>
+        </property>
+        <property>
+            <name>spark2ExtraListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
+            <description>spark 2.* extra listeners classname</description>
+        </property>
+        <property>
+            <name>spark2SqlQueryExecutionListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
+            <description>spark 2.* sql query execution listeners classname</description>
+        </property>
+        <property>
+            <name>spark2YarnHistoryServerAddress</name>
+            <description>spark 2.* yarn history server address</description>
+        </property>
+        <property>
+            <name>spark2EventLogDir</name>
+            <description>spark 2.* event log dir location</description>
+        </property>
+    </parameters>
+
+    <global> <!-- Settings shared by the actions of this workflow. job-tracker and name-node are provided here because the spark actions in this file do not declare their own; actions that do declare them keep their own values. -->
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
+    <start to="DownloadOrcidAuthors"/> <!-- NOTE(review): execution starts at DownloadOrcidAuthors, which transitions straight to End, so ResetWorkingPath, DownloadLambdaFile, DownloadUpdatedXMLAuthors and GenLastModifiedSeq are never reached in this file; confirm that skipping them is intended. -->
+    
+    
+    <kill name="Kill"> <!-- Target of every error transition in this workflow; the message embeds the error of the last failed node. -->
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    
+    <action name="ResetWorkingPath"> <!-- Deletes the downloads directory and last_modified.csv.tar under workingPath, recreates downloads, then moves on to DownloadLambdaFile. -->
+        <fs>
+            <delete path='${workingPath}/downloads'/>
+            <delete path='${workingPath}/last_modified.csv.tar'/>
+            <mkdir path='${workingPath}/downloads'/>
+        </fs>
+        <ok to="DownloadLambdaFile"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="DownloadLambdaFile"> <!-- Runs the shell_cmd parameter through bash -c, then continues with DownloadUpdatedXMLAuthors. -->
+        <shell xmlns="uri:oozie:shell-action:0.1">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <exec>bash</exec>
+            <argument>-c</argument>
+            <argument>${shell_cmd}</argument>
+            <capture-output/>
+        </shell>
+        <ok to="DownloadUpdatedXMLAuthors"/>
+        <error to="Kill"/>
+    </action>
+
+	<action name="DownloadUpdatedXMLAuthors"> <!-- Runs OrcidDownloader with last_modified.csv.tar as the -f value, downloads/ as the -o value and the token parameter as -t; success leads to End. -->
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.orcid.OrcidDownloader</main-class>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>last_modified.csv.tar</arg>
+            <arg>-o</arg><arg>downloads/</arg>
+            <arg>-t</arg><arg>${token}</arg>
+        </java>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="GenLastModifiedSeq"> <!-- Spark job SparkGenLastModifiedSeq with last_modified.csv.tar as the -f value and last_modified.seq as the -o value; -t receives a single dash rather than the token parameter. The jar name relies on projectVersion, which is not declared in the parameters section. NOTE(review): no transition in this file targets this action; confirm. -->
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>GenLastModifiedSeq</name>
+            <class>eu.dnetlib.doiboost.orcid.SparkGenLastModifiedSeq</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+            </spark-opts>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>last_modified.csv.tar</arg>
+            <arg>-o</arg><arg>last_modified.seq</arg>
+            <arg>-t</arg><arg>-</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="DownloadOrcidAuthors"> <!-- Spark job SparkDownloadOrcidAuthors with last_modified.seq as the -f value, downloads/updated_authors as the -o value and the token parameter as -t. Dynamic allocation is enabled and capped by spark2MaxExecutors; unlike GenLastModifiedSeq, no executor-cores option is passed. Nothing in this file produces last_modified.seq on the path that leads here, so it presumably has to exist under workingPath beforehand; confirm. -->
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>DownloadOrcidAuthors</name>
+            <class>eu.dnetlib.doiboost.orcid.SparkDownloadOrcidAuthors</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --conf spark.dynamicAllocation.enabled=true
+                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
+                --executor-memory=${sparkExecutorMemory}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+            </spark-opts>
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>last_modified.seq</arg>
+            <arg>-o</arg><arg>downloads/updated_authors</arg>
+            <arg>-t</arg><arg>${token}</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+    
+   <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json
@ -0,0 +1,41 @@
+{
+  "reference-entry":      {"cobj":"0013", "value":	"Part of book or chapter of book"},
+  "report":               {"cobj":"0017", "value":	"Report"},
+  "dataset":              {"cobj":"0021", "value":	"Dataset"},
+  "journal-article":      {"cobj":"0001", "value":	"Article"},
+  "reference-book":       {"cobj":"0002", "value":	"Book"},
+  "other":                {"cobj":"0020", "value":	"Other ORP type"},
+  "proceedings-article":  {"cobj":"0004", "value":	"Conference object"},
+  "standard":             {"cobj":"0038", "value":	"Other literature type"},
+  "book-part":            {"cobj":"0002", "value":	"Book"},
+  "monograph":            {"cobj":"0002", "value":	"Book"},
+  "report-series":        {"cobj":"0017", "value":	"Report"},
+  "book":                 {"cobj":"0002", "value":	"Book"},
+  "book-chapter":         {"cobj":"0013", "value":	"Part of book or chapter of book"},
+  "peer-review":          {"cobj":"0015", "value":	"Review"},
+  "book-section":         {"cobj":"0013", "value":	"Part of book or chapter of book"},
+  "book-review":          {"cobj":"0015", "value":	"Review"},
+  "conference-abstract":  {"cobj":"0004", "value":	"Conference object"},
+  "conference-paper":     {"cobj":"0004", "value":	"Conference object"},
+  "conference-poster":    {"cobj":"0004", "value":	"Conference object"},
+  "data-set":             {"cobj":"0021", "value":	"Dataset"},
+  "dictionary-entry":     {"cobj":"0038", "value":	"Other literature type"},
+  "disclosure":           {"cobj":"0038", "value":	"Other literature type"},
+  "dissertation":         {"cobj":"0006", "value":	"Doctoral thesis"},
+  "edited-book":          {"cobj":"0002", "value":	"Book"},
+  "encyclopedia-entry":   {"cobj":"0038", "value":	"Other literature type"},
+  "lecture-speech":       {"cobj":"0010", "value":	"Lecture"},
+  "license":              {"cobj":"0038", "value":	"Other literature type"},
+  "magazine-article":     {"cobj":"0005", "value":	"Contribution for newspaper or weekly magazine"},
+  "manual":               {"cobj":"0038", "value":	"Other literature type"},
+  "newsletter-article":   {"cobj":"0012", "value":	"Newsletter"},
+  "newspaper-article":    {"cobj":"0005", "value":	"Contribution for newspaper or weekly magazine"},
+  "patent":               {"cobj":"0019", "value":	"Patent"},
+  "research-technique":   {"cobj":"0020", "value":	"Other ORP type"},
+  "research-tool":        {"cobj":"0020", "value":	"Other ORP type"},
+  "standards-and-policy": {"cobj":"0038", "value":	"Other literature type"},
+  "supervised-student-publication": {"cobj":"0001", "value":	"Article"},
+  "technical-standard":   {"cobj":"0038", "value":	"Other literature type"},
+  "website":              {"cobj":"0020", "value":	"Other ORP type"},
+  "working-paper":        {"cobj":"0014", "value":	"Research"}
+}
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml
@ -0,0 +1,95 @@
+<workflow-app name="gen_orcid_no_doi_dataset" xmlns="uri:oozie:workflow:0.5">
+    <!-- Overview: ResetWorkingPath deletes the no_doi_dataset folder under the working path, then
+         GenOrcidNoDoiDataset runs the SparkGenEnrichedOrcidWorks Spark job; any action error goes to Kill. -->
+    <!-- Parameters referenced by the actions below; only the two listener properties declare a default value. -->
+    <parameters>
+        <property>
+            <name>sparkDriverMemory</name>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorCores</name>
+            <description>number of cores used by single executor</description>
+        </property>
+        <property>
+            <name>oozieActionShareLibForSpark2</name>
+            <description>oozie action sharelib for spark 2.*</description>
+        </property>
+        <property>
+            <name>spark2ExtraListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
+            <description>spark 2.* extra listeners classname</description>
+        </property>
+        <property>
+            <name>spark2SqlQueryExecutionListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
+            <description>spark 2.* sql query execution listeners classname</description>
+        </property>
+        <property>
+            <name>spark2YarnHistoryServerAddress</name>
+            <description>spark 2.* yarn history server address</description>
+        </property>
+        <property>
+            <name>spark2EventLogDir</name>
+            <description>spark 2.* event log dir location</description>
+        </property>
+        <property>
+            <name>workingPath</name>
+            <description>the working dir base path</description>
+        </property>
+    </parameters>
+
+    <!-- Settings shared by every action: job tracker, name node and the Spark sharelib to use. -->
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
+    <start to="ResetWorkingPath"/>
+
+    <!-- Terminal failure node: reports the error message of the last failed action. -->
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    
+    <!-- Step 1: remove the output of a previous run before the Spark job is started. -->
+    <action name="ResetWorkingPath">
+        <fs>
+            <delete path='${workingPath}/no_doi_dataset'/>
+        </fs>
+        <ok to="GenOrcidNoDoiDataset"/>
+        <error to="Kill"/>
+    </action>
+
+    <!-- Step 2: run SparkGenEnrichedOrcidWorks on YARN in cluster mode. -->
+    <action name="GenOrcidNoDoiDataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>GenOrcidNoDoiDataset</name>
+            <class>eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+            </spark-opts>
+            <!-- NOTE(review): the values of -ow and -oew look like paths relative to the -w working dir (the
+                 reset action deletes no_doi_dataset under that dir); confirm against SparkGenEnrichedOrcidWorks. -->
+            <arg>-w</arg><arg>${workingPath}/</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-f</arg><arg>-</arg>
+            <arg>-ow</arg><arg>no_doi_works/</arg>
+            <arg>-oew</arg><arg>no_doi_dataset</arg>
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+    
+   <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/QueryTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/QueryTest.scala
@ -1,63 +0,0 @@
-package eu.dnetlib.dhp.doiboost
-
-import eu.dnetlib.dhp.schema.oaf.{Publication, Relation}
-import org.apache.spark.SparkContext
-import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
-import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
-import org.json4s
-import org.json4s.DefaultFormats
-import org.json4s.jackson.JsonMethods._
-
-import scala.collection.JavaConverters._
-class QueryTest {
-
-  def extract_payload(input:String) :String = {
-
-    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
-    lazy val json: json4s.JValue = parse(input)
-
-
-    compact(render((json \ "payload")))
-
-
-
-  }
-
-
-  def has_ands(r:Relation) :Boolean = {
-
-    r.getCollectedfrom!= null && r.getCollectedfrom.asScala.count(k => k.getValue.contains("Australian")) > 0
-
-  }
-
-  def hasInstanceWithUrl(p:Publication):Boolean = {
-    val c = p.getInstance.asScala.map(i => i.getUrl!= null && !i.getUrl.isEmpty).size
-    !(!p.getInstance.isEmpty && c == p.getInstance().size)
-  }
-
-
-  def hasNullAccessRights(p:Publication):Boolean = {
-    val c = p.getInstance.asScala.map(i => i.getAccessright!= null && i.getAccessright.getClassname.nonEmpty).size
-    !p.getInstance.isEmpty && c == p.getInstance().size()
-  }
-
-
-  def myQuery(spark:SparkSession, sc:SparkContext): Unit = {
-    implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
-
-
-
-    val mapper = new ObjectMapper()
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-
-
-      val ds:Dataset[Publication] = spark.read.load("/tmp/p").as[Publication]
-
-
-
-    ds.filter(p =>p.getBestaccessright!= null && p.getBestaccessright.getClassname.nonEmpty).count()
-
-
-  }
-
-}
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala
@ -158,7 +158,7 @@ class CrossrefMappingTest {


    rels.foreach(s => logger.info(s.getTarget))
-    assertEquals(rels.size, 3 )
+    assertEquals(rels.size, 6 )


  }
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala
@ -1,5 +1,8 @@
 package eu.dnetlib.doiboost.orcid

+import eu.dnetlib.dhp.schema.oaf.Publication
+import eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF.getClass
+import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
 import org.codehaus.jackson.map.ObjectMapper
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
@ -21,6 +24,30 @@ class MappingORCIDToOAFTest {
    })
  }

+//  @Test
+//  def testOAFConvert():Unit ={
+//
+//    val spark: SparkSession =
+//      SparkSession
+//        .builder()
+//        .appName(getClass.getSimpleName)
+//        .master("local[*]").getOrCreate()
+//
+//
+//    SparkConvertORCIDToOAF.run( spark,"/Users/sandro/Downloads/orcid", "/Users/sandro/Downloads/orcid_oaf")
+//    implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
+//
+//    val df = spark.read.load("/Users/sandro/Downloads/orcid_oaf").as[Publication]
+//    println(df.first.getId)
+//    println(mapper.writeValueAsString(df.first()))
+//
+//
+//
+//
+//  }
+
+
+



--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java
@ -3,23 +3,34 @@ package eu.dnetlib.doiboost.orcid;

 import static org.junit.jupiter.api.Assertions.assertTrue;

-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.time.Duration;
+import java.time.LocalDateTime;
+import java.time.temporal.TemporalUnit;
 import java.util.Arrays;
 import java.util.Date;
 import java.util.List;

+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 import org.apache.commons.io.IOUtils;
 import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
+import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull;
 import org.junit.jupiter.api.Test;
+import org.mortbay.log.Log;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import jdk.nashorn.internal.ir.annotations.Ignore;

 public class OrcidClientTest {
 	final String orcidId = "0000-0001-7291-3210";
@ -32,16 +43,64 @@ public class OrcidClientTest {
 	String lastUpdate = "2019-09-30 00:00:00";
 	String shortDate = "2020-05-06 16:06:11";

-//	curl -i -H "Accept: application/vnd.orcid+xml" 
+//	curl -i -H "Accept: application/vnd.orcid+xml"
 //	-H 'Authorization: Bearer 78fdb232-7105-4086-8570-e153f4198e3d'
 //	'https://api.orcid.org/v3.0/0000-0001-7291-3210/record'

-	public String testDownloadRecord(String orcidId) throws Exception {
+	@Test
+	private void multipleDownloadTest() throws Exception {
+		int toDownload = 10;
+		long start = System.currentTimeMillis();
+		OrcidDownloader downloader = new OrcidDownloader();
+		TarArchiveInputStream input = new TarArchiveInputStream(
+			new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar")));
+		TarArchiveEntry entry = input.getNextTarEntry();
+		BufferedReader br = null;
+		StringBuilder sb = new StringBuilder();
+		int rowNum = 0;
+		int entryNum = 0;
+		int modified = 0;
+		while (entry != null) {
+			br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput
+			String line;
+			while ((line = br.readLine()) != null) {
+				String[] values = line.toString().split(",");
+				List<String> recordInfo = Arrays.asList(values);
+				String orcidId = recordInfo.get(0);
+				if (downloader.isModified(orcidId, recordInfo.get(3))) {
+					slowedDownDownload(orcidId);
+					modified++;
+				}
+				rowNum++;
+				if (modified > toDownload) {
+					break;
+				}
+			}
+			entryNum++;
+			entry = input.getNextTarEntry();
+		}
+		long end = System.currentTimeMillis();
+		logToFile("start test: " + new Date(start).toString());
+		logToFile("end test: " + new Date(end).toString());
+	}
+
+	/**
+	 * Downloads the record of the given ORCID id and stores it as {@code /tmp/downloaded_<orcid>.xml}.
+	 * This is a helper rather than a test case: it needs an argument that JUnit cannot supply, so it no
+	 * longer carries a {@code @Test} annotation.
+	 *
+	 * @param orcid the ORCID id of the record to download
+	 * @throws Exception if the download or the file write fails
+	 */
+	private void downloadTest(String orcid) throws Exception {
+		String record = testDownloadRecord(orcid);
+		String filename = "/tmp/downloaded_".concat(orcid).concat(".xml");
+		// try-with-resources: the output stream was previously left open
+		try (OutputStream outStream = new FileOutputStream(new File(filename))) {
+			IOUtils.write(record.getBytes(), outStream);
+		}
+	}
+
+	private String testDownloadRecord(String orcidId) throws Exception {
 		try (CloseableHttpClient client = HttpClients.createDefault()) {
 			HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
 			httpGet.addHeader("Accept", "application/vnd.orcid+xml");
 			httpGet.addHeader("Authorization", "Bearer 78fdb232-7105-4086-8570-e153f4198e3d");
+			logToFile("start connection: " + new Date(System.currentTimeMillis()).toString());
 			CloseableHttpResponse response = client.execute(httpGet);
+			logToFile("end connection: " + new Date(System.currentTimeMillis()).toString());
 			if (response.getStatusLine().getStatusCode() != 200) {
 				System.out
 					.println("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
@ -53,8 +112,8 @@ public class OrcidClientTest {
 		return new String("");
 	}

-//	@Test
-	public void testLambdaFileParser() throws Exception {
+	// @Test
+	private void testLambdaFileParser() throws Exception {
 		try (BufferedReader br = new BufferedReader(
 			new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) {
 			String line;
@ -99,8 +158,8 @@ public class OrcidClientTest {
 		}
 	}

-//	@Test
-	public void getRecordDatestamp() throws ParseException {
+	// @Test
+	private void getRecordDatestamp() throws ParseException {
 		Date toRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toRetrieveDate);
 		Date toNotRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toNotRetrieveDate);
 		Date lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate);
@ -108,7 +167,7 @@ public class OrcidClientTest {
 		assertTrue(!toNotRetrieveDateDt.after(lastUpdateDt));
 	}

-	public void testDate(String value) throws ParseException {
+	private void testDate(String value) throws ParseException {
 		System.out.println(value.toString());
 		if (value.length() != 19) {
 			value = value.substring(0, 19);
@ -117,20 +176,126 @@ public class OrcidClientTest {
 		System.out.println(valueDt.toString());
 	}

-//	@Test
-	public void testModifiedDate() throws ParseException {
+	// @Test
+	@Ignore
+	private void testModifiedDate() throws ParseException {
 		testDate(toRetrieveDate);
 		testDate(toNotRetrieveDate);
 		testDate(shortDate);
 	}

-//	@Test
-	public void testReadBase64CompressedRecord() throws Exception {
+	@Test
+	private void testReadBase64CompressedRecord() throws Exception {
 		final String base64CompressedRecord = IOUtils
-			.toString(getClass().getResourceAsStream("0000-0001-6645-509X.compressed.base64"));
+			.toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64"));
 		final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord);
-		System.out.println(recordFromSeqFile);
-		final String downloadedRecord = testDownloadRecord("0000-0001-6645-509X");
+		logToFile("\n\ndownloaded \n\n" + recordFromSeqFile);
+		final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161");
 		assertTrue(recordFromSeqFile.equals(downloadedRecord));
 	}
+
+	/**
+	 * Reads the local archive {@code /develop/last_modified.csv.tar} and checks that it holds a single entry
+	 * whose rows have four comma separated columns, that the fourth column of the first row is
+	 * {@code last_modified} and that the second row starts with the expected ORCID id.
+	 * NOTE(review): the method is private and JUnit Jupiter does not execute private test methods; confirm
+	 * that keeping it out of the automated build is intended.
+	 *
+	 * @throws Exception if the archive cannot be read
+	 */
+	@Test
+	private void lambdaFileReaderTest() throws Exception {
+		// try-with-resources: the archive stream was previously never closed
+		try (TarArchiveInputStream input = new TarArchiveInputStream(
+			new GzipCompressorInputStream(new FileInputStream("/develop/last_modified.csv.tar")))) {
+			int rowNum = 0;
+			int entryNum = 0;
+			TarArchiveEntry entry = input.getNextTarEntry();
+			while (entry != null) {
+				// not closed on purpose: closing the reader would also close the shared tar stream
+				BufferedReader br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput
+				String line;
+				while ((line = br.readLine()) != null) {
+					List<String> recordInfo = Arrays.asList(line.split(","));
+					assertTrue(recordInfo.size() == 4);
+
+					rowNum++;
+					if (rowNum == 1) {
+						assertTrue(recordInfo.get(3).equals("last_modified"));
+					} else if (rowNum == 2) {
+						assertTrue(recordInfo.get(0).equals("0000-0002-0499-7333"));
+					}
+				}
+				entryNum++;
+				assertTrue(entryNum == 1);
+				entry = input.getNextTarEntry();
+			}
+		}
+	}
+
+	/**
+	 * Counts the rows of the local archive {@code /tmp/last_modified.csv.tar} and how many of them
+	 * {@code OrcidDownloader.isModified} reports as modified, then appends both counters to the log file.
+	 * NOTE(review): the method is private and JUnit Jupiter does not execute private test methods; confirm
+	 * that keeping it out of the automated build is intended.
+	 *
+	 * @throws Exception if the archive cannot be read or the log file cannot be written
+	 */
+	@Test
+	private void lambdaFileCounterTest() throws Exception {
+		final OrcidDownloader downloader = new OrcidDownloader();
+		int rowNum = 0;
+		int modified = 0;
+		// try-with-resources: the archive stream was previously never closed
+		try (TarArchiveInputStream input = new TarArchiveInputStream(
+			new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar")))) {
+			TarArchiveEntry entry = input.getNextTarEntry();
+			while (entry != null) {
+				// not closed on purpose: closing the reader would also close the shared tar stream
+				BufferedReader br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput
+				String line;
+				while ((line = br.readLine()) != null) {
+					List<String> recordInfo = Arrays.asList(line.split(","));
+					if (downloader.isModified(recordInfo.get(0), recordInfo.get(3))) {
+						modified++;
+					}
+					rowNum++;
+				}
+				entry = input.getNextTarEntry();
+			}
+		}
+		logToFile("rowNum: " + rowNum);
+		logToFile("modified: " + modified);
+	}
+
+	/**
+	 * Appends one line to the log file {@code /tmp/orcid_log.txt}.
+	 *
+	 * @param log the message to append; a trailing newline is added
+	 * @throws IOException if the log file cannot be written
+	 */
+	private void logToFile(String log)
+		throws IOException {
+		Path path = Paths.get("/tmp/orcid_log.txt");
+		// CREATE is required next to APPEND: with APPEND alone the write fails when the file does not exist yet
+		Files
+			.write(
+				path, log.concat("\n").getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
+	}
+
+	/**
+	 * Downloads one fixed ORCID record through {@code slowedDownDownload} and stores it as
+	 * {@code /tmp/downloaded_<orcid>.xml}.
+	 * NOTE(review): the method is private and JUnit Jupiter does not execute private test methods; confirm
+	 * that keeping it out of the automated build is intended.
+	 *
+	 * @throws Exception if the file cannot be written
+	 */
+	@Test
+	private void slowedDownDownloadTest() throws Exception {
+		String orcid = "0000-0001-5496-1243";
+		String record = slowedDownDownload(orcid);
+		String filename = "/tmp/downloaded_".concat(orcid).concat(".xml");
+		// try-with-resources: the output stream was previously left open
+		try (OutputStream outStream = new FileOutputStream(new File(filename))) {
+			IOUtils.write(record.getBytes(), outStream);
+		}
+	}
+
+	/**
+	 * Downloads the record of the given ORCID id, waiting so that request plus pause last at least one second,
+	 * and writes the measured times to the log file.
+	 *
+	 * @param orcidId the ORCID id of the record to download
+	 * @return the response body, or an empty string when the request fails
+	 * @throws Exception declared for callers; failures raised while downloading are caught and printed
+	 */
+	private String slowedDownDownload(String orcidId) throws Exception {
+		try (CloseableHttpClient client = HttpClients.createDefault()) {
+			HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
+			httpGet.addHeader("Accept", "application/vnd.orcid+xml");
+			// NOTE(review): hard-coded bearer token; it should come from configuration instead of the sources
+			httpGet.addHeader("Authorization", "Bearer 78fdb232-7105-4086-8570-e153f4198e3d");
+			long start = System.currentTimeMillis();
+			// try-with-resources: the response (and its connection) was previously never released
+			try (CloseableHttpResponse response = client.execute(httpGet)) {
+				long endReq = System.currentTimeMillis();
+				long reqSessionDuration = endReq - start;
+				logToFile("req time (millisec): " + reqSessionDuration);
+				if (reqSessionDuration < 1000) {
+					logToFile("wait ....");
+					Thread.sleep(1000 - reqSessionDuration);
+				}
+				long end = System.currentTimeMillis();
+				long total = end - start;
+				logToFile("total time (millisec): " + total);
+				if (response.getStatusLine().getStatusCode() != 200) {
+					logToFile("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
+				}
+				return IOUtils.toString(response.getEntity().getContent());
+			}
+		} catch (InterruptedException e) {
+			// keep the interrupt visible to the caller's thread instead of losing it in the generic handler
+			Thread.currentThread().interrupt();
+			e.printStackTrace();
+		} catch (Throwable e) {
+			e.printStackTrace();
+		}
+		return new String("");
+	}
 }
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java
@ -2,17 +2,19 @@
 package eu.dnetlib.doiboost.orcid.xml;

 import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;

 import org.apache.commons.io.IOUtils;
 import org.junit.jupiter.api.Test;

-import eu.dnetlib.doiboost.orcid.model.AuthorData;
+import eu.dnetlib.dhp.schema.orcid.AuthorData;
 import eu.dnetlib.doiboost.orcid.model.WorkData;
+import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter;

 public class XMLRecordParserTest {

 	@Test
-	public void testOrcidAuthorDataXMLParser() throws Exception {
+	private void testOrcidAuthorDataXMLParser() throws Exception {

 		String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_0000-0001-6828-479X.xml"));

@ -27,7 +29,7 @@ public class XMLRecordParserTest {
 	}

 	@Test
-	public void testOrcidXMLErrorRecordParser() throws Exception {
+	private void testOrcidXMLErrorRecordParser() throws Exception {

 		String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_error.xml"));

@ -40,11 +42,11 @@ public class XMLRecordParserTest {
 	}

 	@Test
-	public void testOrcidWorkDataXMLParser() throws Exception {
+	private void testOrcidWorkDataXMLParser() throws Exception {

 		String xml = IOUtils
 			.toString(
-				this.getClass().getResourceAsStream("activity_work_0000-0002-5982-8983.xml"));
+				this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml"));

 		XMLRecordParser p = new XMLRecordParser();

@ -55,4 +57,21 @@ public class XMLRecordParserTest {
 		assertNotNull(workData.getDoi());
 		System.out.println("doi: " + workData.getDoi());
 	}
+
+	@Test
+	public void testOrcidOtherNamesXMLParser() throws Exception {
+
+		String xml = IOUtils
+			.toString(
+				this.getClass().getResourceAsStream("summary_0000-0001-5109-1000_othername.xml"));
+
+		XMLRecordParser p = new XMLRecordParser();
+
+		AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes());
+		assertNotNull(authorData);
+		assertNotNull(authorData.getOtherNames());
+		assertTrue(authorData.getOtherNames().get(0).equals("Andrew C. Porteus"));
+		String jsonData = JsonWriter.create(authorData);
+		assertNotNull(jsonData);
+	}
 }
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java
@ -0,0 +1,78 @@
+
+package eu.dnetlib.doiboost.orcidnodoi;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.JsonElement;
+import com.google.gson.JsonParser;
+
+import eu.dnetlib.dhp.schema.oaf.Publication;
+import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf;
+import jdk.nashorn.internal.ir.annotations.Ignore;
+
+/**
+ * Checks the conversion of a publication in JSON form into an OAF {@link Publication} performed by
+ * {@link PublicationToOaf}.
+ */
+public class PublicationToOafTest {
+
+	private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class);
+
+	/**
+	 * Converts the {@code publication.json} test resource and verifies identifiers, title, dates, authors,
+	 * external references and instances of the resulting publication.
+	 * The test stays disabled, as before: it used to be private (JUnit Jupiter skips private test methods)
+	 * and carried an {@code @Ignore} annotation that does not belong to JUnit, so it never ran. It is now
+	 * disabled through the JUnit annotation, which makes the skip visible in the test report.
+	 *
+	 * @throws Exception if the resource cannot be read or converted
+	 */
+	@Test
+	@org.junit.jupiter.api.Disabled("never executed so far: enable once verified against publication.json")
+	void convertOafPublicationTest() throws Exception {
+		String jsonPublication = IOUtils
+			.toString(
+				PublicationToOafTest.class.getResourceAsStream("publication.json"));
+		JsonElement j = new JsonParser().parse(jsonPublication);
+		logger.info("json publication loaded: {}", j);
+		PublicationToOaf publicationToOaf = new PublicationToOaf();
+		Publication oafPublication = (Publication) publicationToOaf
+			.generatePublicationActionsFromDump(j.getAsJsonObject());
+		assertNotNull(oafPublication.getId());
+		assertNotNull(oafPublication.getOriginalId());
+		// expected value first, so that a failure message reports the two values the right way round
+		assertEquals("60153327", oafPublication.getOriginalId().get(0));
+		logger.info("oafPublication.getId(): {}", oafPublication.getId());
+		assertEquals(
+			"Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp",
+			oafPublication.getTitle().get(0).getValue());
+		assertNotNull(oafPublication.getLastupdatetimestamp());
+		assertNotNull(oafPublication.getDateofcollection());
+		assertNotNull(oafPublication.getDateoftransformation());
+		assertEquals(7, oafPublication.getAuthor().size());
+		oafPublication.getAuthor().forEach(a -> {
+			assertNotNull(a.getFullname());
+			assertNotNull(a.getRank());
+			logger.info("a.getFullname(): {}", a.getFullname());
+			if (a.getName() != null) {
+				logger.info("a.getName(): {}", a.getName());
+			}
+			if (a.getSurname() != null) {
+				logger.info("a.getSurname(): {}", a.getSurname());
+			}
+			logger.info("a.getRank(): {}", a.getRank());
+			// guard against an empty list before reading the first pid
+			if (a.getPid() != null && !a.getPid().isEmpty()) {
+				logger.info("a.getPid(): {}", a.getPid().get(0).getValue());
+			}
+
+		});
+		assertNotNull(oafPublication.getCollectedfrom());
+		// guard against an empty list before reading the first source
+		if (oafPublication.getSource() != null && !oafPublication.getSource().isEmpty()) {
+			logger.info((oafPublication.getSource().get(0).getValue()));
+		}
+		if (oafPublication.getExternalReference() != null) {
+			oafPublication.getExternalReference().forEach(e -> {
+				assertNotNull(e.getRefidentifier());
+				assertEquals("dnet:pid_types", e.getQualifier().getSchemeid());
+			});
+		}
+		assertNotNull(oafPublication.getInstance());
+		oafPublication.getInstance().forEach(i -> {
+			assertNotNull(i.getInstancetype().getClassid());
+			logger.info("i.getInstancetype().getClassid(): {}", i.getInstancetype().getClassid());
+			assertNotNull(i.getInstancetype().getClassname());
+			logger.info("i.getInstancetype().getClassname(): {}", i.getInstancetype().getClassname());
+		});
+	}
+}
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java
@ -0,0 +1,348 @@
+
+package eu.dnetlib.doiboost.orcidnodoi.xml;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.util.*;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Lists;
+import com.ximpleware.NavException;
+import com.ximpleware.ParseException;
+import com.ximpleware.XPathEvalException;
+import com.ximpleware.XPathParseException;
+
+import eu.dnetlib.dhp.parser.utility.VtdException;
+import eu.dnetlib.dhp.schema.orcid.AuthorData;
+import eu.dnetlib.doiboost.orcidnodoi.model.Contributor;
+import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;
+import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher;
+
+public class OrcidNoDoiTest {
+
+	private static final Logger logger = LoggerFactory.getLogger(OrcidNoDoiTest.class);
+
+	static String nameA = "Khairy";
+	static String surnameA = "Abdel Dayem";
+	static String orcidIdA = "0000-0003-2760-1191";
+
+	/**
+	 * Parses the work record {@code activity_work_0000-0002-2536-4498.xml} and logs the extracted fields;
+	 * only the presence of the parsed work, its oid and its titles is asserted.
+	 */
+	@Test
+	public void readPublicationFieldsTest()
+		throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
+		logger.info("running readPublicationFieldsTest ....");
+		String xml = IOUtils
+			.toString(
+				OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0002-2536-4498.xml"));
+
+		// the former null checks on xml and on the freshly created parser could never be true and were removed
+		XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
+		WorkDataNoDoi workData = null;
+		try {
+			workData = p.VTDParseWorkData(xml.getBytes());
+		} catch (Exception e) {
+			// logged only: the assertion below fails the test when parsing did not produce a result
+			logger.error("parsing xml", e);
+		}
+		assertNotNull(workData);
+		assertNotNull(workData.getOid());
+		logger.info("oid: " + workData.getOid());
+		assertNotNull(workData.getTitles());
+		logger.info("titles: ");
+		workData.getTitles().forEach(t -> logger.info(t));
+		logger.info("source: " + workData.getSourceName());
+		logger.info("type: " + workData.getType());
+		logger.info("urls: ");
+		workData.getUrls().forEach(u -> logger.info(u));
+		logger.info("publication date: ");
+		workData
+			.getPublicationDates()
+			.forEach(d -> logger.info(d.getYear() + " - " + d.getMonth() + " - " + d.getDay()));
+		logger.info("external id: ");
+		// external ids with a relationship other than "self" are dropped before logging
+		workData.getExtIds().removeIf(e -> e.getRelationShip() != null && !e.getRelationShip().equals("self"));
+		workData
+			.getExtIds()
+			.forEach(e -> logger.info(e.getType() + " - " + e.getValue() + " - " + e.getRelationShip()));
+		logger.info("contributors: ");
+		workData
+			.getContributors()
+			.forEach(c -> logger.info(c.getName() + " - " + c.getRole() + " - " + c.getSequence()));
+
+	}
+
+	/**
+	 * Parses a work whose first contributor has the credit name "Abdel-Dayem K", runs the author matcher
+	 * for the author defined by the static test fields and checks that the work still has six contributors.
+	 *
+	 * @throws Exception if the resource cannot be read or the matching fails
+	 */
+	@Test
+	public void authorDoubleMatchTest() throws Exception {
+		logger.info("running authorDoubleMatchTest ....");
+		String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml";
+		AuthorData author = new AuthorData();
+		author.setName(nameA);
+		author.setSurname(surnameA);
+		author.setOid(orcidIdA);
+		String xml = IOUtils
+			.toString(
+				OrcidNoDoiTest.class.getResourceAsStream(orcidWork));
+
+		// the former null checks on xml and on the freshly created parser could never be true and were removed
+		XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
+		WorkDataNoDoi workData = null;
+		try {
+			workData = p.VTDParseWorkData(xml.getBytes());
+		} catch (Exception e) {
+			// logged only: the assertion below fails the test when parsing did not produce a result
+			logger.error("parsing xml", e);
+		}
+		assertNotNull(workData);
+
+		Contributor a = workData.getContributors().get(0);
+		assertTrue(a.getCreditName().equals("Abdel-Dayem K"));
+
+		AuthorMatcher.match(author, workData.getContributors());
+
+		assertTrue(workData.getContributors().size() == 6);
+	}
+
+	/**
+	 * Parses a work with five contributors and checks credit name, sequence and role of each of them,
+	 * including the fields that are expected to be blank.
+	 */
+	@Test
+	public void readContributorsTest()
+		throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
+		logger.info("running readContributorsTest ....");
+		String xml = IOUtils
+			.toString(
+				OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191_contributors.xml"));
+
+		// the former null checks on xml and on the freshly created parser could never be true and were removed
+		XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
+		WorkDataNoDoi workData = null;
+		try {
+			workData = p.VTDParseWorkData(xml.getBytes());
+		} catch (Exception e) {
+			logger.error("parsing xml", e);
+		}
+		// report a parsing failure as a failed assertion rather than as a NullPointerException below
+		assertNotNull(workData);
+		List<Contributor> contributors = workData.getContributors();
+		assertNotNull(contributors);
+		assertTrue(contributors.size() == 5);
+		assertTrue(StringUtils.isBlank(contributors.get(0).getCreditName()));
+		assertTrue(contributors.get(0).getSequence().equals("seq0"));
+		assertTrue(contributors.get(0).getRole().equals("role0"));
+		assertTrue(contributors.get(1).getCreditName().equals("creditname1"));
+		assertTrue(StringUtils.isBlank(contributors.get(1).getSequence()));
+		assertTrue(StringUtils.isBlank(contributors.get(1).getRole()));
+		assertTrue(contributors.get(2).getCreditName().equals("creditname2"));
+		assertTrue(contributors.get(2).getSequence().equals("seq2"));
+		assertTrue(StringUtils.isBlank(contributors.get(2).getRole()));
+		assertTrue(contributors.get(3).getCreditName().equals("creditname3"));
+		assertTrue(StringUtils.isBlank(contributors.get(3).getSequence()));
+		assertTrue(contributors.get(3).getRole().equals("role3"));
+		assertTrue(StringUtils.isBlank(contributors.get(4).getCreditName()));
+		assertTrue(contributors.get(4).getSequence().equals("seq4"));
+		assertTrue(contributors.get(4).getRole().equals("role4"));
+	}
+
+	/**
+	 * Parses a work whose first contributor has the credit name "Parkhouse, H.", runs the author matcher
+	 * and checks that this contributor received the oid, name and surname of the given author.
+	 *
+	 * @throws Exception if the resource cannot be read or the matching fails
+	 */
+	@Test
+	public void authorSimpleMatchTest() throws Exception {
+		String orcidWork = "activity_work_0000-0002-5982-8983.xml";
+		AuthorData author = new AuthorData();
+		author.setName("Parkhouse");
+		author.setSurname("H.");
+		author.setOid("0000-0002-5982-8983");
+		String xml = IOUtils
+			.toString(
+				OrcidNoDoiTest.class.getResourceAsStream(orcidWork));
+
+		// the former null checks on xml and on the freshly created parser could never be true and were removed
+		XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
+		WorkDataNoDoi workData = null;
+		try {
+			workData = p.VTDParseWorkData(xml.getBytes());
+		} catch (Exception e) {
+			// logged only: the assertion below fails the test when parsing did not produce a result
+			logger.error("parsing xml", e);
+		}
+		assertNotNull(workData);
+
+		Contributor a = workData.getContributors().get(0);
+		assertTrue(a.getCreditName().equals("Parkhouse, H."));
+
+		AuthorMatcher.match(author, workData.getContributors());
+
+		assertTrue(workData.getContributors().size() == 2);
+		Contributor c = workData.getContributors().get(0);
+		assertTrue(c.getOid().equals("0000-0002-5982-8983"));
+		assertTrue(c.getName().equals("Parkhouse"));
+		assertTrue(c.getSurname().equals("H."));
+		assertTrue(c.getCreditName().equals("Parkhouse, H."));
+	}
+
+	/**
+	 * Walks through the steps of {@link AuthorMatcher} by hand: first the simple match on a single
+	 * contributor, then the simple match followed by the score-based best match on two contributors
+	 * with similar credit names.
+	 */
+	@Test
+	public void match() {
+
+		// Case 1: a single contributor that must be flagged by the simple match.
+		AuthorData author = new AuthorData();
+		author.setName("Joe");
+		author.setSurname("Dodge");
+		author.setOid("0000-1111-2222-3333");
+		Contributor contributor = new Contributor();
+		contributor.setCreditName("Joe Dodge");
+		List<Contributor> contributors = Arrays.asList(contributor);
+		AuthorMatcher am = new AuthorMatcher();
+
+		assertTrue(markSimpleMatches(am, author, contributors) == 1);
+		am.updateAuthorsSimpleMatch(contributors, author);
+		assertTrue(contributors.get(0).getName().equals("Joe"));
+		assertTrue(contributors.get(0).getSurname().equals("Dodge"));
+		assertTrue(contributors.get(0).getCreditName().equals("Joe Dodge"));
+		assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333"));
+
+		// Case 2: two contributors pass the simple match, only one of them is the best match.
+		AuthorData authorX = new AuthorData();
+		authorX.setName(nameA);
+		authorX.setSurname(surnameA);
+		authorX.setOid(orcidIdA);
+		Contributor contributorA = new Contributor();
+		contributorA.setCreditName("Abdel-Dayem Khai");
+		Contributor contributorB = new Contributor();
+		contributorB.setCreditName("Abdel-Dayem Fake");
+		List<Contributor> contributorList = new ArrayList<>();
+		contributorList.add(contributorA);
+		contributorList.add(contributorB);
+
+		// The other names checked here are those of authorX (the original code read them from the
+		// unrelated author of case 1).
+		assertTrue(markSimpleMatches(am, authorX, contributorList) == 2);
+		assertTrue(contributorList.get(0).isSimpleMatch());
+		assertTrue(contributorList.get(1).isSimpleMatch());
+
+		// Score every simple-matched contributor and keep the highest score that reaches the threshold.
+		Optional<Contributor> optCon = contributorList
+			.stream()
+			.filter(c -> c.isSimpleMatch())
+			.filter(c -> !StringUtils.isBlank(c.getCreditName()))
+			.map(c -> {
+				c.setScore(am.bestMatch(authorX.getName(), authorX.getSurname(), c.getCreditName()));
+				return c;
+			})
+			.filter(c -> c.getScore() >= AuthorMatcher.threshold)
+			.max(Comparator.comparing(c -> c.getScore()));
+		assertTrue(optCon.isPresent());
+
+		final Contributor bestMatchContributor = optCon.get();
+		bestMatchContributor.setBestMatch(true);
+		assertTrue(bestMatchContributor.getCreditName().equals("Abdel-Dayem Khai"));
+		assertTrue(contributorList.get(0).isBestMatch());
+		assertTrue(!contributorList.get(1).isBestMatch());
+		am.updateAuthorsSimilarityMatch(contributorList, authorX);
+		assertTrue(contributorList.get(0).getName().equals(nameA));
+		assertTrue(contributorList.get(0).getSurname().equals(surnameA));
+		assertTrue(contributorList.get(0).getCreditName().equals("Abdel-Dayem Khai"));
+		assertTrue(contributorList.get(0).getOid().equals(orcidIdA));
+		assertTrue(StringUtils.isBlank(contributorList.get(1).getOid()));
+	}
+
+	/**
+	 * Flags every contributor with a non-blank credit name for which one of the matcher's simple checks
+	 * against the author's name, surname or other names succeeds.
+	 *
+	 * @param am the matcher providing the simple checks
+	 * @param author the author whose name, surname and other names are compared with the credit names
+	 * @param contributors the contributors to check; the matching ones get their simple-match flag set
+	 * @return the number of contributors that have been flagged
+	 */
+	private static int markSimpleMatches(AuthorMatcher am, AuthorData author, List<Contributor> contributors) {
+		int matches = 0;
+		for (Contributor c : contributors) {
+			if (StringUtils.isBlank(c.getCreditName())) {
+				continue;
+			}
+			if (am.simpleMatch(c.getCreditName(), author.getName()) ||
+				am.simpleMatch(c.getCreditName(), author.getSurname()) ||
+				am.simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) {
+				matches++;
+				c.setSimpleMatch(true);
+			}
+		}
+		return matches;
+	}
+
+	/**
+	 * Parses the work record {@code activity_work_0000-0003-2760-1191.xml} and runs
+	 * {@link AuthorMatcher#match} on its contributors, expecting five contributors of which the first
+	 * one ends up carrying the author's name, surname and ORCID id.
+	 *
+	 * @throws Exception if the resource cannot be read, the record cannot be parsed or the matching fails
+	 */
+	@Test
+	public void authorBestMatchTest() throws Exception {
+		String name = "Khairy";
+		String surname = "Abdel Dayem";
+		String orcidWork = "activity_work_0000-0003-2760-1191.xml";
+		AuthorData author = new AuthorData();
+		author.setName(name);
+		author.setSurname(surname);
+		author.setOid(orcidIdA);
+
+		// Read the raw bytes: no decode/encode round trip through the platform charset, and the
+		// stream is closed when the block exits.
+		byte[] xml;
+		try (java.io.InputStream in = OrcidNoDoiTest.class.getResourceAsStream(orcidWork)) {
+			// getResourceAsStream signals a missing resource by returning null, not by throwing
+			assertNotNull(in);
+			xml = IOUtils.toByteArray(in);
+		}
+
+		// A parsing failure is propagated, so the test fails with its real cause instead of a later NPE.
+		XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
+		WorkDataNoDoi workData = p.VTDParseWorkData(xml);
+		assertNotNull(workData);
+
+		AuthorMatcher.match(author, workData.getContributors());
+		assertTrue(workData.getContributors().size() == 5);
+		List<Contributor> c = workData.getContributors();
+		assertTrue(c.get(0).getName().equals(name));
+		assertTrue(c.get(0).getSurname().equals(surname));
+		assertTrue(c.get(0).getCreditName().equals("Khair Abde Daye"));
+		assertTrue(c.get(0).getOid().equals(orcidIdA));
+	}
+
+	/**
+	 * Runs {@link AuthorMatcher#match} for a contributor whose credit name ("XY") equals one of the
+	 * author's other names and expects the contributor to receive the author's name, surname and
+	 * ORCID id.
+	 */
+	@Test
+	public void otherNamesMatchTest()
+		throws VtdException, ParseException, IOException, XPathEvalException, NavException, XPathParseException {
+
+		AuthorData author = new AuthorData();
+		author.setName("Joe");
+		author.setSurname("Dodge");
+		author.setOid("0000-1111-2222-3333");
+		// Plain literals are enough: wrapping them in new String(...) only creates needless copies.
+		List<String> others = Lists.newArrayList();
+		others.add("Joe Dr. Dodge");
+		others.add("XY");
+		author.setOtherNames(others);
+
+		Contributor contributor = new Contributor();
+		contributor.setCreditName("XY");
+		List<Contributor> contributors = Arrays.asList(contributor);
+		AuthorMatcher.match(author, contributors);
+
+		Contributor matched = contributors.get(0);
+		assertTrue(matched.getName().equals("Joe"));
+		assertTrue(matched.getSurname().equals("Dodge"));
+		assertTrue(matched.getOid().equals("0000-1111-2222-3333"));
+	}
+}
--- a/Show More
+++ b/Show More