merge branch with master

2020-12-16 10:51:18 +01:00 · 2020-12-16 10:51:18 +01:00 · 7c86e66697
parent bc09d37e8c de00094ebc
commit 7c86e66697
93 changed files with 2665 additions and 862 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
@ -1,10 +1,10 @@

 package eu.dnetlib.dhp.common;

-import com.google.common.collect.Maps;
-
 import java.util.Map;

+import com.google.common.collect.Maps;
+
 public class Constants {

 	public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java
@ -1,6 +1,10 @@

 package eu.dnetlib.dhp.common;

+import java.io.Serializable;
+import java.util.*;
+import java.util.stream.Collectors;
+
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.dump.oaf.*;
 import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance;
@ -10,406 +14,399 @@ import eu.dnetlib.dhp.schema.oaf.Field;
 import eu.dnetlib.dhp.schema.oaf.Journal;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

-import java.io.Serializable;
-import java.util.*;
-import java.util.stream.Collectors;
-
 public class GraphResultMapper implements Serializable {

-    public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
-            E in) {
+	public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
+		E in) {

-        CommunityResult out = new CommunityResult();
+		CommunityResult out = new CommunityResult();

-        eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in;
-        Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort = Optional.ofNullable(input.getResulttype());
-        if (ort.isPresent()) {
-            switch (ort.get().getClassid()) {
-                case "publication":
-                    Optional<Journal> journal = Optional
-                            .ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal());
-                    if (journal.isPresent()) {
-                        Journal j = journal.get();
-                        Container c = new Container();
-                        c.setConferencedate(j.getConferencedate());
-                        c.setConferenceplace(j.getConferenceplace());
-                        c.setEdition(j.getEdition());
-                        c.setEp(j.getEp());
-                        c.setIss(j.getIss());
-                        c.setIssnLinking(j.getIssnLinking());
-                        c.setIssnOnline(j.getIssnOnline());
-                        c.setIssnPrinted(j.getIssnPrinted());
-                        c.setName(j.getName());
-                        c.setSp(j.getSp());
-                        c.setVol(j.getVol());
-                        out.setContainer(c);
-                        out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname());
-                    }
-                    break;
-                case "dataset":
-                    eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
-                    Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue()));
-                    Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));
+		eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in;
+		Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort = Optional.ofNullable(input.getResulttype());
+		if (ort.isPresent()) {
+			switch (ort.get().getClassid()) {
+				case "publication":
+					Optional<Journal> journal = Optional
+						.ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal());
+					if (journal.isPresent()) {
+						Journal j = journal.get();
+						Container c = new Container();
+						c.setConferencedate(j.getConferencedate());
+						c.setConferenceplace(j.getConferenceplace());
+						c.setEdition(j.getEdition());
+						c.setEp(j.getEp());
+						c.setIss(j.getIss());
+						c.setIssnLinking(j.getIssnLinking());
+						c.setIssnOnline(j.getIssnOnline());
+						c.setIssnPrinted(j.getIssnPrinted());
+						c.setName(j.getName());
+						c.setSp(j.getSp());
+						c.setVol(j.getVol());
+						out.setContainer(c);
+						out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname());
+					}
+					break;
+				case "dataset":
+					eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
+					Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue()));
+					Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));

-                    out
-                            .setGeolocation(
-                                    Optional
-                                            .ofNullable(id.getGeolocation())
-                                            .map(
-                                                    igl -> igl
-                                                            .stream()
-                                                            .filter(Objects::nonNull)
-                                                            .map(gli -> {
-                                                                GeoLocation gl = new GeoLocation();
-                                                                gl.setBox(gli.getBox());
-                                                                gl.setPlace(gli.getPlace());
-                                                                gl.setPoint(gli.getPoint());
-                                                                return gl;
-                                                            })
-                                                            .collect(Collectors.toList()))
-                                            .orElse(null));
+					out
+						.setGeolocation(
+							Optional
+								.ofNullable(id.getGeolocation())
+								.map(
+									igl -> igl
+										.stream()
+										.filter(Objects::nonNull)
+										.map(gli -> {
+											GeoLocation gl = new GeoLocation();
+											gl.setBox(gli.getBox());
+											gl.setPlace(gli.getPlace());
+											gl.setPoint(gli.getPoint());
+											return gl;
+										})
+										.collect(Collectors.toList()))
+								.orElse(null));

-                    out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname());
-                    break;
-                case "software":
+					out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname());
+					break;
+				case "software":

-                    eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
-                    Optional
-                            .ofNullable(is.getCodeRepositoryUrl())
-                            .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue()));
-                    Optional
-                            .ofNullable(is.getDocumentationUrl())
-                            .ifPresent(
-                                    value -> out
-                                            .setDocumentationUrl(
-                                                    value
-                                                            .stream()
-                                                            .map(v -> v.getValue())
-                                                            .collect(Collectors.toList())));
+					eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
+					Optional
+						.ofNullable(is.getCodeRepositoryUrl())
+						.ifPresent(value -> out.setCodeRepositoryUrl(value.getValue()));
+					Optional
+						.ofNullable(is.getDocumentationUrl())
+						.ifPresent(
+							value -> out
+								.setDocumentationUrl(
+									value
+										.stream()
+										.map(v -> v.getValue())
+										.collect(Collectors.toList())));

-                    Optional
-                            .ofNullable(is.getProgrammingLanguage())
-                            .ifPresent(value -> out.setProgrammingLanguage(value.getClassid()));
+					Optional
+						.ofNullable(is.getProgrammingLanguage())
+						.ifPresent(value -> out.setProgrammingLanguage(value.getClassid()));

-                    out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname());
-                    break;
-                case "other":
+					out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname());
+					break;
+				case "other":

-                    eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input;
-                    out
-                            .setContactgroup(
-                                    Optional
-                                            .ofNullable(ir.getContactgroup())
-                                            .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList()))
-                                            .orElse(null));
+					eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input;
+					out
+						.setContactgroup(
+							Optional
+								.ofNullable(ir.getContactgroup())
+								.map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList()))
+								.orElse(null));

-                    out
-                            .setContactperson(
-                                    Optional
-                                            .ofNullable(ir.getContactperson())
-                                            .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList()))
-                                            .orElse(null));
-                    out
-                            .setTool(
-                                    Optional
-                                            .ofNullable(ir.getTool())
-                                            .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList()))
-                                            .orElse(null));
+					out
+						.setContactperson(
+							Optional
+								.ofNullable(ir.getContactperson())
+								.map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList()))
+								.orElse(null));
+					out
+						.setTool(
+							Optional
+								.ofNullable(ir.getTool())
+								.map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList()))
+								.orElse(null));

-                    out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
+					out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());

-                    break;
-            }
+					break;
+			}

-            Optional
-                    .ofNullable(input.getAuthor())
-                    .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList())));
+			Optional
+				.ofNullable(input.getAuthor())
+				.ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList())));

-            // I do not map Access Right UNKNOWN or OTHER
+			// I do not map Access Right UNKNOWN or OTHER

-            Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
-            if (oar.isPresent()) {
-                if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
-                    String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
-                    out
-                            .setBestaccessright(
-                                    AccessRight
-                                            .newInstance(
-                                                    code,
-                                                    Constants.coarCodeLabelMap.get(code),
-                                                    Constants.COAR_ACCESS_RIGHT_SCHEMA));
-                }
-            }
+			Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
+			if (oar.isPresent()) {
+				if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
+					String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
+					out
+						.setBestaccessright(
+							AccessRight
+								.newInstance(
+									code,
+									Constants.coarCodeLabelMap.get(code),
+									Constants.COAR_ACCESS_RIGHT_SCHEMA));
+				}
+			}

-            final List<String> contributorList = new ArrayList<>();
-            Optional
-                    .ofNullable(input.getContributor())
-                    .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
-            out.setContributor(contributorList);
+			final List<String> contributorList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getContributor())
+				.ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
+			out.setContributor(contributorList);

-            Optional
-                    .ofNullable(input.getCountry())
-                    .ifPresent(
-                            value -> out
-                                    .setCountry(
-                                            value
-                                                    .stream()
-                                                    .map(
-                                                            c -> {
-                                                                if (c.getClassid().equals((ModelConstants.UNKNOWN))) {
-                                                                    return null;
-                                                                }
-                                                                Country country = new Country();
-                                                                country.setCode(c.getClassid());
-                                                                country.setLabel(c.getClassname());
-                                                                Optional
-                                                                        .ofNullable(c.getDataInfo())
-                                                                        .ifPresent(
-                                                                                provenance -> country
-                                                                                        .setProvenance(
-                                                                                                Provenance
-                                                                                                        .newInstance(
-                                                                                                                provenance
-                                                                                                                        .getProvenanceaction()
-                                                                                                                        .getClassname(),
-                                                                                                                c.getDataInfo().getTrust())));
-                                                                return country;
-                                                            })
-                                                    .filter(Objects::nonNull)
-                                                    .collect(Collectors.toList())));
+			Optional
+				.ofNullable(input.getCountry())
+				.ifPresent(
+					value -> out
+						.setCountry(
+							value
+								.stream()
+								.map(
+									c -> {
+										if (c.getClassid().equals((ModelConstants.UNKNOWN))) {
+											return null;
+										}
+										Country country = new Country();
+										country.setCode(c.getClassid());
+										country.setLabel(c.getClassname());
+										Optional
+											.ofNullable(c.getDataInfo())
+											.ifPresent(
+												provenance -> country
+													.setProvenance(
+														Provenance
+															.newInstance(
+																provenance
+																	.getProvenanceaction()
+																	.getClassname(),
+																c.getDataInfo().getTrust())));
+										return country;
+									})
+								.filter(Objects::nonNull)
+								.collect(Collectors.toList())));

-            final List<String> coverageList = new ArrayList<>();
-            Optional
-                    .ofNullable(input.getCoverage())
-                    .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
-            out.setCoverage(coverageList);
+			final List<String> coverageList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getCoverage())
+				.ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
+			out.setCoverage(coverageList);

-            out.setDateofcollection(input.getDateofcollection());
+			out.setDateofcollection(input.getDateofcollection());

-            final List<String> descriptionList = new ArrayList<>();
-            Optional
-                    .ofNullable(input.getDescription())
-                    .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
-            out.setDescription(descriptionList);
-            Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
-            if (oStr.isPresent()) {
-                out.setEmbargoenddate(oStr.get().getValue());
-            }
+			final List<String> descriptionList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getDescription())
+				.ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
+			out.setDescription(descriptionList);
+			Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
+			if (oStr.isPresent()) {
+				out.setEmbargoenddate(oStr.get().getValue());
+			}

-            final List<String> formatList = new ArrayList<>();
-            Optional
-                    .ofNullable(input.getFormat())
-                    .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
-            out.setFormat(formatList);
-            out.setId(input.getId());
-            out.setOriginalId(input.getOriginalId());
+			final List<String> formatList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getFormat())
+				.ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
+			out.setFormat(formatList);
+			out.setId(input.getId());
+			out.setOriginalId(input.getOriginalId());

-            Optional<List<eu.dnetlib.dhp.schema.oaf.Instance>> oInst = Optional
-                    .ofNullable(input.getInstance());
+			Optional<List<eu.dnetlib.dhp.schema.oaf.Instance>> oInst = Optional
+				.ofNullable(input.getInstance());

-            if (oInst.isPresent()) {
-                out
-                        .setInstance(
-                                oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList()));
+			if (oInst.isPresent()) {
+				out
+					.setInstance(
+						oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList()));

-            }
+			}

-            Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oL = Optional.ofNullable(input.getLanguage());
-            if (oL.isPresent()) {
-                eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
-                out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname()));
-            }
-            Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
-            if (oLong.isPresent()) {
-                out.setLastupdatetimestamp(oLong.get());
-            }
-            Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
-            if (otitle.isPresent()) {
-                List<StructuredProperty> iTitle = otitle
-                        .get()
-                        .stream()
-                        .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
-                        .collect(Collectors.toList());
-                if (iTitle.size() > 0) {
-                    out.setMaintitle(iTitle.get(0).getValue());
-                }
+			Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oL = Optional.ofNullable(input.getLanguage());
+			if (oL.isPresent()) {
+				eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
+				out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname()));
+			}
+			Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
+			if (oLong.isPresent()) {
+				out.setLastupdatetimestamp(oLong.get());
+			}
+			Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
+			if (otitle.isPresent()) {
+				List<StructuredProperty> iTitle = otitle
+					.get()
+					.stream()
+					.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
+					.collect(Collectors.toList());
+				if (iTitle.size() > 0) {
+					out.setMaintitle(iTitle.get(0).getValue());
+				}

-                iTitle = otitle
-                        .get()
-                        .stream()
-                        .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
-                        .collect(Collectors.toList());
-                if (iTitle.size() > 0) {
-                    out.setSubtitle(iTitle.get(0).getValue());
-                }
+				iTitle = otitle
+					.get()
+					.stream()
+					.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
+					.collect(Collectors.toList());
+				if (iTitle.size() > 0) {
+					out.setSubtitle(iTitle.get(0).getValue());
+				}

-            }
+			}

-            List<ControlledField> pids = new ArrayList<>();
-            Optional
-                    .ofNullable(input.getPid())
-                    .ifPresent(
-                            value -> value
-                                    .stream()
-                                    .forEach(
-                                            p -> pids
-                                                    .add(
-                                                            ControlledField
-                                                                    .newInstance(p.getQualifier().getClassid(), p.getValue()))));
-            out.setPid(pids);
-            oStr = Optional.ofNullable(input.getDateofacceptance());
-            if (oStr.isPresent()) {
-                out.setPublicationdate(oStr.get().getValue());
-            }
-            oStr = Optional.ofNullable(input.getPublisher());
-            if (oStr.isPresent()) {
-                out.setPublisher(oStr.get().getValue());
-            }
+			List<ControlledField> pids = new ArrayList<>();
+			Optional
+				.ofNullable(input.getPid())
+				.ifPresent(
+					value -> value
+						.stream()
+						.forEach(
+							p -> pids
+								.add(
+									ControlledField
+										.newInstance(p.getQualifier().getClassid(), p.getValue()))));
+			out.setPid(pids);
+			oStr = Optional.ofNullable(input.getDateofacceptance());
+			if (oStr.isPresent()) {
+				out.setPublicationdate(oStr.get().getValue());
+			}
+			oStr = Optional.ofNullable(input.getPublisher());
+			if (oStr.isPresent()) {
+				out.setPublisher(oStr.get().getValue());
+			}

-            List<String> sourceList = new ArrayList<>();
-            Optional
-                    .ofNullable(input.getSource())
-                    .ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue())));
-            // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
-            List<Subject> subjectList = new ArrayList<>();
-            Optional
-                    .ofNullable(input.getSubject())
-                    .ifPresent(
-                            value -> value
-                                    .forEach(s -> subjectList.add(getSubject(s))));
+			List<String> sourceList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getSource())
+				.ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue())));
+			// out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
+			List<Subject> subjectList = new ArrayList<>();
+			Optional
+				.ofNullable(input.getSubject())
+				.ifPresent(
+					value -> value
+						.forEach(s -> subjectList.add(getSubject(s))));

-            out.setSubjects(subjectList);
+			out.setSubjects(subjectList);

-            out.setType(input.getResulttype().getClassid());
-        }
+			out.setType(input.getResulttype().getClassid());
+		}

-        out
-                .setCollectedfrom(
-                        input
-                                .getCollectedfrom()
-                                .stream()
-                                .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue()))
-                                .collect(Collectors.toList()));
+		out
+			.setCollectedfrom(
+				input
+					.getCollectedfrom()
+					.stream()
+					.map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue()))
+					.collect(Collectors.toList()));

+		return out;

-        return out;
+	}

-    }
+	private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
+		CommunityInstance instance = new CommunityInstance();

+		setCommonValue(i, instance);

-    private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
-        CommunityInstance instance = new CommunityInstance();
+		instance
+			.setCollectedfrom(
+				KeyValue
+					.newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue()));

-        setCommonValue(i, instance);
+		instance
+			.setHostedby(
+				KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue()));

-        instance
-                .setCollectedfrom(
-                        KeyValue
-                                .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue()));
+		return instance;

-        instance
-                .setHostedby(
-                        KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue()));
+	}

-        return instance;
+	private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
+		Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> opAr = Optional
+			.ofNullable(i.getAccessright());
+		if (opAr.isPresent()) {
+			if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
+				String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
+				instance
+					.setAccessright(
+						AccessRight
+							.newInstance(
+								code,
+								Constants.coarCodeLabelMap.get(code),
+								Constants.COAR_ACCESS_RIGHT_SCHEMA));
+			}
+		}

-    }
+		Optional
+			.ofNullable(i.getLicense())
+			.ifPresent(value -> instance.setLicense(value.getValue()));
+		Optional
+			.ofNullable(i.getDateofacceptance())
+			.ifPresent(value -> instance.setPublicationdate(value.getValue()));
+		Optional
+			.ofNullable(i.getRefereed())
+			.ifPresent(value -> instance.setRefereed(value.getClassname()));
+		Optional
+			.ofNullable(i.getInstancetype())
+			.ifPresent(value -> instance.setType(value.getClassname()));
+		Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value));

-    private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
-        Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> opAr = Optional
-                .ofNullable(i.getAccessright());
-        if (opAr.isPresent()) {
-            if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
-                String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
-                instance
-                        .setAccessright(
-                                AccessRight
-                                        .newInstance(
-                                                code,
-                                                Constants.coarCodeLabelMap.get(code),
-                                                Constants.COAR_ACCESS_RIGHT_SCHEMA));
-            }
-        }
+	}

-        Optional
-                .ofNullable(i.getLicense())
-                .ifPresent(value -> instance.setLicense(value.getValue()));
-        Optional
-                .ofNullable(i.getDateofacceptance())
-                .ifPresent(value -> instance.setPublicationdate(value.getValue()));
-        Optional
-                .ofNullable(i.getRefereed())
-                .ifPresent(value -> instance.setRefereed(value.getClassname()));
-        Optional
-                .ofNullable(i.getInstancetype())
-                .ifPresent(value -> instance.setType(value.getClassname()));
-        Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value));
+	private static Subject getSubject(StructuredProperty s) {
+		Subject subject = new Subject();
+		subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()));
+		Optional<DataInfo> di = Optional.ofNullable(s.getDataInfo());
+		if (di.isPresent()) {
+			Provenance p = new Provenance();
+			p.setProvenance(di.get().getProvenanceaction().getClassname());
+			p.setTrust(di.get().getTrust());
+			subject.setProvenance(p);
+		}

-    }
+		return subject;
+	}

+	private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
+		Author a = new Author();
+		a.setFullname(oa.getFullname());
+		a.setName(oa.getName());
+		a.setSurname(oa.getSurname());
+		a.setRank(oa.getRank());

-    private static Subject getSubject(StructuredProperty s) {
-        Subject subject = new Subject();
-        subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()));
-        Optional<DataInfo> di = Optional.ofNullable(s.getDataInfo());
-        if (di.isPresent()) {
-            Provenance p = new Provenance();
-            p.setProvenance(di.get().getProvenanceaction().getClassname());
-            p.setTrust(di.get().getTrust());
-            subject.setProvenance(p);
-        }
+		Optional<List<StructuredProperty>> oPids = Optional
+			.ofNullable(oa.getPid());
+		if (oPids.isPresent()) {
+			Pid pid = getOrcid(oPids.get());
+			if (pid != null) {
+				a.setPid(pid);
+			}
+		}

-        return subject;
-    }
+		return a;
+	}

-    private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
-        Author a = new Author();
-        a.setFullname(oa.getFullname());
-        a.setName(oa.getName());
-        a.setSurname(oa.getSurname());
-        a.setRank(oa.getRank());
+	private static Pid getOrcid(List<StructuredProperty> p) {
+		for (StructuredProperty pid : p) {
+			if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
+				Optional<DataInfo> di = Optional.ofNullable(pid.getDataInfo());
+				if (di.isPresent()) {
+					return Pid
+						.newInstance(
+							ControlledField
+								.newInstance(
+									pid.getQualifier().getClassid(),
+									pid.getValue()),
+							Provenance
+								.newInstance(
+									di.get().getProvenanceaction().getClassname(),
+									di.get().getTrust()));
+				} else {
+					return Pid
+						.newInstance(
+							ControlledField
+								.newInstance(
+									pid.getQualifier().getClassid(),
+									pid.getValue())

-        Optional<List<StructuredProperty>> oPids = Optional
-                .ofNullable(oa.getPid());
-        if (oPids.isPresent()) {
-            Pid pid = getOrcid(oPids.get());
-            if (pid != null) {
-                a.setPid(pid);
-            }
-        }
+						);
+				}

-        return a;
-    }
-
-    private static Pid getOrcid(List<StructuredProperty> p) {
-        for (StructuredProperty pid : p) {
-            if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
-                Optional<DataInfo> di = Optional.ofNullable(pid.getDataInfo());
-                if (di.isPresent()) {
-                    return Pid
-                            .newInstance(
-                                    ControlledField
-                                            .newInstance(
-                                                    pid.getQualifier().getClassid(),
-                                                    pid.getValue()),
-                                    Provenance
-                                            .newInstance(
-                                                    di.get().getProvenanceaction().getClassname(),
-                                                    di.get().getTrust()));
-                } else {
-                    return Pid
-                            .newInstance(
-                                    ControlledField
-                                            .newInstance(
-                                                    pid.getQualifier().getClassid(),
-                                                    pid.getValue())
-
-                            );
-                }
-
-            }
-        }
-        return null;
-    }
+			}
+		}
+		return null;
+	}

 }
--- a/dhp-schemas/pom.xml
+++ b/dhp-schemas/pom.xml
@ -6,7 +6,7 @@
        <groupId>eu.dnetlib.dhp</groupId>
        <artifactId>dhp</artifactId>
        <version>1.2.4-SNAPSHOT</version>
-        <relativePath>../</relativePath>
+        <relativePath>../pom.xml</relativePath>
    </parent>

    <artifactId>dhp-schemas</artifactId>
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java
@ -2,8 +2,12 @@
 package eu.dnetlib.dhp.schema.oaf;

 import java.io.Serializable;
+import java.util.Collection;
 import java.util.List;
 import java.util.Objects;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;

 public abstract class Oaf implements Serializable {

@ -40,9 +44,34 @@ public abstract class Oaf implements Serializable {
 		this.lastupdatetimestamp = lastupdatetimestamp;
 	}

-	public void mergeOAFDataInfo(Oaf e) {
-		if (e.getDataInfo() != null && compareTrust(this, e) < 0)
-			dataInfo = e.getDataInfo();
+	public void mergeFrom(Oaf o) { // folds o's collectedfrom list and lastupdatetimestamp into this object
+		if (Objects.isNull(o)) { // no-op on null; callers that dereference o after super.mergeFrom(o) still need their own null check
+			return;
+		}
+		setCollectedfrom( // concatenation of both lists with duplicates dropped; a null list contributes no elements, so the result is never null
+			Stream
+				.concat(
+					Optional
+						.ofNullable(getCollectedfrom())
+						.map(Collection::stream)
+						.orElse(Stream.empty()),
+					Optional
+						.ofNullable(o.getCollectedfrom())
+						.map(Collection::stream)
+						.orElse(Stream.empty()))
+				.distinct() // relies on KeyValue.equals
+				.collect(Collectors.toList()));
+
+		setLastupdatetimestamp( // keeps the larger of the two values; a null timestamp counts as 0, so two nulls yield 0 rather than null
+			Math
+				.max(
+					Optional.ofNullable(getLastupdatetimestamp()).orElse(0L),
+					Optional.ofNullable(o.getLastupdatetimestamp()).orElse(0L)));
+	}
+
+	public void mergeOAFDataInfo(Oaf o) { // replaces this object's dataInfo with o's when o has one and compareTrust(this, o) is negative
+		if (o.getDataInfo() != null && compareTrust(this, o) < 0) // o itself is not null-checked here; presumably a negative result means o is the more trusted record (confirm in compareTrust)
+			dataInfo = o.getDataInfo();
 	}

 	protected String extractTrust(Oaf e) {
@ -62,7 +91,7 @@ public abstract class Oaf implements Serializable {
 		if (o == null || getClass() != o.getClass())
 			return false;
 		Oaf oaf = (Oaf) o;
-		return Objects.equals(dataInfo, oaf.dataInfo)
+		return Objects.equals(getDataInfo(), oaf.getDataInfo())
 			&& Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp);
 	}

--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java
@ -78,14 +78,14 @@ public abstract class OafEntity extends Oaf implements Serializable {
 	}

 	public void mergeFrom(OafEntity e) {
-
+		// Oaf.mergeFrom(null) is a no-op, but the statements below dereference e: keep the explicit guard.
 		if (e == null)
 			return;
+
+		super.mergeFrom(e);

 		originalId = mergeLists(originalId, e.getOriginalId());

-		collectedfrom = mergeLists(collectedfrom, e.getCollectedfrom());
-
 		pid = mergeLists(pid, e.getPid());

 		if (e.getDateofcollection() != null && compareTrust(this, e) < 0)
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java
@ -351,8 +351,6 @@ public class Project extends OafEntity implements Serializable {
 			? p.getFundedamount()
 			: fundedamount;

-		// programme = mergeLists(programme, p.getProgramme());
-
 		h2020classification = mergeLists(h2020classification, p.getH2020classification());

 		mergeOAFDataInfo(e);
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java
@ -130,19 +130,7 @@ public class Relation extends Oaf {
 			Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal");
 		checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal");

-		setCollectedfrom(
-			Stream
-				.concat(
-					Optional
-						.ofNullable(getCollectedfrom())
-						.map(Collection::stream)
-						.orElse(Stream.empty()),
-					Optional
-						.ofNullable(r.getCollectedfrom())
-						.map(Collection::stream)
-						.orElse(Stream.empty()))
-				.distinct() // relies on KeyValue.equals
-				.collect(Collectors.toList()));
+		super.mergeFrom(r);
 	}

 	@Override
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java
@ -0,0 +1,25 @@
+
+package eu.dnetlib.dhp.schema.orcid;
+
+import java.util.List;
+
+public class OrcidDOI { // plain mutable bean: one doi string plus a list of AuthorData entries
+	private String doi; // read and written through getDoi/setDoi, no validation applied
+	private List<AuthorData> authors; // read and written through getAuthors/setAuthors; may be null until set
+
+	public String getDoi() {
+		return doi;
+	}
+
+	public void setDoi(String doi) {
+		this.doi = doi;
+	}
+
+	public List<AuthorData> getAuthors() { // returns the stored list itself, not a copy
+		return authors;
+	}
+
+	public void setAuthors(List<AuthorData> authors) { // stores the caller's list by reference
+		this.authors = authors;
+	}
+}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java
@ -32,15 +32,15 @@ public class CheckDuplictedIdsJob {
 			IOUtils
 				.toString(
 					CheckDuplictedIdsJob.class
-						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
+						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/check_duplicates.json")));
 		parser.parseArgument(args);

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

-		final String countPath = parser.get("workingPath") + "/counts";
+		final String countPath = parser.get("outputDir") + "/counts";
 		log.info("countPath: {}", countPath);

 		final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
@ -59,6 +59,7 @@ public class CheckDuplictedIdsJob {
 			.map(o -> ClusterUtils.incrementAccumulator(o, total), Encoders.tuple(Encoders.STRING(), Encoders.LONG()))
 			.write()
 			.mode(SaveMode.Overwrite)
+			.option("compression", "gzip")
 			.json(countPath);
 		;

--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsJob.java
@ -44,10 +44,10 @@ public class GenerateEventsJob {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String eventsPath = workingPath + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

 		final Set<String> dsIdWhitelist = ClusterUtils.parseParamAsList(parser, "datasourceIdWhitelist");
@ -59,6 +59,9 @@ public class GenerateEventsJob {
 		final Set<String> dsIdBlacklist = ClusterUtils.parseParamAsList(parser, "datasourceIdBlacklist");
 		log.info("datasourceIdBlacklist: {}", StringUtils.join(dsIdBlacklist, ","));

+		final Set<String> topicWhitelist = ClusterUtils.parseParamAsList(parser, "topicWhitelist");
+		log.info("topicWhitelist: {}", StringUtils.join(topicWhitelist, ","));
+
 		final SparkConf conf = new SparkConf();

 		runWithSparkSession(conf, isSparkSessionManaged, spark -> {
@ -70,12 +73,12 @@ public class GenerateEventsJob {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_events");

 			final Dataset<ResultGroup> groups = ClusterUtils
-				.readPath(spark, workingPath + "/duplicates", ResultGroup.class);
+				.readPath(spark, workingDir + "/duplicates", ResultGroup.class);

 			final Dataset<Event> dataset = groups
 				.map(
 					g -> EventFinder
-						.generateEvents(g, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist, accumulators),
+						.generateEvents(g, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist, topicWhitelist, accumulators),
 					Encoders
 						.bean(EventGroup.class))
 				.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateStatsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateStatsJob.java
@ -46,7 +46,7 @@ public class GenerateStatsJob {

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

 		final String dbUrl = parser.get("dbUrl");
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java
@ -46,7 +46,7 @@ public class IndexEventSubsetJob {

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

 		final String index = parser.get("index");
@ -55,6 +55,18 @@ public class IndexEventSubsetJob {
 		final String indexHost = parser.get("esHost");
 		log.info("indexHost: {}", indexHost);

+		final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
+		log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
+
+		final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
+		log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
+
+		final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
+		log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
+
+		final String esNodesWanOnly = parser.get("esNodesWanOnly");
+		log.info("esNodesWanOnly: {}", esNodesWanOnly);
+
 		final int maxEventsForTopic = NumberUtils.toInt(parser.get("maxEventsForTopic"));
 		log.info("maxEventsForTopic: {}", maxEventsForTopic);

@ -86,10 +98,10 @@ public class IndexEventSubsetJob {
 		esCfg.put("es.index.auto.create", "false");
 		esCfg.put("es.nodes", indexHost);
 		esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
-		esCfg.put("es.batch.write.retry.count", "8");
-		esCfg.put("es.batch.write.retry.wait", "60s");
-		esCfg.put("es.batch.size.entries", "200");
-		esCfg.put("es.nodes.wan.only", "true");
+		esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
+		esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
+		esCfg.put("es.batch.size.entries", esBatchSizeEntries);
+		esCfg.put("es.nodes.wan.only", esNodesWanOnly);

 		log.info("*** Start indexing");
 		JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java
@ -54,7 +54,7 @@ public class IndexNotificationsJob {

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

 		final String index = parser.get("index");
@ -63,6 +63,18 @@ public class IndexNotificationsJob {
 		final String indexHost = parser.get("esHost");
 		log.info("indexHost: {}", indexHost);

+		final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
+		log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
+
+		final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
+		log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
+
+		final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
+		log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
+
+		final String esNodesWanOnly = parser.get("esNodesWanOnly");
+		log.info("esNodesWanOnly: {}", esNodesWanOnly);
+
 		final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
 		log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);

@ -92,10 +104,10 @@ public class IndexNotificationsJob {
 			esCfg.put("es.index.auto.create", "false");
 			esCfg.put("es.nodes", indexHost);
 			esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY
-			esCfg.put("es.batch.write.retry.count", "8");
-			esCfg.put("es.batch.write.retry.wait", "60s");
-			esCfg.put("es.batch.size.entries", "200");
-			esCfg.put("es.nodes.wan.only", "true");
+			esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
+			esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
+			esCfg.put("es.batch.size.entries", esBatchSizeEntries);
+			esCfg.put("es.nodes.wan.only", esNodesWanOnly);

 			log.info("*** Start indexing");
 			JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java
@ -36,7 +36,7 @@ public class IndexOnESJob {

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

 		final String index = parser.get("index");
@ -45,6 +45,18 @@ public class IndexOnESJob {
 		final String indexHost = parser.get("esHost");
 		log.info("indexHost: {}", indexHost);

+		final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
+		log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
+
+		final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
+		log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
+
+		final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
+		log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
+
+		final String esNodesWanOnly = parser.get("esNodesWanOnly");
+		log.info("esNodesWanOnly: {}", esNodesWanOnly);
+
 		final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();

 		final JavaRDD<String> inputRdd = ClusterUtils
@ -53,15 +65,13 @@ public class IndexOnESJob {
 			.javaRDD();

 		final Map<String, String> esCfg = new HashMap<>();
-		// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
-
 		esCfg.put("es.index.auto.create", "false");
 		esCfg.put("es.nodes", indexHost);
 		esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
-		esCfg.put("es.batch.write.retry.count", "8");
-		esCfg.put("es.batch.write.retry.wait", "60s");
-		esCfg.put("es.batch.size.entries", "200");
-		esCfg.put("es.nodes.wan.only", "true");
+		esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
+		esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
+		esCfg.put("es.batch.size.entries", esBatchSizeEntries);
+		esCfg.put("es.nodes.wan.only", esNodesWanOnly);

 		JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
 	}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep0Job.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep0Job.java
@ -42,10 +42,10 @@ public class JoinStep0Job {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String joinedEntitiesPath = workingPath + "/joinedEntities_step0";
+		final String joinedEntitiesPath = workingDir + "/joinedEntities_step0";
 		log.info("joinedEntitiesPath: {}", joinedEntitiesPath);

 		final SparkConf conf = new SparkConf();
@ -57,10 +57,10 @@ public class JoinStep0Job {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_entities");

 			final Dataset<OaBrokerMainEntity> sources = ClusterUtils
-				.readPath(spark, workingPath + "/simpleEntities", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/simpleEntities", OaBrokerMainEntity.class);

 			final Dataset<RelatedDatasource> typedRels = ClusterUtils
-				.readPath(spark, workingPath + "/relatedDatasources", RelatedDatasource.class);
+				.readPath(spark, workingDir + "/relatedDatasources", RelatedDatasource.class);

 			final TypedColumn<Tuple2<OaBrokerMainEntity, RelatedDatasource>, OaBrokerMainEntity> aggr = new RelatedDatasourceAggregator()
 				.toColumn();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep1Job.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep1Job.java
@ -40,10 +40,10 @@ public class JoinStep1Job {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String joinedEntitiesPath = workingPath + "/joinedEntities_step1";
+		final String joinedEntitiesPath = workingDir + "/joinedEntities_step1";
 		log.info("joinedEntitiesPath: {}", joinedEntitiesPath);

 		final SparkConf conf = new SparkConf();
@ -55,10 +55,10 @@ public class JoinStep1Job {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_entities");

 			final Dataset<OaBrokerMainEntity> sources = ClusterUtils
-				.readPath(spark, workingPath + "/joinedEntities_step0", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/joinedEntities_step0", OaBrokerMainEntity.class);

 			final Dataset<RelatedProject> typedRels = ClusterUtils
-				.readPath(spark, workingPath + "/relatedProjects", RelatedProject.class);
+				.readPath(spark, workingDir + "/relatedProjects", RelatedProject.class);

 			final TypedColumn<Tuple2<OaBrokerMainEntity, RelatedProject>, OaBrokerMainEntity> aggr = new RelatedProjectAggregator()
 				.toColumn();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep2Job.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep2Job.java
@ -39,10 +39,10 @@ public class JoinStep2Job {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String joinedEntitiesPath = workingPath + "/joinedEntities_step2";
+		final String joinedEntitiesPath = workingDir + "/joinedEntities_step2";
 		log.info("joinedEntitiesPath: {}", joinedEntitiesPath);

 		final SparkConf conf = new SparkConf();
@ -54,10 +54,10 @@ public class JoinStep2Job {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_entities");

 			final Dataset<OaBrokerMainEntity> sources = ClusterUtils
-				.readPath(spark, workingPath + "/joinedEntities_step1", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/joinedEntities_step1", OaBrokerMainEntity.class);

 			final Dataset<RelatedSoftware> typedRels = ClusterUtils
-				.readPath(spark, workingPath + "/relatedSoftwares", RelatedSoftware.class);
+				.readPath(spark, workingDir + "/relatedSoftwares", RelatedSoftware.class);

 			final TypedColumn<Tuple2<OaBrokerMainEntity, RelatedSoftware>, OaBrokerMainEntity> aggr = new RelatedSoftwareAggregator()
 				.toColumn();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep3Job.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep3Job.java
@ -40,10 +40,10 @@ public class JoinStep3Job {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String joinedEntitiesPath = workingPath + "/joinedEntities_step3";
+		final String joinedEntitiesPath = workingDir + "/joinedEntities_step3";
 		log.info("joinedEntitiesPath: {}", joinedEntitiesPath);

 		final SparkConf conf = new SparkConf();
@ -55,10 +55,10 @@ public class JoinStep3Job {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_entities");

 			final Dataset<OaBrokerMainEntity> sources = ClusterUtils
-				.readPath(spark, workingPath + "/joinedEntities_step2", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/joinedEntities_step2", OaBrokerMainEntity.class);

 			final Dataset<RelatedDataset> typedRels = ClusterUtils
-				.readPath(spark, workingPath + "/relatedDatasets", RelatedDataset.class);
+				.readPath(spark, workingDir + "/relatedDatasets", RelatedDataset.class);

 			final TypedColumn<Tuple2<OaBrokerMainEntity, RelatedDataset>, OaBrokerMainEntity> aggr = new RelatedDatasetAggregator()
 				.toColumn();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep4Job.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/JoinStep4Job.java
@ -40,10 +40,10 @@ public class JoinStep4Job {
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String joinedEntitiesPath = workingPath + "/joinedEntities_step4";
+		final String joinedEntitiesPath = workingDir + "/joinedEntities_step4";
 		log.info("joinedEntitiesPath: {}", joinedEntitiesPath);

 		final SparkConf conf = new SparkConf();
@ -55,10 +55,10 @@ public class JoinStep4Job {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_entities");

 			final Dataset<OaBrokerMainEntity> sources = ClusterUtils
-				.readPath(spark, workingPath + "/joinedEntities_step3", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/joinedEntities_step3", OaBrokerMainEntity.class);

 			final Dataset<RelatedPublication> typedRels = ClusterUtils
-				.readPath(spark, workingPath + "/relatedPublications", RelatedPublication.class);
+				.readPath(spark, workingDir + "/relatedPublications", RelatedPublication.class);

 			final TypedColumn<Tuple2<OaBrokerMainEntity, RelatedPublication>, OaBrokerMainEntity> aggr = new RelatedPublicationAggregator()
 				.toColumn();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java
@ -4,8 +4,13 @@ package eu.dnetlib.dhp.broker.oa;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

 import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
 import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;

+import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@ -13,6 +18,8 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.slf4j.Logger;
@ -29,7 +36,7 @@ import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
 public class PartitionEventsByDsIdJob {

 	private static final Logger log = LoggerFactory.getLogger(PartitionEventsByDsIdJob.class);
-	private static final String OPENDOAR_NSPREFIX = "opendoar____::";
+	private static final String OPENDOAR_NSPREFIX = "10|opendoar____::";

 	public static void main(final String[] args) throws Exception {

@ -37,7 +44,7 @@ public class PartitionEventsByDsIdJob {
 			IOUtils
 				.toString(
 					PartitionEventsByDsIdJob.class
-						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
+						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/od_partitions_params.json")));
 		parser.parseArgument(args);

 		final Boolean isSparkSessionManaged = Optional
@ -48,24 +55,43 @@ public class PartitionEventsByDsIdJob {

 		final SparkConf conf = new SparkConf();

-		final String eventsPath = parser.get("workingPath") + "/events";
+		final String eventsPath = parser.get("outputDir") + "/events";
 		log.info("eventsPath: {}", eventsPath);

-		final String partitionPath = parser.get("workingPath") + "/eventsByOpendoarId";
+		final String partitionPath = parser.get("outputDir") + "/eventsByOpendoarId";
 		log.info("partitionPath: {}", partitionPath);

+		final String opendoarIds = parser.get("opendoarIds");
+		log.info("opendoarIds: {}", opendoarIds);
+
+		final Set<String> validOpendoarIds = new HashSet<>();
+		if (!opendoarIds.trim().equals("-")) {
+			validOpendoarIds
+				.addAll(
+					Arrays
+						.stream(opendoarIds.split(","))
+						.map(String::trim)
+						.filter(StringUtils::isNotBlank)
+						.map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s))
+						.collect(Collectors.toSet()));
+		}
+		log.info("validOpendoarIds: {}", validOpendoarIds);
+
 		runWithSparkSession(conf, isSparkSessionManaged, spark -> {

 			ClusterUtils
 				.readPath(spark, eventsPath, Event.class)
-				.filter(e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId()))
-				.filter(e -> e.getMap().getTargetDatasourceId().contains(OPENDOAR_NSPREFIX))
-				.limit(10000)
-				.map(e -> messageFromNotification(e), Encoders.bean(ShortEventMessageWithGroupId.class))
+				.filter((FilterFunction<Event>) e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId()))
+				.filter((FilterFunction<Event>) e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX))
+				.filter((FilterFunction<Event>) e -> validOpendoarIds.contains(e.getMap().getTargetDatasourceId()))
+				.map(
+					(MapFunction<Event, ShortEventMessageWithGroupId>) e -> messageFromNotification(e),
+					Encoders.bean(ShortEventMessageWithGroupId.class))
 				.coalesce(1)
 				.write()
 				.partitionBy("group")
 				.mode(SaveMode.Overwrite)
+				.option("compression", "gzip")
 				.json(partitionPath);

 		});
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareGroupsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareGroupsJob.java
@ -45,10 +45,10 @@ public class PrepareGroupsJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String groupsPath = workingPath + "/duplicates";
+		final String groupsPath = workingDir + "/duplicates";
 		log.info("groupsPath: {}", groupsPath);

 		final SparkConf conf = new SparkConf();
@ -60,10 +60,10 @@ public class PrepareGroupsJob {
 			final LongAccumulator total = spark.sparkContext().longAccumulator("total_groups");

 			final Dataset<OaBrokerMainEntity> results = ClusterUtils
-				.readPath(spark, workingPath + "/joinedEntities_step4", OaBrokerMainEntity.class);
+				.readPath(spark, workingDir + "/joinedEntities_step4", OaBrokerMainEntity.class);

 			final Dataset<Relation> mergedRels = ClusterUtils
-				.readPath(spark, graphPath + "/relation", Relation.class)
+				.loadRelations(graphPath, spark)
 				.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));

 			final TypedColumn<Tuple2<OaBrokerMainEntity, Relation>, ResultGroup> aggr = new ResultAggregator()
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasetsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasetsJob.java
@ -42,10 +42,10 @@ public class PrepareRelatedDatasetsJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String relsPath = workingPath + "/relatedDatasets";
+		final String relsPath = workingDir + "/relatedDatasets";
 		log.info("relsPath: {}", relsPath);

 		final SparkConf conf = new SparkConf();
@ -62,7 +62,7 @@ public class PrepareRelatedDatasetsJob {
 				.map(ConversionUtils::oafDatasetToBrokerDataset, Encoders.bean(OaBrokerRelatedDataset.class));

 			final Dataset<Relation> rels = ClusterUtils
-				.readPath(spark, graphPath + "/relation", Relation.class)
+				.loadRelations(graphPath, spark)
 				.filter(r -> r.getDataInfo().getDeletedbyinference())
 				.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
 				.filter(r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
@ -72,7 +72,8 @@ public class PrepareRelatedDatasetsJob {
 			final Dataset<RelatedDataset> dataset = rels
 				.joinWith(datasets, datasets.col("openaireId").equalTo(rels.col("target")), "inner")
 				.map(t -> {
-					final RelatedDataset rel = new RelatedDataset(t._1.getSource(), t._2);
+					final RelatedDataset rel = new RelatedDataset(t._1.getSource(),
+						t._2);
 					rel.getRelDataset().setRelType(t._1.getRelClass());
 					return rel;
 				}, Encoders.bean(RelatedDataset.class));
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java
@ -48,10 +48,10 @@ public class PrepareRelatedDatasourcesJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String relsPath = workingPath + "/relatedDatasources";
+		final String relsPath = workingDir + "/relatedDatasources";
 		log.info("relsPath: {}", relsPath);

 		final SparkConf conf = new SparkConf();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedProjectsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedProjectsJob.java
@ -44,10 +44,10 @@ public class PrepareRelatedProjectsJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String relsPath = workingPath + "/relatedProjects";
+		final String relsPath = workingDir + "/relatedProjects";
 		log.info("relsPath: {}", relsPath);

 		final SparkConf conf = new SparkConf();
@ -64,7 +64,7 @@ public class PrepareRelatedProjectsJob {
 				.map(ConversionUtils::oafProjectToBrokerProject, Encoders.bean(OaBrokerProject.class));

 			final Dataset<Relation> rels = ClusterUtils
-				.readPath(spark, graphPath + "/relation", Relation.class)
+				.loadRelations(graphPath, spark)
 				.filter(r -> r.getDataInfo().getDeletedbyinference())
 				.filter(r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT))
 				.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedPublicationsJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedPublicationsJob.java
@ -43,10 +43,10 @@ public class PrepareRelatedPublicationsJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String relsPath = workingPath + "/relatedPublications";
+		final String relsPath = workingDir + "/relatedPublications";
 		log.info("relsPath: {}", relsPath);

 		final SparkConf conf = new SparkConf();
@ -65,7 +65,7 @@ public class PrepareRelatedPublicationsJob {
 					Encoders.bean(OaBrokerRelatedPublication.class));

 			final Dataset<Relation> rels = ClusterUtils
-				.readPath(spark, graphPath + "/relation", Relation.class)
+				.loadRelations(graphPath, spark)
 				.filter(r -> r.getDataInfo().getDeletedbyinference())
 				.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
 				.filter(r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
@ -75,7 +75,8 @@ public class PrepareRelatedPublicationsJob {
 			final Dataset<RelatedPublication> dataset = rels
 				.joinWith(pubs, pubs.col("openaireId").equalTo(rels.col("target")), "inner")
 				.map(t -> {
-					final RelatedPublication rel = new RelatedPublication(t._1.getSource(), t._2);
+					final RelatedPublication rel = new RelatedPublication(
+						t._1.getSource(), t._2);
 					rel.getRelPublication().setRelType(t._1.getRelClass());
 					return rel;
 				}, Encoders.bean(RelatedPublication.class));
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedSoftwaresJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedSoftwaresJob.java
@ -44,10 +44,10 @@ public class PrepareRelatedSoftwaresJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String relsPath = workingPath + "/relatedSoftwares";
+		final String relsPath = workingDir + "/relatedSoftwares";
 		log.info("relsPath: {}", relsPath);

 		final SparkConf conf = new SparkConf();
@ -64,7 +64,7 @@ public class PrepareRelatedSoftwaresJob {
 				.map(ConversionUtils::oafSoftwareToBrokerSoftware, Encoders.bean(OaBrokerRelatedSoftware.class));

 			final Dataset<Relation> rels = ClusterUtils
-				.readPath(spark, graphPath + "/relation", Relation.class)
+				.loadRelations(graphPath, spark)
 				.filter(r -> r.getDataInfo().getDeletedbyinference())
 				.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
 				.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareSimpleEntititiesJob.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareSimpleEntititiesJob.java
@ -44,10 +44,10 @@ public class PrepareSimpleEntititiesJob {
 		final String graphPath = parser.get("graphPath");
 		log.info("graphPath: {}", graphPath);

-		final String workingPath = parser.get("workingPath");
-		log.info("workingPath: {}", workingPath);
+		final String workingDir = parser.get("workingDir");
+		log.info("workingDir: {}", workingDir);

-		final String simpleEntitiesPath = workingPath + "/simpleEntities";
+		final String simpleEntitiesPath = workingDir + "/simpleEntities";
 		log.info("simpleEntitiesPath: {}", simpleEntitiesPath);

 		final SparkConf conf = new SparkConf();
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java
@ -16,7 +16,24 @@ public class EnrichMissingSubject extends UpdateMatcher<OaBrokerTypedValue> {

 	public EnrichMissingSubject() {
+		// The second argument resolves the topic of a subject from its type, compared
+		// case-insensitively; a missing type or a type outside the list below yields null.
+		// NOTE(review): presumably UpdateMatcher skips subjects whose topic is null - confirm.
 		super(20,
-			s -> Topic.fromPath("ENRICH/MISSING/SUBJECT/" + s.getType()),
+			s -> {
+				final String type = s.getType();
+				// switching on a null string throws a NullPointerException: treat it as an unknown type
+				if (type == null) {
+					return null;
+				}
+				// Locale.ROOT keeps the case folding independent of the JVM default locale
+				// (a Turkish locale lower-cases 'I' to a dotless i, so "ARXIV" would not match)
+				switch (type.toLowerCase(java.util.Locale.ROOT)) {
+					case "acm":
+						return Topic.ENRICH_MISSING_SUBJECT_ACM;
+					case "arxiv":
+						return Topic.ENRICH_MISSING_SUBJECT_ARXIV;
+					case "ddc":
+						return Topic.ENRICH_MISSING_SUBJECT_DDC;
+					case "jel":
+						return Topic.ENRICH_MISSING_SUBJECT_JEL;
+					case "mesh":
+						return Topic.ENRICH_MISSING_SUBJECT_MESHEUROPMC;
+					case "rvk":
+						return Topic.ENRICH_MISSING_SUBJECT_RVK;
+					default:
+						return null;
+				}
+			},
 			(p, s) -> p.getSubjects().add(s),
 			s -> subjectAsString(s));
 	}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java
@ -16,7 +16,24 @@ public class EnrichMoreSubject extends UpdateMatcher<OaBrokerTypedValue> {

 	public EnrichMoreSubject() {
+		// The second argument resolves the topic of a subject from its type, compared
+		// case-insensitively; a missing type or a type outside the list below yields null.
+		// NOTE(review): presumably UpdateMatcher skips subjects whose topic is null - confirm.
 		super(20,
-			s -> Topic.fromPath("ENRICH/MORE/SUBJECT/" + s.getType()),
+			s -> {
+				final String type = s.getType();
+				// switching on a null string throws a NullPointerException: treat it as an unknown type
+				if (type == null) {
+					return null;
+				}
+				// Locale.ROOT keeps the case folding independent of the JVM default locale
+				// (a Turkish locale lower-cases 'I' to a dotless i, so "ARXIV" would not match)
+				switch (type.toLowerCase(java.util.Locale.ROOT)) {
+					case "acm":
+						return Topic.ENRICH_MORE_SUBJECT_ACM;
+					case "arxiv":
+						return Topic.ENRICH_MORE_SUBJECT_ARXIV;
+					case "ddc":
+						return Topic.ENRICH_MORE_SUBJECT_DDC;
+					case "jel":
+						return Topic.ENRICH_MORE_SUBJECT_JEL;
+					case "mesh":
+						return Topic.ENRICH_MORE_SUBJECT_MESHEUROPMC;
+					case "rvk":
+						return Topic.ENRICH_MORE_SUBJECT_RVK;
+					default:
+						return null;
+				}
+			},
 			(p, s) -> p.getSubjects().add(s),
 			s -> subjectAsString(s));
 	}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java
@ -17,6 +17,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.oaf.Relation;

 public class ClusterUtils {

@ -30,6 +31,16 @@ public class ClusterUtils {
 		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
 	}

+	/**
+	 * Reads the relations stored under {@code graphPath + "/relation"} and rewrites the source and
+	 * the target of each of them through {@link ConversionUtils#cleanOpenaireId(String)}.
+	 *
+	 * @param graphPath the base path of the graph
+	 * @param spark the session used to read the data
+	 * @return the relations, with cleaned source and target identifiers
+	 */
+	public static Dataset<Relation> loadRelations(final String graphPath, final SparkSession spark) {
+		final Dataset<Relation> rawRelations = readPath(spark, graphPath + "/relation", Relation.class);
+
+		return rawRelations.map(rel -> {
+			final String cleanSource = ConversionUtils.cleanOpenaireId(rel.getSource());
+			final String cleanTarget = ConversionUtils.cleanOpenaireId(rel.getTarget());
+			rel.setSource(cleanSource);
+			rel.setTarget(cleanTarget);
+			return rel;
+		}, Encoders.bean(Relation.class));
+	}
+
 	public static <R> Dataset<R> readPath(
 		final SparkSession spark,
 		final String inputPath,
@ -67,6 +78,7 @@ public class ClusterUtils {
 			.map(o -> ClusterUtils.incrementAccumulator(o, acc), Encoders.bean(clazz))
 			.write()
 			.mode(SaveMode.Overwrite)
+			.option("compression", "gzip")
 			.json(path);
 	}

--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java
@ -74,7 +74,7 @@ public class ConversionUtils {
 		}

 		final OaBrokerRelatedDataset res = new OaBrokerRelatedDataset();
-		res.setOpenaireId(d.getId());
+		res.setOpenaireId(cleanOpenaireId(d.getId()));
 		res.setOriginalId(first(d.getOriginalId()));
 		res.setTitle(structPropValue(d.getTitle()));
 		res.setPids(mappedList(d.getPid(), ConversionUtils::oafPidToBrokerPid));
@ -89,7 +89,7 @@ public class ConversionUtils {
 		}

 		final OaBrokerRelatedPublication res = new OaBrokerRelatedPublication();
-		res.setOpenaireId(p.getId());
+		res.setOpenaireId(cleanOpenaireId(p.getId()));
 		res.setOriginalId(first(p.getOriginalId()));
 		res.setTitle(structPropValue(p.getTitle()));
 		res.setPids(mappedList(p.getPid(), ConversionUtils::oafPidToBrokerPid));
@ -106,7 +106,7 @@ public class ConversionUtils {

 		final OaBrokerMainEntity res = new OaBrokerMainEntity();

-		res.setOpenaireId(result.getId());
+		res.setOpenaireId(cleanOpenaireId(result.getId()));
 		res.setOriginalId(first(result.getOriginalId()));
 		res.setTypology(classId(result.getResulttype()));
 		res.setTitles(structPropList(result.getTitle()));
@ -129,6 +129,10 @@ public class ConversionUtils {
 		return res;
 	}

+	/**
+	 * Strips the prefix from an identifier: when the id contains a {@code |}, everything up to and
+	 * including the first {@code |} is removed; otherwise the id is returned unchanged.
+	 *
+	 * @param id the identifier to clean, may be {@code null}
+	 * @return the text following the first {@code |}, the id itself when it contains no {@code |},
+	 *         or {@code null} when the id is {@code null}
+	 */
+	public static String cleanOpenaireId(final String id) {
+		// StringUtils.contains is null-safe: a null id is passed through instead of raising a NullPointerException
+		return StringUtils.contains(id, "|") ? StringUtils.substringAfter(id, "|") : id;
+	}
+
 	private static OaBrokerAuthor oafAuthorToBrokerAuthor(final Author author) {
 		if (author == null) {
 			return null;
@ -188,7 +192,7 @@ public class ConversionUtils {
 		}

 		final OaBrokerProject res = new OaBrokerProject();
-		res.setOpenaireId(p.getId());
+		res.setOpenaireId(cleanOpenaireId(p.getId()));
 		res.setTitle(fieldValue(p.getTitle()));
 		res.setAcronym(fieldValue(p.getAcronym()));
 		res.setCode(fieldValue(p.getCode()));
@ -214,7 +218,7 @@ public class ConversionUtils {
 		}

 		final OaBrokerRelatedSoftware res = new OaBrokerRelatedSoftware();
-		res.setOpenaireId(sw.getId());
+		res.setOpenaireId(cleanOpenaireId(sw.getId()));
 		res.setName(structPropValue(sw.getTitle()));
 		res.setDescription(fieldValue(sw.getDescription()));
 		res.setRepository(fieldValue(sw.getCodeRepositoryUrl()));
@ -230,7 +234,7 @@ public class ConversionUtils {

 		final OaBrokerRelatedDatasource res = new OaBrokerRelatedDatasource();
 		res.setName(StringUtils.defaultIfBlank(fieldValue(ds.getOfficialname()), fieldValue(ds.getEnglishname())));
-		res.setOpenaireId(ds.getId());
+		res.setOpenaireId(cleanOpenaireId(ds.getId()));
 		res.setType(classId(ds.getDatasourcetype()));
 		return res;
 	}
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java
@ -59,9 +59,18 @@ public class DatasourceRelationsAccumulator implements Serializable {
 		final DatasourceRelationsAccumulator res = new DatasourceRelationsAccumulator();
 		collectedFromSet
 			.stream()
-			.map(s -> new Tuple3<>(r.getId(), s, BrokerConstants.COLLECTED_FROM_REL))
+			.map(
+				s -> new Tuple3<>(ConversionUtils.cleanOpenaireId(r.getId()), ConversionUtils.cleanOpenaireId(s),
+					BrokerConstants.COLLECTED_FROM_REL))
 			.forEach(res::addTuple);
-		hostedBySet.stream().map(s -> new Tuple3<>(r.getId(), s, BrokerConstants.HOSTED_BY_REL)).forEach(res::addTuple);
+
+		hostedBySet
+			.stream()
+			.map(
+				s -> new Tuple3<>(ConversionUtils.cleanOpenaireId(r.getId()), ConversionUtils.cleanOpenaireId(s),
+					BrokerConstants.HOSTED_BY_REL))
+			.forEach(res::addTuple);
+
 		return res;
 	}

--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java
@ -76,6 +76,7 @@ public class EventFinder {
 		final Set<String> dsIdWhitelist,
 		final Set<String> dsIdBlacklist,
 		final Set<String> dsTypeWhitelist,
+		final Set<String> topicWhitelist,
 		final Map<String, LongAccumulator> accumulators) {

 		final List<UpdateInfo<?>> list = new ArrayList<>();
@ -84,7 +85,13 @@ public class EventFinder {
 			for (final OaBrokerRelatedDatasource targetDs : target.getDatasources()) {
 				if (verifyTarget(targetDs, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist)) {
 					for (final UpdateMatcher<?> matcher : matchers) {
-						list.addAll(matcher.searchUpdatesForRecord(target, targetDs, results.getData(), accumulators));
+						for (final UpdateInfo<?> info : matcher
+							.searchUpdatesForRecord(target, targetDs, results.getData(), accumulators)) {
+							if (topicWhitelist == null || topicWhitelist.isEmpty()
+								|| topicWhitelist.contains(info.getTopic().getPath())) {
+								list.add(info);
+							}
+						}
 					}
 				}
 			}
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/check_duplicates.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/check_duplicates.json
@ -0,0 +1,9 @@
+[
+
+	{
+		"paramName": "o",
+		"paramLongName": "outputDir",
+		"paramDescription": "the path where the data are stored",
+		"paramRequired": true
+	}
+]
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/common_params.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/common_params.json
@ -7,7 +7,7 @@
 	},
 	{
 		"paramName": "o",
-		"paramLongName": "workingPath",
+		"paramLongName": "workingDir",
 		"paramDescription": "the path where the temporary data will be stored",
 		"paramRequired": true
 	}
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml
@ -6,7 +6,7 @@
            <description>the path where the graph is stored</description>
        </property>
        <property>
-            <name>workingPath</name>
+            <name>outputDir</name>
            <description>the path where the the generated data will be stored</description>
        </property>
 		<property>
@ -24,6 +24,11 @@
            <value>-</value>
            <description>a black list (comma separeted, - for empty list) of datasource ids</description>
        </property>
+        <property>
+            <name>topicWhitelist</name>
+            <value>*</value>
+            <description>a white list (comma separated, * for all) of topics</description>
+        </property>
        <property>
            <name>esEventIndexName</name>
            <description>the elasticsearch index name for events</description>
@ -36,6 +41,26 @@
            <name>esIndexHost</name>
            <description>the elasticsearch host</description>
        </property>
+        <property>
+            <name>esBatchWriteRetryCount</name>
+            <value>8</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchWriteRetryWait</name>
+            <value>60s</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchSizeEntries</name>
+            <value>200</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esNodesWanOnly</name>
+            <value>true</value>
+            <description>an ES configuration property</description>
+        </property>
        <property>
        	<name>maxIndexedEventsForDsAndTopic</name>
        	<description>the max number of events for each couple (ds/topic)</description>
@ -111,15 +136,15 @@
        </configuration>
    </global>

-    <start to="ensure_working_path"/>
+    <start to="ensure_output_dir"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    
-    <action name="ensure_working_path">
+    <action name="ensure_output_dir">
        <fs>
-            <mkdir path='${workingPath}'/>
+            <mkdir path='${outputDir}'/>
        </fs>
        <ok to="start_entities_and_rels"/>
        <error to="Kill"/>
@ -152,7 +177,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -176,7 +201,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -201,7 +226,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -225,7 +250,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -249,7 +274,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -273,7 +298,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="wait_entities_and_rels"/>
        <error to="Kill"/>
@ -299,7 +324,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="join_entities_step1"/>
        <error to="Kill"/>
@ -323,7 +348,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="join_entities_step2"/>
        <error to="Kill"/>
@ -347,7 +372,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="join_entities_step3"/>
        <error to="Kill"/>
@ -371,7 +396,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="join_entities_step4"/>
        <error to="Kill"/>
@ -395,7 +420,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="prepare_groups"/>
        <error to="Kill"/>
@ -419,7 +444,7 @@
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
        </spark>
        <ok to="generate_events"/>
        <error to="Kill"/>
@ -442,10 +467,12 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--workingDir</arg><arg>${workingDir}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
 			<arg>--datasourceIdWhitelist</arg><arg>${datasourceIdWhitelist}</arg>
 			<arg>--datasourceTypeWhitelist</arg><arg>${datasourceTypeWhitelist}</arg>
 			<arg>--datasourceIdBlacklist</arg><arg>${datasourceIdBlacklist}</arg>
+			<arg>--topicWhitelist</arg><arg>${topicWhitelist}</arg>
        </spark>
        <ok to="index_event_subset"/>
        <error to="Kill"/>
@ -468,9 +495,13 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
            <arg>--index</arg><arg>${esEventIndexName}</arg>
            <arg>--esHost</arg><arg>${esIndexHost}</arg>
+            <arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
+            <arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
+            <arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
+            <arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
            <arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
            <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
        </spark>
@ -495,9 +526,13 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
            <arg>--index</arg><arg>${esNotificationsIndexName}</arg>
            <arg>--esHost</arg><arg>${esIndexHost}</arg>
+            <arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
+            <arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
+            <arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
+            <arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
            <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
        </spark>
        <ok to="stats"/>
@ -521,7 +556,7 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
            <arg>--dbUrl</arg><arg>${brokerDbUrl}</arg>
            <arg>--dbUser</arg><arg>${brokerDbUser}</arg>
            <arg>--dbPassword</arg><arg>${brokerDbPassword}</arg>
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_events.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_events.json
@ -1,7 +1,13 @@
 [
+	{
+		"paramName": "wp",
+		"paramLongName": "workingDir",
+		"paramDescription": "the path where the temporary data are stored",
+		"paramRequired": true
+	},
 	{
 		"paramName": "o",
-		"paramLongName": "workingPath",
+		"paramLongName": "outputDir",
 		"paramDescription": "the path where the generated events will be stored",
 		"paramRequired": true
 	},
@ -22,5 +28,11 @@
 		"paramLongName": "datasourceIdBlacklist",
 		"paramDescription": "a black list (comma separeted, - for empty list) of datasource ids",
 		"paramRequired": true
+	},
+	{
+		"paramName": "topicWhitelist",
+		"paramLongName": "topicWhitelist",
+		"paramDescription": "a white list (comma separated, * for all) of topics",
+		"paramRequired": true
 	}
 ]
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_es.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_es.json
@ -1,8 +1,8 @@
 [
 	{
 		"paramName": "o",
-		"paramLongName": "workingPath",
-		"paramDescription": "the workinh path",
+		"paramLongName": "outputDir",
+		"paramDescription": "the data path",
 		"paramRequired": true
 	},
 	{
@ -16,5 +16,29 @@
 		"paramLongName": "esHost",
 		"paramDescription": "the ES host",
 		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchWriteRetryCount",
+		"paramLongName": "esBatchWriteRetryCount",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchWriteRetryWait",
+		"paramLongName": "esBatchWriteRetryWait",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchSizeEntries",
+		"paramLongName": "esBatchSizeEntries",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esNodesWanOnly",
+		"paramLongName": "esNodesWanOnly",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
 	}
 ]
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_event_subset.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_event_subset.json
@ -1,8 +1,8 @@
 [
 	{
 		"paramName": "o",
-		"paramLongName": "workingPath",
-		"paramDescription": "the workinh path",
+		"paramLongName": "outputDir",
+		"paramDescription": "the path where the generated data are stored",
 		"paramRequired": true
 	},
 	{
@ -16,7 +16,31 @@
 		"paramLongName": "esHost",
 		"paramDescription": "the ES host",
 		"paramRequired": true
+	},	
+	{
+		"paramName": "esBatchWriteRetryCount",
+		"paramLongName": "esBatchWriteRetryCount",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
 	},
+	{
+		"paramName": "esBatchWriteRetryWait",
+		"paramLongName": "esBatchWriteRetryWait",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchSizeEntries",
+		"paramLongName": "esBatchSizeEntries",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esNodesWanOnly",
+		"paramLongName": "esNodesWanOnly",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},	
 	{
 		"paramName": "n",
 		"paramLongName": "maxEventsForTopic",
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_notifications.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/index_notifications.json
@ -1,8 +1,8 @@
 [
 	{
 		"paramName": "o",
-		"paramLongName": "workingPath",
-		"paramDescription": "the workinh path",
+		"paramLongName": "outputDir",
+		"paramDescription": "the dir that contains the events folder",
 		"paramRequired": true
 	},
 	{
@ -17,6 +17,30 @@
 		"paramDescription": "the ES host",
 		"paramRequired": true
 	},
+	{
+		"paramName": "esBatchWriteRetryCount",
+		"paramLongName": "esBatchWriteRetryCount",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchWriteRetryWait",
+		"paramLongName": "esBatchWriteRetryWait",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esBatchSizeEntries",
+		"paramLongName": "esBatchSizeEntries",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
+	{
+		"paramName": "esNodesWanOnly",
+		"paramLongName": "esNodesWanOnly",
+		"paramDescription": "an ES configuration property",
+		"paramRequired": true
+	},
 	{
 		"paramName": "broker",
 		"paramLongName": "brokerApiBaseUrl",
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/notifications_only/oozie_app/workflow.xml
@ -6,8 +6,8 @@
            <description>the path where the graph is stored</description>
        </property>
        <property>
-            <name>workingPath</name>
-            <description>the path where the the generated data will be stored</description>
+            <name>outputDir</name>
+            <description>the path where the generated data are stored</description>
        </property>
 		<property>
            <name>datasourceIdWhitelist</name>
@ -36,6 +36,26 @@
            <name>esIndexHost</name>
            <description>the elasticsearch host</description>
        </property>
+        <property>
+            <name>esBatchWriteRetryCount</name>
+            <value>8</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchWriteRetryWait</name>
+            <value>60s</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchSizeEntries</name>
+            <value>200</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esNodesWanOnly</name>
+            <value>true</value>
+            <description>an ES configuration property</description>
+        </property>
        <property>
        	<name>maxIndexedEventsForDsAndTopic</name>
        	<description>the max number of events for each couple (ds/topic)</description>
@ -122,9 +142,13 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
            <arg>--index</arg><arg>${esNotificationsIndexName}</arg>
            <arg>--esHost</arg><arg>${esIndexHost}</arg>
+            <arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
+            <arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
+            <arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
+            <arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
            <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
        </spark>
        <ok to="End"/>
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/od_partitions_params.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/od_partitions_params.json
@ -0,0 +1,14 @@
+[
+	{
+		"paramName": "o",
+		"paramLongName": "outputDir",
+		"paramDescription": "the path where the data will be stored",
+		"paramRequired": true
+	},
+	{
+		"paramName": "list",
+		"paramLongName": "opendoarIds",
+		"paramDescription": "the opendoar IDs whitelist (comma separated)",
+		"paramRequired": true
+	}
+]
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/config-default.xml
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/workflow.xml
@ -0,0 +1,99 @@
+<workflow-app name="partitionEventsByOpendoarIds" xmlns="uri:oozie:workflow:0.5">
+
+    <parameters>
+        <property>
+            <name>opendoarIds</name>
+            <description>the opendoar IDs whitelist (comma separated)</description>
+        </property>
+        <property>
+            <name>outputDir</name>
+            <description>the path where the generated data will be stored</description>
+        </property>
+        <property>
+            <name>sparkDriverMemory</name>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorCores</name>
+            <description>number of cores used by single executor</description>
+        </property>
+        <property>
+            <name>oozieActionShareLibForSpark2</name>
+            <description>oozie action sharelib for spark 2.*</description>
+        </property>
+        <property>
+            <name>spark2ExtraListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
+            <description>spark 2.* extra listeners classname</description>
+        </property>
+        <property>
+            <name>spark2SqlQueryExecutionListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
+            <description>spark 2.* sql query execution listeners classname</description>
+        </property>
+        <property>
+            <name>spark2YarnHistoryServerAddress</name>
+            <description>spark 2.* yarn history server address</description>
+        </property>
+        <property>
+            <name>spark2EventLogDir</name>
+            <description>spark 2.* event log dir location</description>
+        </property>
+    </parameters>
+
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>mapreduce.job.queuename</name>
+                <value>${queueName}</value>
+            </property>
+            <property>
+                <name>oozie.launcher.mapred.job.queue.name</name>
+                <value>${oozieLauncherQueueName}</value>
+            </property>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
+    <start to="opendoarPartition"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    
+   <action name="opendoarPartition"> <!-- runs PartitionEventsByDsIdJob as a Spark job on YARN (cluster mode) -->
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>PartitionEventsByDsIdJob</name>
+            <class>eu.dnetlib.dhp.broker.oa.PartitionEventsByDsIdJob</class>
+            <jar>dhp-broker-events-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-cores=${sparkExecutorCores}
+                --executor-memory=${sparkExecutorMemory}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.shuffle.partitions=3840
+            </spark-opts>
+            <arg>--outputDir</arg><arg>${outputDir}</arg> <!-- outputDir is the path parameter declared in <parameters> -->
+            <arg>--opendoarIds</arg><arg>${opendoarIds}</arg> <!-- comma separated whitelist of opendoar IDs -->
+        </spark>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+    	
+    <end name="End"/>
+
+</workflow-app>
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/config-default.xml
@ -0,0 +1,18 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/reindex/oozie_app/workflow.xml
@ -1,41 +1,38 @@
-<workflow-app name="create broker events - partial" xmlns="uri:oozie:workflow:0.5">
+<workflow-app name="reindex_events" xmlns="uri:oozie:workflow:0.5">

    <parameters>
        <property>
-            <name>graphInputPath</name>
-            <description>the path where the graph is stored</description>
-        </property>
-        <property>
-            <name>workingPath</name>
+            <name>outputDir</name>
            <description>the path where the the generated data will be stored</description>
-        </property>
-		<property>
-            <name>datasourceIdWhitelist</name>
-            <value>-</value>
-            <description>a white list (comma separeted, - for empty list) of datasource ids</description>
-        </property>
-		<property>
-            <name>datasourceTypeWhitelist</name>
-            <value>-</value>
-            <description>a white list (comma separeted, - for empty list) of datasource types</description>
-        </property>
-		<property>
-            <name>datasourceIdBlacklist</name>
-            <value>-</value>
-            <description>a black list (comma separeted, - for empty list) of datasource ids</description>
        </property>
        <property>
            <name>esEventIndexName</name>
            <description>the elasticsearch index name for events</description>
        </property>
-        <property>
-            <name>esNotificationsIndexName</name>
-            <description>the elasticsearch index name for notifications</description>
-        </property>
        <property>
            <name>esIndexHost</name>
            <description>the elasticsearch host</description>
        </property>
+        <property>
+            <name>esBatchWriteRetryCount</name>
+            <value>8</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchWriteRetryWait</name>
+            <value>60s</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esBatchSizeEntries</name>
+            <value>200</value>
+            <description>an ES configuration property</description>
+        </property>
+		<property>
+            <name>esNodesWanOnly</name>
+            <value>true</value>
+            <description>an ES configuration property</description>
+        </property>
        <property>
        	<name>maxIndexedEventsForDsAndTopic</name>
        	<description>the max number of events for each couple (ds/topic)</description>
@ -44,18 +41,6 @@
        	<name>brokerApiBaseUrl</name>
        	<description>the url of the broker service api</description>
        </property>
-        <property>
-        	<name>brokerDbUrl</name>
-        	<description>the url of the broker database</description>
-        </property>
-        <property>
-        	<name>brokerDbUser</name>
-        	<description>the user of the broker database</description>
-        </property>
-        <property>
-        	<name>brokerDbPassword</name>
-        	<description>the password of the broker database</description>
-        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -111,36 +96,45 @@
        </configuration>
    </global>

-    <start to="partition"/>
+    <start to="index_event_subset"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
-    
-   <action name="partition">
+        
+     <action name="index_event_subset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
-            <name>PartitionEventsByDsIdJob</name>
-            <class>eu.dnetlib.dhp.broker.oa.PartitionEventsByDsIdJob</class>
+            <name>IndexEventSubsetOnESJob</name>
+            <class>eu.dnetlib.dhp.broker.oa.IndexEventSubsetJob</class>
            <jar>dhp-broker-events-${projectVersion}.jar</jar>
            <spark-opts>
-                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.dynamicAllocation.maxExecutors="8" 
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
-            <arg>--graphPath</arg><arg>${graphInputPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
+            <arg>--outputDir</arg><arg>${outputDir}</arg>
+            <arg>--index</arg><arg>${esEventIndexName}</arg>
+            <arg>--esHost</arg><arg>${esIndexHost}</arg>
+            <arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
+            <arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
+            <arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
+            <arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
+            <arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
+            <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
-    	
+    
+   
+
    <end name="End"/>

 </workflow-app>
--- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats_params.json
+++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats_params.json
@ -1,8 +1,8 @@
 [
 	{
-		"paramName": "wp",
-		"paramLongName": "workingPath",
-		"paramDescription": "the working path",
+		"paramName": "o",
+		"paramLongName": "outputDir",
+		"paramDescription": "the path where generated data are stored",
 		"paramRequired": true
 	},
 	{
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
@ -35,7 +35,8 @@ object DoiBoostMappingUtil {
  //STATIC STRING
  val MAG = "microsoft"
  val MAG_NAME = "Microsoft Academic Graph"
-  val ORCID = "ORCID"
+  val ORCID = "orcid"
+  val ORCID_PENDING = "orcid_pending"
  val CROSSREF = "Crossref"
  val UNPAYWALL = "UnpayWall"
  val GRID_AC = "grid.ac"
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala
@ -62,7 +62,7 @@ object SparkGenerateDoiBoost {
    val orcidPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
    fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/secondJoin")

-    logger.info("Phase 3) Join Result with MAG")
+    logger.info("Phase 4) Join Result with MAG")
    val sj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p))

    val magPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala
@ -200,7 +200,7 @@ case object Crossref2Oaf {
    a.setSurname(family)
    a.setFullname(s"$given $family")
    if (StringUtils.isNotBlank(orcid))
-      a.setPid(List(createSP(orcid, ORCID, PID_TYPES)).asJava)
+      a.setPid(List(createSP(orcid, ORCID_PENDING, PID_TYPES, generateDataInfo())).asJava)

    a
  }
@ -248,7 +248,7 @@ case object Crossref2Oaf {


    def snsfRule(award:String): String = {
-      var tmp1 = StringUtils.substringAfter(award,"_")
+      val tmp1 = StringUtils.substringAfter(award,"_")
      val tmp2 = StringUtils.substringBefore(tmp1,"/")
      logger.debug(s"From $award to $tmp2")
      tmp2
@ -294,7 +294,7 @@ case object Crossref2Oaf {
    }

    def getProjectId (nsPrefix:String, targetId:String):String = {
-      "40|$nsPrefix::$targetId"
+      s"40|$nsPrefix::$targetId"
    }


--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala
@ -2,6 +2,7 @@ package eu.dnetlib.doiboost.crossref

 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import org.apache.commons.io.IOUtils
+import org.apache.hadoop.io.{IntWritable, Text}
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.expressions.Aggregator
 import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
@ -12,21 +13,23 @@ import org.slf4j.{Logger, LoggerFactory}

 object CrossrefDataset {

+  val logger: Logger = LoggerFactory.getLogger(CrossrefDataset.getClass) // named after this object so log lines are attributed to CrossrefDataset

-  def extractTimestamp(input:String): Long = {
+
+  def to_item(input:String):CrossrefDT = {

    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json: json4s.JValue = parse(input)
-
-    (json\"indexed"\"timestamp").extractOrElse[Long](0)
+    val ts:Long = (json \ "indexed" \ "timestamp").extract[Long]
+    val doi:String  = (json \ "DOI").extract[String]
+    CrossrefDT(doi, input, ts)

  }

-
  def main(args: Array[String]): Unit = {


-    val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass)
+
    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(IOUtils.toString(CrossrefDataset.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json")))
    parser.parseArgument(args)
@ -49,9 +52,8 @@ object CrossrefDataset {
        if (a == null)
          return b

-        val tb = extractTimestamp(b.json)
-        val ta = extractTimestamp(a.json)
-        if(ta >tb) {
+
+        if(a.timestamp >b.timestamp) {
          return a
        }
        b
@ -63,9 +65,7 @@ object CrossrefDataset {
        if (a == null)
          return b

-        val tb = extractTimestamp(b.json)
-        val ta = extractTimestamp(a.json)
-        if(ta >tb) {
+        if(a.timestamp >b.timestamp) {
          return a
        }
        b
@ -78,15 +78,21 @@ object CrossrefDataset {
      override def finish(reduction: CrossrefDT): CrossrefDT = reduction
    }

-    val sourcePath:String = parser.get("sourcePath")
-    val targetPath:String = parser.get("targetPath")
+    val workingPath:String = parser.get("workingPath")

-    val ds:Dataset[CrossrefDT] = spark.read.load(sourcePath).as[CrossrefDT]

-    ds.groupByKey(_.doi)
+    val main_ds:Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT]
+
+
+    val update =
+      spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update",  classOf[IntWritable], classOf[Text])
+        .map(i =>CrossrefImporter.decompressBlob(i._2.toString))
+        .map(i =>to_item(i)))
+
+    main_ds.union(update).groupByKey(_.doi)
      .agg(crossrefAggregator.toColumn)
      .map(s=>s._2)
-      .write.mode(SaveMode.Overwrite).save(targetPath)
+      .write.mode(SaveMode.Overwrite).save(s"$workingPath/crossref_ds_updated")

  }

--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala
@ -34,85 +34,21 @@ object SparkMapDumpIntoOAF {
    implicit val mapEncoderRelatons: Encoder[Relation] = Encoders.kryo[Relation]
    implicit val mapEncoderDatasets: Encoder[oaf.Dataset] = Encoders.kryo[OafDataset]

-    val sc = spark.sparkContext
    val targetPath = parser.get("targetPath")
    import spark.implicits._

-
    spark.read.load(parser.get("sourcePath")).as[CrossrefDT]
      .flatMap(k => Crossref2Oaf.convert(k.json))
      .filter(o => o != null)
      .write.mode(SaveMode.Overwrite).save(s"$targetPath/mixObject")

-
    val ds:Dataset[Oaf] = spark.read.load(s"$targetPath/mixObject").as[Oaf]

-    ds.filter(o => o.isInstanceOf[Publication]).map(o => o.asInstanceOf[Publication]).write.save(s"$targetPath/publication")
+    ds.filter(o => o.isInstanceOf[Publication]).map(o => o.asInstanceOf[Publication]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefPublication")

-    ds.filter(o => o.isInstanceOf[Relation]).map(o => o.asInstanceOf[Relation]).write.save(s"$targetPath/relation")
+    ds.filter(o => o.isInstanceOf[Relation]).map(o => o.asInstanceOf[Relation]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefRelation")

-    ds.filter(o => o.isInstanceOf[OafDataset]).map(o => o.asInstanceOf[OafDataset]).write.save(s"$targetPath/dataset")
-
-
-
-//
-//
-//
-//    sc.sequenceFile(parser.get("sourcePath"), classOf[IntWritable], classOf[Text])
-//      .map(k => k._2.toString).map(CrossrefImporter.decompressBlob)
-//      .flatMap(k => Crossref2Oaf.convert(k)).saveAsObjectFile(s"${targetPath}/mixObject")
-//
-//    val inputRDD = sc.objectFile[Oaf](s"${targetPath}/mixObject").filter(p=> p!= null)
-//
-//    val distinctPubs:RDD[Publication] = inputRDD.filter(k => k != null && k.isInstanceOf[Publication])
-//      .map(k => k.asInstanceOf[Publication]).map { p: Publication => Tuple2(p.getId, p) }.reduceByKey { case (p1: Publication, p2: Publication) =>
-//      var r = if (p1 == null) p2 else p1
-//      if (p1 != null && p2 != null) {
-//        if (p1.getLastupdatetimestamp != null && p2.getLastupdatetimestamp != null) {
-//          if (p1.getLastupdatetimestamp < p2.getLastupdatetimestamp)
-//            r = p2
-//          else
-//            r = p1
-//        } else {
-//          r = if (p1.getLastupdatetimestamp == null) p2 else p1
-//        }
-//      }
-//      r
-//    }.map(_._2)
-//
-//    val pubs:Dataset[Publication] = spark.createDataset(distinctPubs)
-//    pubs.write.mode(SaveMode.Overwrite).save(s"${targetPath}/publication")
-//
-//
-//    val distincDatasets:RDD[OafDataset] = inputRDD.filter(k => k != null && k.isInstanceOf[OafDataset])
-//      .map(k => k.asInstanceOf[OafDataset]).map(p => Tuple2(p.getId, p)).reduceByKey { case (p1: OafDataset, p2: OafDataset) =>
-//      var r = if (p1 == null) p2 else p1
-//      if (p1 != null && p2 != null) {
-//        if (p1.getLastupdatetimestamp != null && p2.getLastupdatetimestamp != null) {
-//          if (p1.getLastupdatetimestamp < p2.getLastupdatetimestamp)
-//            r = p2
-//          else
-//            r = p1
-//        } else {
-//          r = if (p1.getLastupdatetimestamp == null) p2 else p1
-//        }
-//      }
-//      r
-//    }.map(_._2)
-//
-//    spark.createDataset(distincDatasets).write.mode(SaveMode.Overwrite).save(s"${targetPath}/dataset")
-//
-//
-//
-//    val distinctRels =inputRDD.filter(k => k != null && k.isInstanceOf[Relation])
-//      .map(k => k.asInstanceOf[Relation]).map(r=> (s"${r.getSource}::${r.getTarget}",r))
-//      .reduceByKey { case (p1: Relation, p2: Relation) =>
-//        if (p1 == null) p2 else p1
-//      }.map(_._2)
-//
-//    val rels: Dataset[Relation] = spark.createDataset(distinctRels)
-//
-//    rels.write.mode(SaveMode.Overwrite).save(s"${targetPath}/relations")
+    ds.filter(o => o.isInstanceOf[OafDataset]).map(o => o.asInstanceOf[OafDataset]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefDataset")
  }


--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala
@ -21,15 +21,17 @@ object SparkImportMagIntoDataset {


  val stream = Map(
-    "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
-    "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
-    "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
+    "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Iso3166Code:string", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
+    "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")),
+    "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")),
+    "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
    "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
    "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")),
    "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")),
    "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")),
-    "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
-    "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
+    //                                                         ['FieldOfStudyId:long', 'Rank:uint', 'NormalizedName:string', 'DisplayName:string', 'MainType:string', 'Level:int', 'PaperCount:long', 'PaperFamilyCount:long', 'CitationCount:long', 'CreatedDate:DateTime']
+    "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")),
+    "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "CreatedDate:DateTime")),
    "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")),
    "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")),
    "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")),
@ -39,7 +41,7 @@ object SparkImportMagIntoDataset {
    "PaperReferences" -> Tuple2("mag/PaperReferences.txt", Seq("PaperId:long", "PaperReferenceId:long")),
    "PaperResources" -> Tuple2("mag/PaperResources.txt", Seq("PaperId:long", "ResourceType:int", "ResourceUrl:string", "SourceUrl:string", "RelationshipType:int")),
    "PaperUrls" -> Tuple2("mag/PaperUrls.txt", Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string")),
-    "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "CreatedDate:DateTime")),
+    "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "OnlineDate:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "FamilyRank:uint?", "CreatedDate:DateTime")),
    "RelatedFieldOfStudy" -> Tuple2("advanced/RelatedFieldOfStudy.txt", Seq("FieldOfStudyId1:long", "Type1:string", "FieldOfStudyId2:long", "Type2:string", "Rank:float"))
  )

--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala
@ -26,12 +26,15 @@ object SparkPreProcessMAG {
        .master(parser.get("master")).getOrCreate()

    val sourcePath = parser.get("sourcePath")
+    val workingPath = parser.get("workingPath")
+    val targetPath = parser.get("targetPath")
+
    import spark.implicits._
    implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication]
    implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)

    logger.info("Phase 1) make uninque DOI in Papers:")
-    val d: Dataset[MagPapers] = spark.read.load(s"${parser.get("sourcePath")}/Papers").as[MagPapers]
+    val d: Dataset[MagPapers] = spark.read.load(s"$sourcePath/Papers").as[MagPapers]

    // Filtering Papers with DOI, and since for the same DOI we have multiple version of item with different PapersId we get the last one
    val result: RDD[MagPapers] = d.where(col("Doi").isNotNull)
@ -41,11 +44,12 @@ object SparkPreProcessMAG {
      .map(_._2)

    val distinctPaper: Dataset[MagPapers] = spark.createDataset(result)
-    distinctPaper.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/Papers_distinct")
+
+    distinctPaper.write.mode(SaveMode.Overwrite).save(s"$workingPath/Papers_distinct")

    logger.info("Phase 0) Enrich Publication with description")
-    val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
-    pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract")
+    val pa = spark.read.load(s"$sourcePath/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
+    pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"$workingPath/PaperAbstract")

    logger.info("Phase 3) Group Author by PaperId")
    val authors = spark.read.load(s"$sourcePath/Authors").as[MagAuthor]
@ -64,24 +68,24 @@ object SparkPreProcessMAG {
        } else
          mpa
      }).groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation")).as("authors"))
-      .write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_1_paper_authors")
+      .write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_1_paper_authors")

    logger.info("Phase 4) create First Version of publication Entity with Paper Journal and Authors")

    val journals = spark.read.load(s"$sourcePath/Journals").as[MagJournal]

-    val papers = spark.read.load((s"${parser.get("targetPath")}/Papers_distinct")).as[MagPapers]
+    val papers = spark.read.load((s"$workingPath/Papers_distinct")).as[MagPapers]

-    val paperWithAuthors = spark.read.load(s"${parser.get("targetPath")}/merge_step_1_paper_authors").as[MagPaperWithAuthorList]
+    val paperWithAuthors = spark.read.load(s"$workingPath/merge_step_1_paper_authors").as[MagPaperWithAuthorList]

    val firstJoin = papers.joinWith(journals, papers("JournalId").equalTo(journals("JournalId")), "left")
    firstJoin.joinWith(paperWithAuthors, firstJoin("_1.PaperId").equalTo(paperWithAuthors("PaperId")), "left")
      .map { a => ConversionUtil.createOAFFromJournalAuthorPaper(a) }
-      .write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_2")
+      .write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_2")


    var magPubs: Dataset[(String, Publication)] =
-      spark.read.load(s"${parser.get("targetPath")}/merge_step_2").as[Publication]
+      spark.read.load(s"$workingPath/merge_step_2").as[Publication]
      .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]


@ -95,10 +99,10 @@ object SparkPreProcessMAG {
      .map(item => ConversionUtil.updatePubsWithConferenceInfo(item))
      .write
      .mode(SaveMode.Overwrite)
-      .save(s"${parser.get("targetPath")}/merge_step_2_conference")
+      .save(s"$workingPath/merge_step_2_conference")


-    magPubs= spark.read.load(s"${parser.get("targetPath")}/merge_step_2_conference").as[Publication]
+    magPubs= spark.read.load(s"$workingPath/merge_step_2_conference").as[Publication]
      .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]

    val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl]
@ -108,27 +112,27 @@ object SparkPreProcessMAG {
    magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left")
      .map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) }
      .write.mode(SaveMode.Overwrite)
-      .save(s"${parser.get("targetPath")}/merge_step_3")
+      .save(s"$workingPath/merge_step_3")


 //    logger.info("Phase 6) Enrich Publication with description")
 //    val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
 //    pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract")

-    val paperAbstract = spark.read.load((s"${parser.get("targetPath")}/PaperAbstract")).as[MagPaperAbstract]
+    val paperAbstract = spark.read.load((s"$workingPath/PaperAbstract")).as[MagPaperAbstract]


-    magPubs = spark.read.load(s"${parser.get("targetPath")}/merge_step_3").as[Publication]
+    magPubs = spark.read.load(s"$workingPath/merge_step_3").as[Publication]
      .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]

    magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left")
      .map(item => ConversionUtil.updatePubsWithDescription(item)
-    ).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_4")
+    ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4")


    logger.info("Phase 7) Enrich Publication with FieldOfStudy")

-    magPubs = spark.read.load(s"${parser.get("targetPath")}/merge_step_4").as[Publication]
+    magPubs = spark.read.load(s"$workingPath/merge_step_4").as[Publication]
      .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]

    val fos = spark.read.load(s"$sourcePath/FieldsOfStudy").select($"FieldOfStudyId".alias("fos"), $"DisplayName", $"MainType")
@ -144,14 +148,14 @@ object SparkPreProcessMAG {
      .equalTo(paperField("PaperId")), "left")
      .map(item => ConversionUtil.updatePubsWithSubject(item))
      .write.mode(SaveMode.Overwrite)
-      .save(s"${parser.get("targetPath")}/mag_publication")
+      .save(s"$workingPath/mag_publication")


-    val s:RDD[Publication] = spark.read.load(s"${parser.get("targetPath")}/mag_publication").as[Publication]
+    val s:RDD[Publication] = spark.read.load(s"$workingPath/mag_publication").as[Publication]
      .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b))
    .map(_._2)

-    spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/mag_publication_u")
+    spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication")

  }
 }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala
@ -1,6 +1,7 @@
 package eu.dnetlib.doiboost.orcid

-import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
+import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication}
+import eu.dnetlib.dhp.schema.orcid.OrcidDOI
 import eu.dnetlib.doiboost.DoiBoostMappingUtil
 import eu.dnetlib.doiboost.DoiBoostMappingUtil.{ORCID, PID_TYPES, createSP, generateDataInfo, generateIdentifier}
 import org.apache.commons.lang.StringUtils
@ -43,16 +44,19 @@ object ORCIDToOAF {
  }


-  def convertTOOAF(input:ORCIDElement) :Publication = {
-    val doi = input.doi
+  def convertTOOAF(input:OrcidDOI) :Publication = {
+    val doi = input.getDoi
    val pub:Publication = new Publication
-    pub.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava)
+    pub.setPid(List(createSP(doi.toLowerCase, "doi", PID_TYPES)).asJava)
    pub.setDataInfo(generateDataInfo())
    pub.setId(generateIdentifier(pub, doi.toLowerCase))
    try{
-      pub.setAuthor(input.authors.map(a=> {
-        generateAuthor(a.name, a.surname, a.creditName, a.oid)
-      }).asJava)
+
+      val l:List[Author]= input.getAuthors.asScala.map(a=> {
+              generateAuthor(a.getName, a.getSurname, a.getCreditName, a.getOid)
+            })(collection.breakOut)
+
+      pub.setAuthor(l.asJava)
      pub.setCollectedfrom(List(DoiBoostMappingUtil.createORIDCollectedFrom()).asJava)
      pub.setDataInfo(DoiBoostMappingUtil.generateDataInfo())
      pub
@ -63,6 +67,13 @@ object ORCIDToOAF {
    }
  }

+  /**
+    * Builds the DataInfo attached to ORCID author identifiers.
+    *
+    * Starts from `DoiBoostMappingUtil.generateDataInfo("0.91")` and then overrides
+    * the class id and class name of its provenance action.
+    *
+    * @return the customised DataInfo instance
+    */
+  def generateOricPIDDatainfo():DataInfo = {
+    val dataInfo = DoiBoostMappingUtil.generateDataInfo("0.91")
+    val provenance = dataInfo.getProvenanceaction
+    provenance.setClassid("sysimport:crosswalk:entityregistry")
+    provenance.setClassname("Harvested")
+    dataInfo
+  }
+
  def generateAuthor(given: String, family: String, fullName:String, orcid: String): Author = {
    val a = new Author
    a.setName(given)
@ -72,7 +83,7 @@ object ORCIDToOAF {
    else
      a.setFullname(s"$given $family")
    if (StringUtils.isNotBlank(orcid))
-      a.setPid(List(createSP(orcid, ORCID, PID_TYPES)).asJava)
+      a.setPid(List(createSP(orcid, ORCID, PID_TYPES, generateOricPIDDatainfo())).asJava)

    a
  }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala
@ -1,21 +1,72 @@
 package eu.dnetlib.doiboost.orcid

+import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.oa.merge.AuthorMerger
 import eu.dnetlib.dhp.schema.oaf.Publication
+import eu.dnetlib.dhp.schema.orcid.OrcidDOI
 import eu.dnetlib.doiboost.mag.ConversionUtil
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.expressions.Aggregator
 import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.slf4j.{Logger, LoggerFactory}

 object SparkConvertORCIDToOAF {
+  val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass)
+
+  /**
+    * Typed Spark aggregator that folds every (key, Publication) pair of a group
+    * into a single Publication.
+    *
+    * Each incoming record is merged into the buffer with `Publication.mergeFrom`,
+    * the author lists are combined through `AuthorMerger.mergeAuthor`, and the
+    * buffer takes an id from the incoming side only while its own id is still null.
+    * Buffer and output are both serialised with Kryo.
+    *
+    * @return an aggregator usable with `groupByKey(...).agg(aggregator.toColumn)`
+    */
+  def getPublicationAggregator(): Aggregator[(String, Publication), Publication, Publication] = new Aggregator[(String, Publication), Publication, Publication]{
+
+    // Starting buffer: a freshly constructed Publication.
+    override def zero: Publication = new Publication()
+
+    // Folds one input record into the buffer.
+    override def reduce(b: Publication, a: (String, Publication)): Publication = {
+      b.mergeFrom(a._2)
+      b.setAuthor(AuthorMerger.mergeAuthor(a._2.getAuthor, b.getAuthor))
+      if (b.getId == null)
+        b.setId(a._2.getId)
+      b
+    }


+    // Combines two partial buffers.
+    override def merge(wx: Publication, wy: Publication): Publication = {
+      wx.mergeFrom(wy)
+      wx.setAuthor(AuthorMerger.mergeAuthor(wy.getAuthor, wx.getAuthor))
+      // wy's id can be null as well (e.g. a buffer that never received one):
+      // check for null before testing emptiness to avoid a NullPointerException.
+      if (wx.getId == null && wy.getId != null && wy.getId.nonEmpty)
+        wx.setId(wy.getId)
+      wx
+    }
+    override def finish(reduction: Publication): Publication = reduction
+
+    override def bufferEncoder: Encoder[Publication] =
+      Encoders.kryo(classOf[Publication])
+
+    override def outputEncoder: Encoder[Publication] =
+      Encoders.kryo(classOf[Publication])
+  }
+
+/**
+  * Converts ORCID records into OAF publications and stores them.
+  *
+  * Reads `sourcePath` as text, parses every line as a JSON `OrcidDOI`, maps it with
+  * `ORCIDToOAF.convertTOOAF`, drops null results, groups the publications by id and
+  * merges each group with `getPublicationAggregator`. The result is written to
+  * `targetPath`, overwriting any existing output.
+  *
+  * @param spark      the active Spark session
+  * @param sourcePath location of the line-delimited JSON input
+  * @param targetPath location where the merged publications are saved
+  */
+def run(spark:SparkSession,sourcePath:String, targetPath:String):Unit = {
+  implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
+  implicit val mapOrcid: Encoder[OrcidDOI] = Encoders.kryo[OrcidDOI]
+  implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)
+
+  val mapper = new ObjectMapper()
+  // DeserializationConfig is immutable: withFeatures(...) returns a new config and
+  // leaves the mapper untouched, so the previous call had no effect. Set the
+  // feature on the mapper itself instead.
+  // NOTE(review): this keeps the strict behaviour that was effectively in place;
+  // if unknown JSON fields should be tolerated, use mapper.disable(...) here.
+  mapper.enable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
+
+  val dataset:Dataset[OrcidDOI] = spark.createDataset(spark.sparkContext.textFile(sourcePath).map(s => mapper.readValue(s,classOf[OrcidDOI])))
+
+  logger.info("Converting ORCID to OAF")
+  dataset.map(o => ORCIDToOAF.convertTOOAF(o)).filter(p=>p!=null)
+    .map(d => (d.getId, d))
+    .groupByKey(_._1)(Encoders.STRING)
+    .agg(getPublicationAggregator().toColumn)
+    .map(p => p._2)
+    .write.mode(SaveMode.Overwrite).save(targetPath)
+}

  def main(args: Array[String]): Unit = {

-    val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass)
+
    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json")))
    parser.parseArgument(args)
@ -26,19 +77,12 @@ object SparkConvertORCIDToOAF {
        .appName(getClass.getSimpleName)
        .master(parser.get("master")).getOrCreate()

-    implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
-    implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)
-    import spark.implicits._
+
+
    val sourcePath = parser.get("sourcePath")
    val targetPath = parser.get("targetPath")
-    val dataset:Dataset[ORCIDElement] = spark.read.json(sourcePath).as[ORCIDElement]
+    run(spark, sourcePath, targetPath)

-
-    logger.info("Converting ORCID to OAF")
-    val d:RDD[Publication] = dataset.map(o => ORCIDToOAF.convertTOOAF(o)).filter(p=>p!=null).map(p=>(p.getId,p)).rdd.reduceByKey(ConversionUtil.mergePublication)
-      .map(_._2)
-
-    spark.createDataset(d).as[Publication].write.mode(SaveMode.Overwrite).save(targetPath)
  }

 }
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml
@ -16,88 +16,86 @@
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
-<!--        <property>-->
-<!--            <name>timestamp</name>-->
-<!--            <description>Timestamp for incremental Harvesting</description>-->
-<!--        </property>-->
+        <property>
+            <name>timestamp</name>
+            <description>Timestamp for incremental Harvesting</description>
+        </property>

    </parameters>

-    <start to="ExtractCrossrefToOAF"/>
+    <start to="ImportCrossRef"/>


    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

-<!--    <action name="ResetWorkingPath">-->
-<!--        <fs>-->
-<!--            <delete path='${workingPath}/input/crossref/index_dump'/>-->
-<!--&lt;!&ndash;            <mkdir path='${workingPath}/input/crossref'/>&ndash;&gt;-->
-<!--        </fs>-->
-<!--        <ok to="ImportCrossRef"/>-->
-<!--        <error to="Kill"/>-->
-<!--    </action>-->
+    <!-- Entry action of the workflow: runs the CrossrefImporter java main class,
+         passing the index_update folder under ${workingPath}/input/crossref, the
+         name node and the ${timestamp} workflow parameter.
+         On success continues with GenerateDataset, on failure goes to Kill. -->
+    <action name="ImportCrossRef">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <main-class>eu.dnetlib.doiboost.crossref.CrossrefImporter</main-class>
+            <arg>-t</arg><arg>${workingPath}/input/crossref/index_update</arg>
+            <arg>-n</arg><arg>${nameNode}</arg>
+            <arg>-ts</arg><arg>${timestamp}</arg>
+        </java>
+        <ok to="GenerateDataset"/>
+        <error to="Kill"/>
+    </action>
+
+    <!-- Runs the CrossrefDataset Spark job on the crossref input folder.
+         The working path is derived from ${workingPath} so that it matches the
+         folder written by ImportCrossRef and the one renamed by RenameDataset.
+         On success continues with RenameDataset, on failure goes to Kill. -->
+    <action name="GenerateDataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn-cluster</master>
+            <mode>cluster</mode>
+            <name>ExtractCrossrefToOAF</name>
+            <class>eu.dnetlib.doiboost.crossref.CrossrefDataset</class>
+            <jar>dhp-doiboost-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
+                ${sparkExtraOPT}
+            </spark-opts>
+            <arg>--workingPath</arg><arg>${workingPath}/input/crossref</arg>
+            <arg>--master</arg><arg>yarn-cluster</arg>
+        </spark>
+        <ok to="RenameDataset"/>
+        <error to="Kill"/>
+    </action>
+
+    <!-- Swaps in the freshly generated dataset: deletes the existing crossref_ds
+         folder and moves crossref_ds_updated into its place.
+         On success continues with ConvertCrossrefToOAF, on failure goes to Kill. -->
+    <action name="RenameDataset">
+        <fs>
+            <delete path='${workingPath}/input/crossref/crossref_ds'/>
+            <move source="${workingPath}/input/crossref/crossref_ds_updated"
+                  target="${workingPath}/input/crossref/crossref_ds"/>
+        </fs>
+        <ok to="ConvertCrossrefToOAF"/>
+        <error to="Kill"/>
+    </action>


-
-<!--    <action name="ImportCrossRef">-->
-<!--        <java>-->
-<!--            <job-tracker>${jobTracker}</job-tracker>-->
-<!--            <name-node>${nameNode}</name-node>-->
-<!--            <main-class>eu.dnetlib.doiboost.crossref.CrossrefImporter</main-class>-->
-<!--            <arg>-t</arg><arg>${workingPath}/input/crossref/index_dump_1</arg>-->
-<!--            <arg>-n</arg><arg>${nameNode}</arg>-->
-<!--            <arg>-ts</arg><arg>${timestamp}</arg>-->
-<!--        </java>-->
-<!--        <ok to="End"/>-->
-<!--        <error to="Kill"/>-->
-<!--    </action>-->
-
-
-    <action name="ExtractCrossrefToOAF">
+    <action name="ConvertCrossrefToOAF">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
            <mode>cluster</mode>
-            <name>ExtractCrossrefToOAF</name>
+            <name>ConvertCrossrefToOAF</name>
            <class>eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF</class>
            <jar>dhp-doiboost-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
+                --conf spark.sql.shuffle.partitions=3840
                ${sparkExtraOPT}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${workingPath}/input/crossref/crossref_ds</arg>
-            <arg>--targetPath</arg><arg>${workingPath}/input/crossref</arg>
+            <arg>--targetPath</arg><arg>${workingPath}/process/</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

-
-
-<!--    <action name="GenerateDataset">-->
-<!--        <spark xmlns="uri:oozie:spark-action:0.2">-->
-<!--            <master>yarn-cluster</master>-->
-<!--            <mode>cluster</mode>-->
-<!--            <name>ExtractCrossrefToOAF</name>-->
-<!--            <class>eu.dnetlib.doiboost.crossref.CrossrefDataset</class>-->
-<!--            <jar>dhp-doiboost-${projectVersion}.jar</jar>-->
-<!--            <spark-opts>-->
-<!--                &#45;&#45;executor-memory=${sparkExecutorMemory}-->
-<!--                &#45;&#45;executor-cores=${sparkExecutorCores}-->
-<!--                &#45;&#45;driver-memory=${sparkDriverMemory}-->
-<!--                ${sparkExtraOPT}-->
-<!--            </spark-opts>-->
-<!--            <arg>&#45;&#45;sourcePath</arg><arg>/data/doiboost/crossref/cr_dataset</arg>-->
-<!--            <arg>&#45;&#45;targetPath</arg><arg>/data/doiboost/crossref/crossrefDataset</arg>-->
-<!--            <arg>&#45;&#45;master</arg><arg>yarn-cluster</arg>-->
-<!--        </spark>-->
-<!--        <ok to="End"/>-->
-<!--        <error to="Kill"/>-->
-<!--    </action>-->
-
    <end name="End"/>
 </workflow-app>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json
@ -1,6 +1,5 @@
 [
-  {"paramName":"s",   "paramLongName":"sourcePath", "paramDescription": "the path of the sequencial file to read",  "paramRequired": true},
-  {"paramName":"t",   "paramLongName":"targetPath", "paramDescription": "the working dir path",                      "paramRequired": true},
+  {"paramName":"w",   "paramLongName":"workingPath", "paramDescription": "the working dir path",                      "paramRequired": true},
  {"paramName":"m",   "paramLongName":"master",     "paramDescription": "the master name",                          "paramRequired": true}

 ]
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml
@ -39,14 +39,7 @@
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

-    <action name="ResetWorkingPath">
-        <fs>
-            <delete path='${workingDirPath}'/>
-            <mkdir path='${workingDirPath}'/>
-        </fs>
-        <ok to="CreateDOIBoost"/>
-        <error to="Kill"/>
-    </action>
+

    <action name="CreateDOIBoost">
        <spark xmlns="uri:oozie:spark-action:0.2">
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml
@ -8,6 +8,10 @@
            <name>targetPath</name>
            <description>the working dir base path</description>
        </property>
+        <property>
+            <name>workingPath</name>
+            <description>the working dir base path</description>
+        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -31,10 +35,10 @@

    <action name="ResetWorkingPath">
        <fs>
-            <delete path='${targetPath}'/>
-            <mkdir path='${targetPath}'/>
+            <delete path='${workingPath}'/>
+            <mkdir path='${workingPath}'/>
        </fs>
-        <ok to="PreprocessMag"/>
+        <ok to="ConvertMagToDataset"/>
        <error to="Kill"/>
    </action>

@ -52,10 +56,10 @@
                ${sparkExtraOPT}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
-            <arg>--targetPath</arg><arg>${targetPath}</arg>
+            <arg>--targetPath</arg><arg>${workingPath}</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
-        <ok to="End"/>
+        <ok to="PreprocessMag"/>
        <error to="Kill"/>
    </action>

@ -65,7 +69,7 @@
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn-cluster</master>
            <mode>cluster</mode>
-            <name>Convert Mag to Dataset</name>
+            <name>Convert Mag to OAF Dataset</name>
            <class>eu.dnetlib.doiboost.mag.SparkPreProcessMAG</class>
            <jar>dhp-doiboost-${projectVersion}.jar</jar>
            <spark-opts>
@ -75,7 +79,8 @@
                --conf spark.sql.shuffle.partitions=3840
                ${sparkExtraOPT}
            </spark-opts>
-            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
+            <arg>--sourcePath</arg><arg>${workingPath}</arg>
+            <arg>--workingPath</arg><arg>${workingPath}/process</arg>
            <arg>--targetPath</arg><arg>${targetPath}</arg>
            <arg>--master</arg><arg>yarn-cluster</arg>
        </spark>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json
@ -1,6 +1,7 @@
 [
  {"paramName":"s",   "paramLongName":"sourcePath", "paramDescription": "the base path of MAG input",  "paramRequired": true},
-  {"paramName":"t",   "paramLongName":"targetPath", "paramDescription": "the working dir path",                      "paramRequired": true},
+  {"paramName":"t",   "paramLongName":"targetPath", "paramDescription": "the target dir path",                      "paramRequired": true},
+  {"paramName":"w",   "paramLongName":"workingPath", "paramDescription": "the working dir path",                      "paramRequired": true},
  {"paramName":"m",   "paramLongName":"master",     "paramDescription": "the master name",                          "paramRequired": true}

 ]
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala
@ -1,5 +1,8 @@
 package eu.dnetlib.doiboost.orcid

+import eu.dnetlib.dhp.schema.oaf.Publication
+import eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF.getClass
+import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
 import org.codehaus.jackson.map.ObjectMapper
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
@ -21,6 +24,30 @@ class MappingORCIDToOAFTest {
    })
  }

+//  @Test
+//  def testOAFConvert():Unit ={
+//
+//    val spark: SparkSession =
+//      SparkSession
+//        .builder()
+//        .appName(getClass.getSimpleName)
+//        .master("local[*]").getOrCreate()
+//
+//
+//    SparkConvertORCIDToOAF.run( spark,"/Users/sandro/Downloads/orcid", "/Users/sandro/Downloads/orcid_oaf")
+//    implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
+//
+//    val df = spark.read.load("/Users/sandro/Downloads/orcid_oaf").as[Publication]
+//    println(df.first.getId)
+//    println(mapper.writeValueAsString(df.first()))
+//
+//
+//
+//
+//  }
+
+
+



--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java
@ -108,7 +108,7 @@ public class SparkResultToCommunityFromOrganizationJob {
 					.stream()
 					.map(con -> con.getId())
 					.collect(Collectors.toList());
-				Result res = new Result();
+				R res = (R) ret.getClass().newInstance();
 				res.setId(ret.getId());
 				List<Context> propagatedContexts = new ArrayList<>();
 				for (String cId : communitySet) {
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java
@ -130,7 +130,7 @@ public class SparkResultToCommunityThroughSemRelJob {
 						})
 					.filter(Objects::nonNull)
 					.collect(Collectors.toList());
-				Result r = new Result();
+				R r = (R) ret.getClass().newInstance();
 				r.setId(ret.getId());
 				r.setContext(contextList);
 				ret.mergeFrom(r);
--- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java
+++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java
@ -24,7 +24,6 @@ import org.slf4j.LoggerFactory;

 import com.fasterxml.jackson.databind.ObjectMapper;

-import eu.dnetlib.dhp.orcidtoresultfromsemrel.OrcidPropagationJobTest;
 import eu.dnetlib.dhp.schema.oaf.Dataset;

 public class ResultToCommunityJobTest {
@ -66,7 +65,7 @@ public class ResultToCommunityJobTest {
 	}

 	@Test
-	public void test1() throws Exception {
+	public void testSparkResultToCommunityThroughSemRelJob() throws Exception {
 		SparkResultToCommunityThroughSemRelJob
 			.main(
 				new String[] {
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java
@ -190,15 +190,6 @@ public class CleaningFunctions {
 					}
 				}

-				final Set<String> collectedFrom = Optional
-					.ofNullable(r.getCollectedfrom())
-					.map(
-						c -> c
-							.stream()
-							.map(KeyValue::getKey)
-							.collect(Collectors.toCollection(HashSet::new)))
-					.orElse(new HashSet<>());
-
 				for (Author a : r.getAuthor()) {
 					if (Objects.isNull(a.getPid())) {
 						a.setPid(Lists.newArrayList());
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
@ -75,6 +75,8 @@ public abstract class AbstractMdRecordToOafMapper {
 	protected static final Qualifier MAG_PID_TYPE = qualifier(
 		"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);

+	protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999";
+
 	protected static final Map<String, String> nsContext = new HashMap<>();

 	static {
@ -244,25 +246,54 @@ public abstract class AbstractMdRecordToOafMapper {

 			final String originalId = ((Node) o).getText();

+			final String validationdDate = ((Node) o).valueOf("@validationDate");
+
 			if (StringUtils.isNotBlank(originalId)) {
 				final String projectId = createOpenaireId(40, originalId, true);

 				res
 					.add(
-						getRelation(
+						getRelationWithValidationDate(
 							docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info,
-							lastUpdateTimestamp));
+							lastUpdateTimestamp, validationdDate));
 				res
 					.add(
-						getRelation(
+						getRelationWithValidationDate(
 							projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info,
-							lastUpdateTimestamp));
+							lastUpdateTimestamp, validationdDate));
 			}
 		}

 		return res;
 	}

+	/**
+	 * Builds a relation through {@code getRelation} and marks it as validated when a validation date is given.
+	 *
+	 * A non-blank {@code validationDate} sets the validated flag to {@code true}, stores the date and raises the
+	 * trust of the relation's data info to {@code DEFAULT_TRUST_FOR_VALIDATED_RELS}. A blank or null date leaves the
+	 * relation not validated, with a null validation date and the trust untouched.
+	 *
+	 * @param source the source identifier of the relation
+	 * @param target the target identifier of the relation
+	 * @param relType the relation type
+	 * @param subRelType the relation sub-type
+	 * @param relClass the relation class
+	 * @param collectedFrom the datasource the relation was collected from
+	 * @param info the data info passed on to {@code getRelation}
+	 * @param lastUpdateTimestamp the last update timestamp passed on to {@code getRelation}
+	 * @param validationDate the validation date, possibly blank or null
+	 * @return the relation, enriched with the validation information
+	 */
+	protected Relation getRelationWithValidationDate(final String source,
+		final String target,
+		final String relType,
+		final String subRelType,
+		final String relClass,
+		final KeyValue collectedFrom,
+		final DataInfo info,
+		final long lastUpdateTimestamp,
+		final String validationDate) {
+
+		final Relation r = getRelation(
+			source, target, relType, subRelType, relClass, collectedFrom, info, lastUpdateTimestamp);
+
+		// evaluate the condition once and set each field a single time
+		final boolean validated = StringUtils.isNotBlank(validationDate);
+		r.setValidated(validated);
+		r.setValidationDate(validated ? validationDate : null);
+
+		if (validated) {
+			// NOTE(review): if getRelation stores the very same 'info' instance that was passed in, this also
+			// changes the trust seen by every other object sharing it; confirm that this is intended.
+			r.getDataInfo().setTrust(DEFAULT_TRUST_FOR_VALIDATED_RELS);
+		}
+
+		return r;
+	}
+
 	protected Relation getRelation(final String source,
 		final String target,
 		final String relType,
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
@ -23,7 +23,15 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
-import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.asString;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.journal;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listFields;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listKeyValues;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;

 import java.io.Closeable;
 import java.io.IOException;
@ -462,44 +470,48 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i

 				return Arrays.asList(r);
 			} else {
+				final String validationDate = rs.getString("curation_date");
+
 				final String sourceId = createOpenaireId(rs.getString(SOURCE_TYPE), rs.getString("source_id"), false);
 				final String targetId = createOpenaireId(rs.getString(TARGET_TYPE), rs.getString("target_id"), false);

 				final Relation r1 = new Relation();
 				final Relation r2 = new Relation();

-				if (rs.getString(SOURCE_TYPE).equals("project")) {
-					r1.setCollectedfrom(collectedFrom);
-					r1.setRelType(RESULT_PROJECT);
-					r1.setSubRelType(OUTCOME);
-					r1.setRelClass(PRODUCES);
-
-					r2.setCollectedfrom(collectedFrom);
-					r2.setRelType(RESULT_PROJECT);
-					r2.setSubRelType(OUTCOME);
-					r2.setRelClass(IS_PRODUCED_BY);
-				} else {
-					r1.setCollectedfrom(collectedFrom);
-					r1.setRelType(RESULT_RESULT);
-					r1.setSubRelType(RELATIONSHIP);
-					r1.setRelClass(IS_RELATED_TO);
-
-					r2.setCollectedfrom(collectedFrom);
-					r2.setRelType(RESULT_RESULT);
-					r2.setSubRelType(RELATIONSHIP);
-					r2.setRelClass(IS_RELATED_TO);
-				}
-
+				r1.setValidated(true);
+				r1.setValidationDate(validationDate);
+				r1.setCollectedfrom(collectedFrom);
 				r1.setSource(sourceId);
 				r1.setTarget(targetId);
 				r1.setDataInfo(info);
 				r1.setLastupdatetimestamp(lastUpdateTimestamp);

+				r2.setValidationDate(validationDate);
+				r2.setValidated(true);
+				r2.setCollectedfrom(collectedFrom);
 				r2.setSource(targetId);
 				r2.setTarget(sourceId);
 				r2.setDataInfo(info);
 				r2.setLastupdatetimestamp(lastUpdateTimestamp);

+				if (rs.getString(SOURCE_TYPE).equals("project")) {
+					r1.setRelType(RESULT_PROJECT);
+					r1.setSubRelType(OUTCOME);
+					r1.setRelClass(PRODUCES);
+
+					r2.setRelType(RESULT_PROJECT);
+					r2.setSubRelType(OUTCOME);
+					r2.setRelClass(IS_PRODUCED_BY);
+				} else {
+					r1.setRelType(RESULT_RESULT);
+					r1.setSubRelType(RELATIONSHIP);
+					r1.setRelClass(IS_RELATED_TO);
+
+					r2.setRelType(RESULT_RESULT);
+					r2.setSubRelType(RELATIONSHIP);
+					r2.setRelClass(IS_RELATED_TO);
+				}
+
 				return Arrays.asList(r1, r2);
 			}

--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryClaims.sql
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryClaims.sql
@ -1 +1 @@
-SELECT source_type, source_id, target_type, target_id, semantics FROM claim WHERE approved=TRUE;
+SELECT source_type, source_id, target_type, target_id, semantics, curation_date::text FROM claim WHERE approved=TRUE;
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
@ -141,7 +141,10 @@ public class MappersTest {
 		assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
 		assertTrue(StringUtils.isNotBlank(r1.getRelType()));
 		assertTrue(StringUtils.isNotBlank(r2.getRelType()));
-
+		assertTrue(r1.getValidated());
+		assertTrue(r2.getValidated());
+		assertEquals(r1.getValidationDate(), "2020-01-01");
+		assertEquals(r2.getValidationDate(), "2020-01-01");
 		// System.out.println(new ObjectMapper().writeValueAsString(p));
 		// System.out.println(new ObjectMapper().writeValueAsString(r1));
 		// System.out.println(new ObjectMapper().writeValueAsString(r2));
@ -246,6 +249,10 @@ public class MappersTest {
 		assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
 		assertTrue(StringUtils.isNotBlank(r1.getRelType()));
 		assertTrue(StringUtils.isNotBlank(r2.getRelType()));
+		assertTrue(r1.getValidated());
+		assertTrue(r2.getValidated());
+		assertEquals(r1.getValidationDate(), "2020-01-01");
+		assertEquals(r2.getValidationDate(), "2020-01-01");
 	}

 	@Test
@ -355,7 +362,21 @@ public class MappersTest {
 		assertValidId(p.getId());
 		assertValidId(p.getCollectedfrom().get(0).getKey());
 		assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
-		System.out.println(p.getTitle().get(0).getValue());
+		assertEquals(1, p.getAuthor().size());
+		assertEquals("OPEN", p.getBestaccessright().getClassid());
+		assertTrue(StringUtils.isNotBlank(p.getPid().get(0).getValue()));
+		assertTrue(StringUtils.isNotBlank(p.getPid().get(0).getQualifier().getClassid()));
+		assertEquals("dataset", p.getResulttype().getClassname());
+		assertEquals(1, p.getInstance().size());
+		assertEquals("OPEN", p.getInstance().get(0).getAccessright().getClassid());
+		assertValidId(p.getInstance().get(0).getCollectedfrom().getKey());
+		assertValidId(p.getInstance().get(0).getHostedby().getKey());
+		assertEquals(
+			"http://creativecommons.org/licenses/by/3.0/de/legalcode", p.getInstance().get(0).getLicense().getValue());
+		assertEquals(1, p.getInstance().get(0).getUrl().size());
+//		System.out.println(p.getInstance().get(0).getUrl().get(0));
+//		System.out.println(p.getInstance().get(0).getHostedby().getValue());
+		System.out.println(p.getPid().get(0).getValue());
 	}

 	@Test
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml
@ -51,7 +51,7 @@
    <!--<dr:CobjCategory type="publication">0001</dr:CobjCategory>-->
    <dr:CobjCategory>0001</dr:CobjCategory>
    <oaf:dateAccepted>2017-01-01</oaf:dateAccepted>
-    <oaf:projectid>corda_______::226852</oaf:projectid>
+    <oaf:projectid validationDate="2020-01-01">corda_______::226852</oaf:projectid>
    <oaf:accessrights>OPEN</oaf:accessrights>
    <oaf:hostedBy id="openaire____::issn226852" name="One Ecosystem"/>
    <oaf:collectedFrom
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_dataset.xml
@ -89,7 +89,7 @@
    <oaf:language>und</oaf:language>
    <oaf:concept id="https://zenodo.org/communities/epfl"/>
    <oaf:hostedBy id="re3data_____::r3d100010468" name="Zenodo"/>
-    <oaf:projectid>corda_______::226852</oaf:projectid>
+    <oaf:projectid validationDate="2020-01-01">corda_______::226852</oaf:projectid>
    <oaf:collectedFrom id="re3data_____::r3d100010468" name="Zenodo"/>
    <oaf:refereed>0001</oaf:refereed>s
  </metadata>
--- a/dhp-workflows/dhp-graph-provision/pom.xml
+++ b/dhp-workflows/dhp-graph-provision/pom.xml
@ -54,6 +54,13 @@
            <artifactId>spark-solr</artifactId>
        </dependency>

+        <!-- the solr-test-framework requires the old junit:junit test framework -->
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.12</version>
+            <scope>test</scope>
+        </dependency>
        <dependency>
            <groupId>org.apache.solr</groupId>
            <artifactId>solr-test-framework</artifactId>
@ -140,6 +147,12 @@
        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
+            <exclusions>
+                <exclusion>
+                    <groupId>junit</groupId>
+                    <artifactId>junit</artifactId>
+                </exclusion>
+            </exclusions>
        </dependency>


--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java
@ -0,0 +1,52 @@
+
+package eu.dnetlib.dhp.oa.provision.utils;
+
+import java.util.Comparator;
+import java.util.Optional;
+
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+public class AuthorPidTypeComparator implements Comparator<StructuredProperty> {
+	// Orders pids by qualifier classid: ORCID first, then ORCID_PENDING, then any other type; a missing classid sorts last.
+	@Override
+	public int compare(StructuredProperty left, StructuredProperty right) {
+		// Null-safe lookup: lClass is null when left, its qualifier, or the qualifier's classid is null.
+		String lClass = Optional
+			.ofNullable(left)
+			.map(StructuredProperty::getQualifier)
+			.map(Qualifier::getClassid)
+			.orElse(null);
+		// Same null-safe lookup for the right operand.
+		String rClass = Optional
+			.ofNullable(right)
+			.map(StructuredProperty::getQualifier)
+			.map(Qualifier::getClassid)
+			.orElse(null);
+		// Two missing classids tie; otherwise the side with the missing classid is ordered after the other.
+		if (lClass == null && rClass == null)
+			return 0;
+		if (lClass == null)
+			return 1;
+		if (rClass == null)
+			return -1;
+		// Identical classids are equivalent, whatever the type is.
+		if (lClass.equals(rClass))
+			return 0;
+		// From here on the classids differ: ModelConstants.ORCID takes precedence over everything else.
+		if (lClass.equals(ModelConstants.ORCID))
+			return -1;
+		if (rClass.equals(ModelConstants.ORCID))
+			return 1;
+		// Neither side is ORCID: ModelConstants.ORCID_PENDING takes precedence over the remaining types.
+		if (lClass.equals(ModelConstants.ORCID_PENDING))
+			return -1;
+		if (rClass.equals(ModelConstants.ORCID_PENDING))
+			return 1;
+		// Neither side is ORCID or ORCID_PENDING: all remaining types are treated as equivalent.
+		return 0;
+	}
+
+}
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
@ -254,6 +254,18 @@ public class XmlRecordFactory implements Serializable {
 														p -> p,
 														(p1, p2) -> p1))
 											.values()
+											.stream()
+											.collect(
+												Collectors
+													.groupingBy(
+														p -> p.getValue(),
+														Collectors
+															.mapping(
+																p -> p,
+																Collectors.minBy(new AuthorPidTypeComparator()))))
+											.values()
+											.stream()
+											.map(op -> op.get())
 											.forEach(
 												sp -> {
 													String pidType = getAuthorPidType(sp.getQualifier().getClassid());
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java
@ -6,15 +6,13 @@ import org.apache.solr.client.solrj.response.UpdateResponse;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;

-import junit.framework.Assert;
-
 public class SolrAdminApplicationTest extends SolrTest {

 	@Test
 	public void testPing() throws Exception {
 		SolrPingResponse pingResponse = miniCluster.getSolrClient().ping();
 		log.info("pingResponse: '{}'", pingResponse.getStatus());
-		Assert.assertTrue(pingResponse.getStatus() == 0);
+		Assertions.assertTrue(pingResponse.getStatus() == 0);
 	}

 	@Test
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java
@ -5,71 +5,42 @@ import static org.junit.jupiter.api.Assertions.*;

 import java.io.IOException;
 import java.io.StringReader;
-import java.util.List;

 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
 import org.dom4j.io.SAXReader;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
-import org.mockito.Mock;

+import com.fasterxml.jackson.databind.DeserializationFeature;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
 import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
 import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
-import eu.dnetlib.dhp.schema.oaf.Oaf;
-import eu.dnetlib.dhp.schema.oaf.OafEntity;
-import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
 import eu.dnetlib.dhp.schema.oaf.Publication;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

-//TODO to enable it we need to update the joined_entity.json test file
-//@Disabled
 public class XmlRecordFactoryTest {

 	private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource";

+	private static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
+		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+
 	@Test
 	public void testXMLRecordFactory() throws IOException, DocumentException {

-		String json = IOUtils.toString(getClass().getResourceAsStream("joined_entity.json"));
-
-		assertNotNull(json);
-		JoinedEntity je = new ObjectMapper().readValue(json, JoinedEntity.class);
-		assertNotNull(je);
-
-		Document doc = buildXml(je);
-		//// TODO specific test assertion on doc
-	}
-
-	@Test
-	void testBologna() throws IOException, DocumentException {
-		final String json = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.json"));
-		Publication oaf = new ObjectMapper().readValue(json, Publication.class);
-		assertNotNull(oaf);
-		JoinedEntity je = new JoinedEntity();
-		je.setEntity(oaf);
-		assertNotNull(je);
-
-		Document doc = buildXml(je);
-		// TODO specific test assertion on doc
-
-		System.out.println(doc.asXML());
-
-	}
-
-	private Document buildXml(JoinedEntity je) throws DocumentException {
 		ContextMapper contextMapper = new ContextMapper();

 		XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation,
 			otherDsTypeId);

-		String xml = xmlRecordFactory.build(je);
+		Publication p = OBJECT_MAPPER
+			.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
+
+		String xml = xmlRecordFactory.build(new JoinedEntity<>(p));

 		assertNotNull(xml);

@ -77,8 +48,14 @@ public class XmlRecordFactoryTest {

 		assertNotNull(doc);

-		// TODO add assertions based of values extracted from the XML record
+		System.out.println(doc.asXML());

-		return doc;
+		Assertions.assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
+		Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));
+
+		Assertions.assertEquals("0000-0001-9613-9956", doc.valueOf("//creator[@rank = '2']/@orcid"));
+		Assertions.assertEquals("", doc.valueOf("//creator[@rank = '2']/@orcid_pending"));
+
+		// TODO add assertions based on values extracted from the XML record
 	}
 }
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json
@ -0,0 +1,820 @@
+{
+  "author": [
+    {
+      "affiliation": [],
+      "fullname": "Lee, Jaehyun",
+      "name": "Jaehyun",
+      "pid": [
+        {
+          "qualifier": {
+            "classid": "orcid",
+            "classname": "Open Researcher and Contributor ID",
+            "schemeid": "dnet:pid_types",
+            "schemename": "dnet:pid_types"
+          },
+          "value": "0000-0001-9613-6638"
+        },
+        {
+          "qualifier": {
+            "classid": "orcid_pending",
+            "classname": "Open Researcher and Contributor ID",
+            "schemeid": "dnet:pid_types",
+            "schemename": "dnet:pid_types"
+          },
+          "value": "0000-0001-9613-6639"
+        }
+      ],
+      "rank": 1,
+      "surname": "Lee"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Berrada, Salim",
+      "name": "Salim",
+      "pid": [
+        {
+          "qualifier": {
+            "classid": "orcid",
+            "classname": "Open Researcher and Contributor ID",
+            "schemeid": "dnet:pid_types",
+            "schemename": "dnet:pid_types"
+          },
+          "value": "0000-0001-9613-9956"
+        },
+        {
+          "qualifier": {
+            "classid": "orcid_pending",
+            "classname": "Open Researcher and Contributor ID",
+            "schemeid": "dnet:pid_types",
+            "schemename": "dnet:pid_types"
+          },
+          "value": "0000-0001-9613-9956"
+        }
+      ],
+      "rank": 2,
+      "surname": "Berrada"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Adamu-Lema, Fikru",
+      "name": "Fikru",
+      "pid": [],
+      "rank": 3,
+      "surname": "Adamu-Lema"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Nagy, Nicole",
+      "name": "Nicole",
+      "pid": [],
+      "rank": 4,
+      "surname": "Nagy"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Georgiev, Vihar P.",
+      "name": "Vihar P.",
+      "pid": [],
+      "rank": 5,
+      "surname": "Georgiev"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Sadi, Toufik",
+      "name": "Toufik",
+      "pid": [],
+      "rank": 6,
+      "surname": "Sadi"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Liang, Jie",
+      "name": "Jie",
+      "pid": [],
+      "rank": 7,
+      "surname": "Liang"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Ramos, Raphael",
+      "name": "Raphael",
+      "pid": [],
+      "rank": 8,
+      "surname": "Ramos"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Carrillo-Nunez, Hamilton",
+      "name": "Hamilton",
+      "pid": [],
+      "rank": 9,
+      "surname": "Carrillo-Nunez"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Kalita, Dipankar",
+      "name": "Dipankar",
+      "pid": [],
+      "rank": 10,
+      "surname": "Kalita"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Lilienthal, Katharina",
+      "name": "Katharina",
+      "pid": [],
+      "rank": 11,
+      "surname": "Lilienthal"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Wislicenus, Marcus",
+      "name": "Marcus",
+      "pid": [],
+      "rank": 12,
+      "surname": "Wislicenus"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Pandey, Reeturaj",
+      "name": "Reeturaj",
+      "pid": [],
+      "rank": 13,
+      "surname": "Pandey"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Chen, Bingan",
+      "name": "Bingan",
+      "pid": [],
+      "rank": 14,
+      "surname": "Chen"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Teo, Kenneth B.K.",
+      "name": "Kenneth B. K.",
+      "pid": [],
+      "rank": 15,
+      "surname": "Teo"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Goncalves, Goncalo",
+      "name": "Goncalo",
+      "pid": [],
+      "rank": 16,
+      "surname": "Goncalves"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Okuno, Hanako",
+      "name": "Hanako",
+      "pid": [],
+      "rank": 17,
+      "surname": "Okuno"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Uhlig, Benjamin",
+      "name": "Benjamin",
+      "pid": [],
+      "rank": 18,
+      "surname": "Uhlig"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Todri-Sanial, Aida",
+      "name": "Aida",
+      "pid": [],
+      "rank": 19,
+      "surname": "Todri-Sanial"
+    },
+    {
+      "affiliation": [],
+      "fullname": "Dijon",
+      "name": "",
+      "pid": [],
+      "rank": 20,
+      "surname": ""
+    },
+    {
+      "affiliation": [],
+      "fullname": "Jean",
+      "name": "",
+      "pid": [],
+      "rank": 21,
+      "surname": ""
+    }
+  ],
+  "collectedfrom": [
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
+      "value": "VIRTA"
+    }
+  ],
+  "context": [],
+  "contributor": [],
+  "country": [],
+  "coverage": [],
+  "dataInfo": {
+    "deletedbyinference": false,
+    "inferenceprovenance": "",
+    "inferred": false,
+    "invisible": false,
+    "provenanceaction": {
+      "classid": "sysimport:crosswalk:datasetarchive",
+      "classname": "sysimport:crosswalk:datasetarchive",
+      "schemeid": "dnet:provenanceActions",
+      "schemename": "dnet:provenanceActions"
+    },
+    "trust": "0.9"
+  },
+  "dateofacceptance": {
+    "dataInfo": {
+      "deletedbyinference": false,
+      "inferenceprovenance": "",
+      "inferred": false,
+      "invisible": false,
+      "provenanceaction": {
+        "classid": "",
+        "classname": "",
+        "schemeid": "",
+        "schemename": ""
+      },
+      "trust": ""
+    },
+    "value": "2018-01-01"
+  },
+  "dateofcollection": "2020-01-27T11:32:33.729Z",
+  "dateoftransformation": "2020-01-27T12:03:59.662Z",
+  "description": [],
+  "embargoenddate": {
+    "dataInfo": {
+      "deletedbyinference": false,
+      "inferenceprovenance": "",
+      "inferred": false,
+      "invisible": false,
+      "provenanceaction": {
+        "classid": "",
+        "classname": "",
+        "schemeid": "",
+        "schemename": ""
+      },
+      "trust": ""
+    },
+    "value": ""
+  },
+  "extraInfo": [],
+  "format": [],
+  "fulltext": [],
+  "id": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c",
+  "instance": [
+    {
+      "accessright": {
+        "classid": "OPEN",
+        "classname": "Open Access",
+        "schemeid": "dnet:access_modes",
+        "schemename": "dnet:access_modes"
+      },
+      "collectedfrom": {
+        "dataInfo": {
+          "deletedbyinference": false,
+          "inferenceprovenance": "",
+          "inferred": false,
+          "invisible": false,
+          "provenanceaction": {
+            "classid": "",
+            "classname": "",
+            "schemeid": "",
+            "schemename": ""
+          },
+          "trust": ""
+        },
+        "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
+        "value": "VIRTA"
+      },
+      "dateofacceptance": {
+        "dataInfo": {
+          "deletedbyinference": false,
+          "inferenceprovenance": "",
+          "inferred": false,
+          "invisible": false,
+          "provenanceaction": {
+            "classid": "",
+            "classname": "",
+            "schemeid": "",
+            "schemename": ""
+          },
+          "trust": ""
+        },
+        "value": "2018-01-01"
+      },
+      "distributionlocation": "",
+      "hostedby": {
+        "dataInfo": {
+          "deletedbyinference": false,
+          "inferenceprovenance": "",
+          "inferred": false,
+          "invisible": false,
+          "provenanceaction": {
+            "classid": "",
+            "classname": "",
+            "schemeid": "",
+            "schemename": ""
+          },
+          "trust": ""
+        },
+        "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
+        "value": "VIRTA"
+      },
+      "instancetype": {
+        "classid": "0001",
+        "classname": "Article",
+        "schemeid": "dnet:dataCite_resource",
+        "schemename": "dnet:dataCite_resource"
+      },
+      "license": {
+        "dataInfo": {
+          "deletedbyinference": false,
+          "inferenceprovenance": "",
+          "inferred": false,
+          "invisible": false,
+          "provenanceaction": {
+            "classid": "",
+            "classname": "",
+            "schemeid": "",
+            "schemename": ""
+          },
+          "trust": ""
+        },
+        "value": ""
+      },
+      "url": [
+        "http://juuli.fi/Record/0331473718",
+        "http://dx.doi.org/10.1109/TED.2018.2853550"
+      ]
+    }
+  ],
+  "journal": {
+    "conferencedate": "",
+    "conferenceplace": "",
+    "dataInfo": {
+      "deletedbyinference": false,
+      "inferenceprovenance": "",
+      "inferred": false,
+      "invisible": false,
+      "provenanceaction": {
+        "classid": "",
+        "classname": "",
+        "schemeid": "",
+        "schemename": ""
+      },
+      "trust": ""
+    },
+    "edition": "",
+    "ep": "3892",
+    "iss": "9",
+    "issnLinking": "",
+    "issnOnline": "",
+    "issnPrinted": "0018-9383",
+    "name": "IEEE Transactions on Electron Devices",
+    "sp": "3884",
+    "vol": "65"
+  },
+  "language": {
+    "classid": "en",
+    "classname": "en",
+    "schemeid": "dnet:languages",
+    "schemename": "dnet:languages"
+  },
+  "lastupdatetimestamp": 0,
+  "originalId": [
+    "0331473718",
+    "10.1109/TED.2018.2853550",
+    "http://juuli.fi/Record/0331473718"
+  ],
+  "pid": [
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "doi",
+        "classname": "doi",
+        "schemeid": "dnet:pid_types",
+        "schemename": "dnet:pid_types"
+      },
+      "value": "10.1109/TED.2018.2853550"
+    }
+  ],
+  "publisher": {
+    "dataInfo": {
+      "deletedbyinference": false,
+      "inferenceprovenance": "",
+      "inferred": false,
+      "invisible": false,
+      "provenanceaction": {
+        "classid": "",
+        "classname": "",
+        "schemeid": "",
+        "schemename": ""
+      },
+      "trust": ""
+    },
+    "value": ""
+  },
+  "relevantdate": [],
+  "resourcetype": {
+    "classid": "0001",
+    "classname": "Article",
+    "schemeid": "dnet:dataCite_resource",
+    "schemename": "dnet:dataCite_resource"
+  },
+  "resulttype": {
+    "classid": "publication",
+    "classname": "publication",
+    "schemeid": "dnet:result_typologies",
+    "schemename": "dnet:result_typologies"
+  },
+  "source": [],
+  "subject": [
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "http://finto.fi/okm-tieteenala/en/",
+        "classname": "finto",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "ta114"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "Conductivity"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "Contacts"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "Cu-carbon nanotubes (CNT) composites"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "density functional theory (DFT)"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "Discrete Fourier transforms"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "Electromigration"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "electromigration (EM)"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "electrothermal"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "interconnects"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "Lattices"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "multiscale simulation"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "Resistance"
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "self-heating."
+    },
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "keyword",
+        "classname": "keyword",
+        "schemeid": "dnet:subject_classification_typologies",
+        "schemename": "dnet:subject_classification_typologies"
+      },
+      "value": "Thermal conductivity"
+    }
+  ],
+  "title": [
+    {
+      "dataInfo": {
+        "deletedbyinference": false,
+        "inferenceprovenance": "",
+        "inferred": false,
+        "invisible": false,
+        "provenanceaction": {
+          "classid": "",
+          "classname": "",
+          "schemeid": "",
+          "schemename": ""
+        },
+        "trust": ""
+      },
+      "qualifier": {
+        "classid": "main title",
+        "classname": "main title",
+        "schemeid": "dnet:dataCite_title",
+        "schemename": "dnet:dataCite_title"
+      },
+      "value": "Understanding Electromigration in Cu-CNT Composite Interconnects A Multiscale Electrothermal Simulation Study"
+    }
+  ]
+}
--- a/dhp-workflows/dhp-stats-promote/pom.xml
+++ b/dhp-workflows/dhp-stats-promote/pom.xml
@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>dhp-workflows</artifactId>
+        <groupId>eu.dnetlib.dhp</groupId>
+        <version>1.2.4-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>dhp-stats-promote</artifactId>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-core_2.11</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-sql_2.11</artifactId>
+        </dependency>
+    </dependencies>
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>pl.project13.maven</groupId>
+				<artifactId>git-commit-id-plugin</artifactId>
+                <version>2.1.11</version>
+				<configuration>
+					<failOnNoGitDirectory>false</failOnNoGitDirectory>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+</project>
--- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml
@ -0,0 +1,34 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>${jobTracker}</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>${nameNode}</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+    <property>
+        <name>hive_metastore_uris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>hive_jdbc_url</name>
+        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
+    </property>
+	<property>
+		<name>oozie.wf.workflow.notification.url</name>
+		<value>{serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status</value>
+	</property>
+    <property>
+        <name>stats_tool_api_url</name>
+        <value>${stats_tool_api_url}</value>
+    </property>
+</configuration>
--- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/impala-shell.sh
+++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/impala-shell.sh
@ -0,0 +1,18 @@
+export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
+export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
+if ! [ -L $link_folder ]
+then
+    rm -Rf "$link_folder"
+    ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
+fi
+
+echo "Getting file from " $3
+hdfs dfs -copyToLocal $3
+
+echo "Running impala shell make the new database visible"
+impala-shell -q "INVALIDATE METADATA;"
+
+echo "Running impala shell to compute new table stats"
+impala-shell -d $1 -f $2
+echo "Impala shell finished"
+rm $2
--- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/promoteCache.sh
+++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/promoteCache.sh
@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+
+curl --request GET $1/cache/promoteCache
+
--- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/computeProductionStats.sql
+++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/computeProductionStats.sql
@ -0,0 +1,8 @@
+------------------------------------------------------
+------------------------------------------------------
+-- Impala metadata refresh - needed to make the tables
+-- visible to Impala
+------------------------------------------------------
+------------------------------------------------------
+
+INVALIDATE METADATA ${stats_db_name};
--- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/updateProductionViews.sql
+++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/updateProductionViews.sql
@ -0,0 +1,207 @@
+------------------------------------------------------
+------------------------------------------------------
+-- Production schema view exchange
+------------------------------------------------------
+------------------------------------------------------
+
+-- Dropping old views
+DROP VIEW IF EXISTS ${stats_db_production_name}.category;
+DROP VIEW IF EXISTS ${stats_db_production_name}.concept;
+DROP VIEW IF EXISTS ${stats_db_production_name}.context;
+DROP VIEW IF EXISTS ${stats_db_production_name}.country;
+DROP VIEW IF EXISTS ${stats_db_production_name}.countrygdp;
+DROP VIEW IF EXISTS ${stats_db_production_name}.creation_date;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_citations;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_classifications;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_concepts;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_datasources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_languages;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_licenses;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_oids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_pids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_refereed;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_sources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_topics;
+DROP VIEW IF EXISTS ${stats_db_production_name}.datasource;
+DROP VIEW IF EXISTS ${stats_db_production_name}.datasource_languages;
+DROP VIEW IF EXISTS ${stats_db_production_name}.datasource_oids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.datasource_organizations;
+DROP VIEW IF EXISTS ${stats_db_production_name}.datasource_results;
+DROP VIEW IF EXISTS ${stats_db_production_name}.datasource_sources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.funder;
+DROP VIEW IF EXISTS ${stats_db_production_name}.fundref;
+DROP VIEW IF EXISTS ${stats_db_production_name}.numbers_country;
+DROP VIEW IF EXISTS ${stats_db_production_name}.organization;
+DROP VIEW IF EXISTS ${stats_db_production_name}.organization_datasources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.organization_pids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.organization_projects;
+DROP VIEW IF EXISTS ${stats_db_production_name}.organization_sources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_citations;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_classifications;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_concepts;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_datasources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_languages;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_licenses;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_oids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_pids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_refereed;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_sources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_topics;
+DROP VIEW IF EXISTS ${stats_db_production_name}.project;
+DROP VIEW IF EXISTS ${stats_db_production_name}.project_oids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.project_organizations;
+DROP VIEW IF EXISTS ${stats_db_production_name}.project_results;
+DROP VIEW IF EXISTS ${stats_db_production_name}.project_resultcount;
+DROP VIEW IF EXISTS ${stats_db_production_name}.project_results_publication;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication_citations;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication_classifications;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication_concepts;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication_datasources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication_languages;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication_licenses;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication_oids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication_pids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication_refereed;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication_sources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.publication_topics;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_affiliated_country;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_citations;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_classifications;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_concepts;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_datasources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_deposited_country;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_fundercount;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_gold;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_greenoa;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_languages;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_licenses;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_oids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_organization;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_peerreviewed;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_pids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_projectcount;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_projects;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_refereed;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_sources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.result_topics;
+DROP VIEW IF EXISTS ${stats_db_production_name}.rndexpediture;
+DROP VIEW IF EXISTS ${stats_db_production_name}.roarmap;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software_citations;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software_classifications;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software_concepts;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software_datasources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software_languages;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software_licenses;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software_oids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software_pids;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software_refereed;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software_sources;
+DROP VIEW IF EXISTS ${stats_db_production_name}.software_topics;
+
+
+-- Creating the production database, in case it doesn't exist
+CREATE database IF NOT EXISTS ${stats_db_production_name};
+
+-- Creating new views
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.category AS SELECT * FROM ${stats_db_name}.category;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.concept AS SELECT * FROM ${stats_db_name}.concept;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.context AS SELECT * FROM ${stats_db_name}.context;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.country AS SELECT * FROM ${stats_db_name}.country;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.countrygdp AS SELECT * FROM ${stats_db_name}.countrygdp;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.creation_date AS SELECT * FROM ${stats_db_name}.creation_date;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset AS SELECT * FROM ${stats_db_name}.dataset;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_citations AS SELECT * FROM ${stats_db_name}.dataset_citations;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_classifications AS SELECT * FROM ${stats_db_name}.dataset_classifications;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_concepts AS SELECT * FROM ${stats_db_name}.dataset_concepts;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_datasources AS SELECT * FROM ${stats_db_name}.dataset_datasources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_languages AS SELECT * FROM ${stats_db_name}.dataset_languages;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_licenses AS SELECT * FROM ${stats_db_name}.dataset_licenses;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_oids AS SELECT * FROM ${stats_db_name}.dataset_oids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_pids AS SELECT * FROM ${stats_db_name}.dataset_pids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_refereed AS SELECT * FROM ${stats_db_name}.dataset_refereed;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_sources AS SELECT * FROM ${stats_db_name}.dataset_sources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_topics AS SELECT * FROM ${stats_db_name}.dataset_topics;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.datasource AS SELECT * FROM ${stats_db_name}.datasource;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.datasource_languages AS SELECT * FROM ${stats_db_name}.datasource_languages;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.datasource_oids AS SELECT * FROM ${stats_db_name}.datasource_oids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.datasource_organizations AS SELECT * FROM ${stats_db_name}.datasource_organizations;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.datasource_results AS SELECT * FROM ${stats_db_name}.datasource_results;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.datasource_sources AS SELECT * FROM ${stats_db_name}.datasource_sources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.funder AS SELECT * FROM ${stats_db_name}.funder;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.fundref AS SELECT * FROM ${stats_db_name}.fundref;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.numbers_country AS SELECT * FROM ${stats_db_name}.numbers_country;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.organization AS SELECT * FROM ${stats_db_name}.organization;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.organization_datasources AS SELECT * FROM ${stats_db_name}.organization_datasources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.organization_pids AS SELECT * FROM ${stats_db_name}.organization_pids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.organization_projects AS SELECT * FROM ${stats_db_name}.organization_projects;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.organization_sources AS SELECT * FROM ${stats_db_name}.organization_sources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct AS SELECT * FROM ${stats_db_name}.otherresearchproduct;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_citations AS SELECT * FROM ${stats_db_name}.otherresearchproduct_citations;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_classifications AS SELECT * FROM ${stats_db_name}.otherresearchproduct_classifications;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_concepts AS SELECT * FROM ${stats_db_name}.otherresearchproduct_concepts;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_datasources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_datasources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_languages AS SELECT * FROM ${stats_db_name}.otherresearchproduct_languages;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_licenses AS SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_oids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_oids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_pids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_pids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_refereed AS SELECT * FROM ${stats_db_name}.otherresearchproduct_refereed;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_sources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_topics AS SELECT * FROM ${stats_db_name}.otherresearchproduct_topics;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project AS SELECT * FROM ${stats_db_name}.project;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project_oids AS SELECT * FROM ${stats_db_name}.project_oids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project_organizations AS SELECT * FROM ${stats_db_name}.project_organizations;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project_results AS SELECT * FROM ${stats_db_name}.project_results;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project_resultcount AS SELECT * FROM ${stats_db_name}.project_resultcount;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project_results_publication AS SELECT * FROM ${stats_db_name}.project_results_publication;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication AS SELECT * FROM ${stats_db_name}.publication;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_citations AS SELECT * FROM ${stats_db_name}.publication_citations;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_classifications AS SELECT * FROM ${stats_db_name}.publication_classifications;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_concepts AS SELECT * FROM ${stats_db_name}.publication_concepts;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_datasources AS SELECT * FROM ${stats_db_name}.publication_datasources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_languages AS SELECT * FROM ${stats_db_name}.publication_languages;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_oids AS SELECT * FROM ${stats_db_name}.publication_oids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_pids AS SELECT * FROM ${stats_db_name}.publication_pids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_refereed AS SELECT * FROM ${stats_db_name}.publication_refereed;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_sources AS SELECT * FROM ${stats_db_name}.publication_sources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_topics AS SELECT * FROM ${stats_db_name}.publication_topics;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result AS SELECT * FROM ${stats_db_name}.result;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_affiliated_country AS SELECT * FROM ${stats_db_name}.result_affiliated_country;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_citations AS SELECT * FROM ${stats_db_name}.result_citations;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_classifications AS SELECT * FROM ${stats_db_name}.result_classifications;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_concepts AS SELECT * FROM ${stats_db_name}.result_concepts;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_datasources AS SELECT * FROM ${stats_db_name}.result_datasources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_deposited_country AS SELECT * FROM ${stats_db_name}.result_deposited_country;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_fundercount AS SELECT * FROM ${stats_db_name}.result_fundercount;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_gold AS SELECT * FROM ${stats_db_name}.result_gold;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_greenoa AS SELECT * FROM ${stats_db_name}.result_greenoa;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_languages AS SELECT * FROM ${stats_db_name}.result_languages;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_licenses AS SELECT * FROM ${stats_db_name}.result_licenses;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_oids AS SELECT * FROM ${stats_db_name}.result_oids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_organization AS SELECT * FROM ${stats_db_name}.result_organization;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_peerreviewed AS SELECT * FROM ${stats_db_name}.result_peerreviewed;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_pids AS SELECT * FROM ${stats_db_name}.result_pids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_projectcount AS SELECT * FROM ${stats_db_name}.result_projectcount;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_projects AS SELECT * FROM ${stats_db_name}.result_projects;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_refereed AS SELECT * FROM ${stats_db_name}.result_refereed;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_sources AS SELECT * FROM ${stats_db_name}.result_sources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_topics AS SELECT * FROM ${stats_db_name}.result_topics;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.rndexpediture AS SELECT * FROM ${stats_db_name}.rndexpediture;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.roarmap AS SELECT * FROM ${stats_db_name}.roarmap;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software AS SELECT * FROM ${stats_db_name}.software;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_citations AS SELECT * FROM ${stats_db_name}.software_citations;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_classifications AS SELECT * FROM ${stats_db_name}.software_classifications;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_concepts AS SELECT * FROM ${stats_db_name}.software_concepts;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_datasources AS SELECT * FROM ${stats_db_name}.software_datasources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_languages AS SELECT * FROM ${stats_db_name}.software_languages;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_licenses AS SELECT * FROM ${stats_db_name}.software_licenses;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_oids AS SELECT * FROM ${stats_db_name}.software_oids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_pids AS SELECT * FROM ${stats_db_name}.software_pids;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_refereed AS SELECT * FROM ${stats_db_name}.software_refereed;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_sources AS SELECT * FROM ${stats_db_name}.software_sources;
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_topics AS SELECT * FROM ${stats_db_name}.software_topics;
--- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@ -0,0 +1,87 @@
+<workflow-app name="Graph Stats" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>stats_db_name</name>
+            <description>the target stats database name</description>
+        </property>
+        <property>
+            <name>stats_db_production_name</name>
+            <description>the name of the production schema</description>
+        </property>
+        <property>
+            <name>stats_tool_api_url</name>
+            <description>The URL of the stats tool API. Used to trigger the cache promotion.</description>
+        </property>
+        <property>
+            <name>hive_metastore_uris</name>
+            <description>hive server metastore URIs</description>
+        </property>
+        <property>
+            <name>hive_jdbc_url</name>
+            <description>hive server jdbc url</description>
+        </property>
+        <property>
+            <name>hive_timeout</name>
+            <description>the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a heartbeat. The default value is 300 seconds.</description>
+        </property>
+    </parameters>
+
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>hive.metastore.uris</name>
+                <value>${hive_metastore_uris}</value>
+            </property>
+            <property>
+            	<name>hive.txn.timeout</name>
+            	<value>${hive_timeout}</value>
+            </property>
+        </configuration>
+    </global>
+
+    <start to="updateProductionViews"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <action name="updateProductionViews">
+        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+            <jdbc-url>${hive_jdbc_url}</jdbc-url>
+            <script>scripts/updateProductionViews.sql</script>
+			<param>stats_db_name=${stats_db_name}</param>
+			<param>stats_db_production_name=${stats_db_production_name}</param>
+        </hive2>
+        <ok to="computeProductionStats"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="computeProductionStats">
+        <shell xmlns="uri:oozie:shell-action:0.1">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <exec>impala-shell.sh</exec>
+            <argument>${stats_db_production_name}</argument>
+            <argument>computeProductionStats.sql</argument>
+            <argument>${wf:appPath()}/scripts/computeProductionStats.sql</argument>
+            <file>impala-shell.sh</file>
+        </shell>
+        <ok to="promoteCache"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="promoteCache">
+        <shell xmlns="uri:oozie:shell-action:0.1">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <exec>promoteCache.sh</exec>
+            <argument>${stats_tool_api_url}</argument>
+            <file>promoteCache.sh</file>
+        </shell>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml
@ -27,4 +27,8 @@
 		<name>oozie.wf.workflow.notification.url</name>
 		<value>{serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status</value>
 	</property>
+    <property>
+        <name>stats_tool_api_url</name>
+        <value>${stats_tool_api_url}</value>
+    </property>
 </configuration>
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
@ -11,7 +11,7 @@ CREATE TABLE ${stats_db_name}.project_oids AS SELECT substr(p.id, 4) AS id, oids

 -- Project_organizations Table
 DROP TABLE IF EXISTS ${stats_db_name}.project_organizations;
-CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype='projectOrganization';
+CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype='projectOrganization' and r.datainfo.deletedbyinference=false;

 -- Project_results Table
 DROP TABLE IF EXISTS ${stats_db_name}.project_results;
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql
@ -25,7 +25,7 @@ CREATE OR REPLACE VIEW ${stats_db_name}.result_pids AS SELECT * FROM ${stats_db_
 CREATE OR REPLACE VIEW ${stats_db_name}.result_topics AS SELECT * FROM ${stats_db_name}.publication_topics UNION ALL SELECT * FROM ${stats_db_name}.software_topics UNION ALL SELECT * FROM ${stats_db_name}.dataset_topics UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_topics;

 DROP TABLE IF EXISTS ${stats_db_name}.result_organization;
-CREATE TABLE ${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='resultOrganization';
+CREATE TABLE ${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='resultOrganization' and r.datainfo.deletedbyinference=false;

 DROP TABLE IF EXISTS ${stats_db_name}.result_projects;
 CREATE TABLE ${stats_db_name}.result_projects AS select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend FROM ${stats_db_name}.result r JOIN ${stats_db_name}.project_results pr ON r.id=pr.result JOIN ${stats_db_name}.project_tmp p ON p.id=pr.id;
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
@ -47,7 +47,7 @@ DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids;
 CREATE TABLE ${stats_db_name}.datasource_oids AS SELECT substr(d.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids;

 DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations;
-CREATE TABLE ${stats_db_name}.datasource_organizations AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='datasourceOrganization';
+CREATE TABLE ${stats_db_name}.datasource_organizations AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='datasourceOrganization' and r.datainfo.deletedbyinference=false;

 -- datasource sources:
 -- where the datasource info have been collected from.
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh
@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+
+curl --request GET $1/cache/updateCache
+
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@ -17,6 +17,10 @@
            <name>stats_db_shadow_name</name>
            <description>the name of the shadow schema</description>
        </property>
+        <property>
+            <name>stats_tool_api_url</name>
+            <description>The URL of the stats tool API, used to trigger the cache update.</description>
+        </property>
        <property>
            <name>hive_metastore_uris</name>
            <description>hive server metastore URIs</description>
@ -255,7 +259,7 @@
    <action name="Step17">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hive_jdbc_url}</jdbc-url>
-            <script>scripts/step17.sql</script>
+            <script>scripts/updateProductionViews.sql</script>
 			<param>stats_db_name=${stats_db_name}</param>
 			<param>stats_db_shadow_name=${stats_db_shadow_name}</param>
        </hive2>
@ -283,10 +287,22 @@
            <name-node>${nameNode}</name-node>
            <exec>impala-shell.sh</exec>
            <argument>${stats_db_shadow_name}</argument>
-            <argument>step19.sql</argument>
-            <argument>${wf:appPath()}/scripts/step19.sql</argument>
+            <argument>computeProductionStats.sql</argument>
+            <argument>${wf:appPath()}/scripts/computeProductionStats.sql</argument>
            <file>impala-shell.sh</file>
        </shell>
+        <ok to="Step20"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="Step20">
+        <shell xmlns="uri:oozie:shell-action:0.1">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <exec>updateCache.sh</exec>
+            <argument>${stats_tool_api_url}</argument>
+            <file>updateCache.sh</file>
+        </shell>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
--- a/dhp-workflows/pom.xml
+++ b/dhp-workflows/pom.xml
@ -7,7 +7,7 @@
        <groupId>eu.dnetlib.dhp</groupId>
        <artifactId>dhp</artifactId>
        <version>1.2.4-SNAPSHOT</version>
-        <relativePath>../</relativePath>
+        <relativePath>../pom.xml</relativePath>
    </parent>

    <artifactId>dhp-workflows</artifactId>
--- a/pom.xml
+++ b/pom.xml
@ -278,12 +278,12 @@
 			<dependency>
 				<groupId>org.apache.httpcomponents</groupId>
 				<artifactId>httpclient</artifactId>
-				<version>4.5.3</version>
+				<version>${org.apache.httpcomponents.version}</version>
 			</dependency>
 			<dependency>
 				<groupId>org.apache.httpcomponents</groupId>
 				<artifactId>httpmime</artifactId>
-				<version>4.5.3</version>
+				<version>${org.apache.httpcomponents.version}</version>
 			</dependency>
 			<dependency>
 				<groupId>org.noggit</groupId>
@ -484,12 +484,6 @@
 				<version>${common.text.version}</version>
 			</dependency>

-			<dependency>
-				<groupId>org.apache.httpcomponents</groupId>
-				<artifactId>httpclient</artifactId>
-				<version>${org.apache.httpcomponents.version}</version>
-			</dependency>
-
 		</dependencies>
 	</dependencyManagement>

@ -719,7 +713,7 @@
 		<common.csv.version>1.8</common.csv.version>
 		<apache.poi.version>4.1.2</apache.poi.version>
 		<common.text.version>1.8</common.text.version>
-		<org.apache.httpcomponents.version>4.3.4</org.apache.httpcomponents.version>
+		<org.apache.httpcomponents.version>4.5.3</org.apache.httpcomponents.version>
 		<net.alchim31.maven.version>4.0.1</net.alchim31.maven.version>
 	</properties>
 </project>