Merge remote-tracking branch 'upstream/stable_ids' into stable_ids
commit 5680f901d1
@@ -142,18 +142,9 @@ public class HttpConnector2 {
 				}
 				return attemptDownload(newUrl, retryNumber + 1, report);
 			}
-			if (is4xx(urlConn.getResponseCode())) {
-				// CLIENT ERROR, DO NOT RETRY
-				report
-					.put(
-						REPORT_PREFIX + urlConn.getResponseCode(),
-						String
-							.format(
-								"%s error: %s", requestUrl, urlConn.getResponseMessage()));
-				throw new CollectorException("4xx error: request will not be repeated. " + report);
-			}
-			if (is5xx(urlConn.getResponseCode())) {
+			if (is4xx(urlConn.getResponseCode()) || is5xx(urlConn.getResponseCode())) {
 				switch (urlConn.getResponseCode()) {
 					case HttpURLConnection.HTTP_NOT_FOUND:
 					case HttpURLConnection.HTTP_BAD_GATEWAY:
 					case HttpURLConnection.HTTP_UNAVAILABLE:
+					case HttpURLConnection.HTTP_GATEWAY_TIMEOUT:
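Note: the hunk above folds the separate 4xx and 5xx branches into one combined check and lets the switch single out the specific response codes; the switch body is cut off by the hunk boundary, so the actual handling of the listed codes is not visible here. A minimal standalone sketch of the fail-fast-versus-special-case split this code implements (helper names are hypothetical, not the HttpConnector2 API):

    import java.net.HttpURLConnection;

    public class RetryDecisionSketch {

        static boolean is4xx(final int statusCode) {
            return statusCode / 100 == 4;
        }

        static boolean is5xx(final int statusCode) {
            return statusCode / 100 == 5;
        }

        // Codes singled out by the patched switch (404/502/503/504); every other
        // 4xx/5xx code falls through to an immediate failure.
        static boolean hasDedicatedHandling(final int statusCode) {
            switch (statusCode) {
                case HttpURLConnection.HTTP_NOT_FOUND:
                case HttpURLConnection.HTTP_BAD_GATEWAY:
                case HttpURLConnection.HTTP_UNAVAILABLE:
                case HttpURLConnection.HTTP_GATEWAY_TIMEOUT:
                    return true;
                default:
                    return false;
            }
        }
    }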
@@ -21,6 +21,9 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
 public class OaiCollectorPlugin implements CollectorPlugin {
 
+	public static final String DATE_REGEX = "\\d{4}-\\d{2}-\\d{2}";
+	public static final String UTC_DATETIME_REGEX = "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z";
+
 	private static final String FORMAT_PARAM = "format";
 	private static final String OAI_SET_PARAM = "set";
 	private static final Object OAI_FROM_DATE_PARAM = "fromDate";
@@ -62,13 +65,11 @@ public class OaiCollectorPlugin implements CollectorPlugin {
 			throw new CollectorException("Param 'mdFormat' is null or empty");
 		}
 
-		if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")
-			&& !fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
+		if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) {
 			throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + fromDate);
 		}
 
-		if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")
-			&& !untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
+		if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) {
 			throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + untilDate);
 		}
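Note: the two regex constants introduced above replace inline literals that were duplicated twice in this class and twice more in OaiIterator below. A minimal sketch of the validation rule they encode (a hypothetical standalone check, not part of the plugin):

    public class OaiDateCheckSketch {

        static final String DATE_REGEX = "\\d{4}-\\d{2}-\\d{2}";
        static final String UTC_DATETIME_REGEX = "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z";

        // A date parameter is acceptable when it is absent or matches either form.
        static boolean isAcceptable(final String date) {
            return date == null || date.matches(DATE_REGEX) || date.matches(UTC_DATETIME_REGEX);
        }

        public static void main(final String[] args) {
            System.out.println(isAcceptable("2021-03-01"));           // true
            System.out.println(isAcceptable("2021-03-01T00:00:00Z")); // true
            System.out.println(isAcceptable("01/03/2021"));           // false
        }
    }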
@@ -107,12 +107,12 @@ public class OaiIterator implements Iterator<String> {
 		if (set != null && !set.isEmpty()) {
 			url += "&set=" + URLEncoder.encode(set, "UTF-8");
 		}
-		if (fromDate != null && (fromDate.matches("\\d{4}-\\d{2}-\\d{2}")
-			|| fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
+		if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
+			|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
 			url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
 		}
-		if (untilDate != null && (untilDate.matches("\\d{4}-\\d{2}-\\d{2}")
-			|| untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
+		if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
+			|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
 			url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
 		}
 		log.info("Start harvesting using url: " + url);
@@ -80,6 +80,7 @@ public class GenerateStatsJob {
 			.map(
 				(MapFunction<Tuple2<String, DatasourceStats>, DatasourceStats>) t -> t._2,
 				Encoders.bean(DatasourceStats.class))
+			.coalesce(1)
 			.write()
 			.mode(SaveMode.Overwrite)
 			.jdbc(dbUrl, "oa_datasource_stats_temp", connectionProperties);
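Note: the added .coalesce(1) collapses the small aggregated dataset to a single partition before the JDBC write, so the table is written through one connection instead of one per partition. The pattern in isolation (dataset, URL and properties are placeholders, not the job's wiring):

    import java.util.Properties;

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SaveMode;

    public class SingleConnectionJdbcWriteSketch {

        static void writeStats(final Dataset<Row> stats, final String dbUrl, final Properties connectionProperties) {
            stats
                .coalesce(1)                      // one partition => one JDBC writer
                .write()
                .mode(SaveMode.Overwrite)
                .jdbc(dbUrl, "oa_datasource_stats_temp", connectionProperties);
        }
    }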
@@ -0,0 +1,47 @@
+package eu.dnetlib.doiboost;
+
+import eu.dnetlib.dhp.schema.oaf.Author;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+public class AuthorAssoc implements Serializable {
+	private Double score;
+	private List<Author> to_be_enriched;
+	private Author with_enricheing_content;
+
+	public Double getScore() {
+		return score;
+	}
+
+	public void setScore(Double score) {
+		this.score = score;
+	}
+
+	public List<Author> getTo_be_enriched() {
+		return to_be_enriched;
+	}
+
+	public void setTo_be_enriched(List<Author> to_be_enriched) {
+		this.to_be_enriched = to_be_enriched;
+	}
+
+	public Author getWith_enricheing_content() {
+		return with_enricheing_content;
+	}
+
+	public void setWith_enricheing_content(Author with_enricheing_content) {
+		this.with_enricheing_content = with_enricheing_content;
+	}
+
+	public static AuthorAssoc newInstance(Author a) {
+		AuthorAssoc ret = new AuthorAssoc();
+		ret.score = 0.0;
+		ret.to_be_enriched = new ArrayList<>();
+		ret.with_enricheing_content = a;
+
+		return ret;
+	}
+}
@@ -0,0 +1,236 @@
+
+package eu.dnetlib.doiboost;
+
+import java.text.Normalizer;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.utils.DHPUtils;
+
+import com.wcohen.ss.JaroWinkler;
+
+import eu.dnetlib.dhp.schema.oaf.Author;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+import scala.Tuple2;
+
+/**
+ * This is a version of the AuthorMerger specific for DoiBoost.
+ * Here we assume a match must exist for the authors, since the records share the same DOI. We compare, via the JaroWinkler
+ * similarity measure, each author in the list to be enriched with each author in the enriching list. For each enriching author
+ * we select the best match, that is the author with the highest similarity score.
+ * The association goes from the enriching author to the enriched one because, this way, only one match per enriching author
+ * can be found. One enriching author can still have the same maximum similarity score with more than one author to be enriched.
+ *
+ * The idea is to enrich the most similar authors having at least one word of the name in common.
+ *
+ * The approach is fairly simple: a data structure keeps the association between the record that can possibly enrich and the
+ * records to be enriched. It holds the list of authors that can be enriched, the enriching author, and the similarity score
+ * between the enriching author and the enriched ones. It is the value of a map keyed by the fullname of the enriching author.
+ *
+ * For each author that can be enriched, we check whether the map entry of the enriching author is associated with an author
+ * with a lower similarity score. If so, we update the association in the map, replacing the previously associated enriched
+ * author with the new one that has the higher score. If the score is the same, we add the new author to be enriched to the
+ * list of authors associated with the enriching author.
+ *
+ * At the end, the easy case: an entry is associated with a single author to be enriched => we verify that the two names share
+ * at least one word. If so, we add the missing pids of the enriching author to the author to be enriched.
+ *
+ * The bad case: there is more than one author to be enriched with the same similarity. We enrich the one whose fullname has
+ * the highest number of words in common with the enriching author's fullname. In case of a tie, no author is enriched.
+ *
+ * Remember that we start from the assumption that a match must exist, since the records have the same DOI. Consequently, the
+ * author with the highest similarity score that also shares at least one word of the name should be sufficient to be enriched.
+ *
+ * Homonymy cases, which could cause problems with the ranks of the authors, are not mapped.
+ */
+
+public class DoiBoostAuthorMerger {
+
+	public static List<Author> merge(List<List<Author>> authors, Boolean crossref) {
+
+		Iterator<List<Author>> it = authors.iterator();
+		List<Author> author = it.next();
+
+		while (it.hasNext()) {
+			List<Author> autList = it.next();
+			Tuple2<List<Author>, Boolean> tmp = mergeAuthor(author, autList, crossref);
+			author = tmp._1();
+			crossref = tmp._2();
+		}
+
+		return author;
+	}
+
+	public static Tuple2<List<Author>, Boolean> mergeAuthor(final List<Author> baseAuthor, final List<Author> otherAuthor,
+		final Boolean crossref) {
+
+		if (baseAuthor == null || baseAuthor.size() == 0)
+			return new Tuple2<>(otherAuthor, false);
+		if (otherAuthor == null || otherAuthor.size() == 0)
+			return new Tuple2<>(baseAuthor, crossref);
+
+		if (crossref) {
+			enrichPidFromList(baseAuthor, otherAuthor);
+			return new Tuple2<>(baseAuthor, true);
+		} else if (baseAuthor.size() > otherAuthor.size()) {
+			enrichPidFromList(baseAuthor, otherAuthor);
+			return new Tuple2<>(baseAuthor, false);
+		} else {
+			enrichPidFromList(otherAuthor, baseAuthor);
+			return new Tuple2<>(otherAuthor, false);
+		}
+	}
+
+	private static void enrichPidFromList(List<Author> base, List<Author> enrich) {
+
+		// search authors having identifiers in the enrich list
+		final List<Author> authorsWithPids = enrich
+			.stream()
+			.filter(a -> a.getPid() != null && a.getPid().size() > 0)
+			.collect(Collectors.toList());
+
+		Map<String, AuthorAssoc> assocMap = authorsWithPids
+			.stream()
+			.map(
+				a -> new Tuple2<>(DHPUtils.md5(a.getFullname()), AuthorAssoc.newInstance(a)))
+			.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
+
+		// for each author in the base list, we search the best enriched match
+		base.stream()
+			.map(a -> new Tuple2<>(a, authorsWithPids.stream()
+				.map(e -> new Tuple2<>(e, sim(a, e))).collect(Collectors.toList())))
+			.forEach(t2 -> {
+				for (Tuple2<Author, Double> t : t2._2()) {
+					String mapEntry = DHPUtils.md5(t._1().getFullname());
+					AuthorAssoc aa = assocMap.get(mapEntry);
+					if (aa.getScore() < t._2() && aa.getScore() < 0.9) {
+						aa.setScore(t._2());
+						aa.setTo_be_enriched(new ArrayList<>());
+						aa.getTo_be_enriched().add(t2._1());
+					} else if (t._2() > 0.9) {
+						aa.getTo_be_enriched().add(t2._1());
+					}
+				}
+			});
+
+		assocMap.keySet().forEach(k -> enrichAuthor(assocMap.get(k)));
+	}
+
+	private static long getCommonWords(List<String> fullEnrich, List<String> fullEnriching) {
+		return fullEnrich.stream().filter(w -> fullEnriching.contains(w)).count();
+	}
+
+	private static void enrichAuthor(Author enrich, Author enriching) {
+		// verify that some of the words in the fullname are contained in the other;
+		// compare on the normalized fullnames
+		long commonWords = getCommonWords(normalize(enrich.getFullname()),
+			normalize(enriching.getFullname()));
+		if (commonWords > 0) {
+			if (enrich.getPid() == null) {
+				enrich.setPid(new ArrayList<>());
+			}
+			Set<String> aPids = enrich.getPid().stream().map(p -> pidToComparableString(p)).collect(Collectors.toSet());
+			enriching.getPid().forEach(p -> {
+				if (!aPids.contains(pidToComparableString(p))) {
+					enrich.getPid().add(p);
+				}
+			});
+			if (enrich.getAffiliation() == null) {
+				if (enriching.getAffiliation() != null) {
+					enrich.setAffiliation(enriching.getAffiliation());
+				}
+			}
+		}
+	}
+
+	// Verify the number of words in common. The one that has more wins. If the numbers of words
+	// in common are the same, we enrich no author.
+	private static void enrichAuthor(AuthorAssoc authorAssoc) {
+		if (authorAssoc.getTo_be_enriched().size() == 1) {
+			enrichAuthor(authorAssoc.getTo_be_enriched().get(0), authorAssoc.getWith_enricheing_content());
+		} else {
+			long common = 0;
+			List<Author> selected = new ArrayList<>();
+			for (Author a : authorAssoc.getTo_be_enriched()) {
+				long current_common = getCommonWords(normalize(a.getFullname()),
+					normalize(authorAssoc.getWith_enricheing_content().getFullname()));
+				if (current_common > common) {
+					common = current_common;
+					selected = new ArrayList<>();
+					selected.add(a);
+				} else if (current_common == common) {
+					selected.add(a);
+				}
+			}
+			if (selected.size() == 1) {
+				enrichAuthor(selected.get(0), authorAssoc.getWith_enricheing_content());
+			}
+		}
+	}
+
+	public static String pidToComparableString(StructuredProperty pid) {
+		return (pid.getQualifier() != null
+			? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
+			: "")
+			+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
+	}
+
+	private static Double sim(Author a, Author b) {
+		return new JaroWinkler()
+			.score(normalizeString(a.getFullname()), normalizeString(b.getFullname()));
+	}
+
+	private static String normalizeString(String fullname) {
+		return String.join(" ", normalize(fullname));
+	}
+
+	private static List<String> normalize(final String s) {
+		String[] normalized = nfd(s)
+			.replaceAll("[^\\p{ASCII}]", "")
+			.toLowerCase()
+			// do not compact the regexes in a single expression, would cause StackOverflowError
+			// in case of large input strings
+			.replaceAll("(\\W)+", " ")
+			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
+			.replaceAll("(\\p{Punct})+", " ")
+			.replaceAll("(\\d)+", " ")
+			.replaceAll("(\\n)+", " ")
+			.trim()
+			.split(" ");
+
+		Arrays.sort(normalized);
+
+		return Arrays.asList(normalized);
+	}
+
+	private static String nfd(final String s) {
+		return Normalizer.normalize(s, Normalizer.Form.NFD);
+	}
+}
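Note: a short sketch of how the merger above is meant to be driven per DOI group; the author lists are placeholders, and the Boolean in the returned tuple tells the caller whether the surviving list still comes from Crossref (see SparkGenerateDoiBoost below for the real call site):

    import java.util.List;

    import eu.dnetlib.dhp.schema.oaf.Author;
    import eu.dnetlib.doiboost.DoiBoostAuthorMerger;
    import scala.Tuple2;

    public class AuthorMergeSketch {

        static List<Author> mergeOnce(final List<Author> crossrefAuthors, final List<Author> otherAuthors) {
            // 'true' => the base list comes from Crossref, so it wins and gets enriched.
            final Tuple2<List<Author>, Boolean> res = DoiBoostAuthorMerger.mergeAuthor(crossrefAuthors, otherAuthors, true);
            return res._1();
        }
    }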
@@ -38,6 +38,9 @@ object DoiBoostMappingUtil {
   val OPENAIRE_PREFIX = "openaire____"
   val SEPARATOR = "::"
 
+  val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)"
+  val DOI_PREFIX = "10."
+
   val invalidName = List(",", "none none", "none, none", "none &na;", "(:null)", "test test test", "test test", "test", "&na; &na;")
 
   def toActionSet(item:Oaf) :(String, String) = {
@@ -352,5 +355,28 @@ object DoiBoostMappingUtil {
 
   }
 
   def isEmpty(x: String) = x == null || x.trim.isEmpty
 
+  def normalizeDoi(input : String) :String ={
+    if(input == null)
+      return null
+    val replaced = input.replaceAll("(?:\\n|\\r|\\t|\\s)", "").toLowerCase.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)
+    if (isEmpty(replaced))
+      return null
+
+    if(replaced.indexOf("10.") < 0)
+      return null
+
+    val ret = replaced.substring(replaced.indexOf("10."))
+
+    if (!ret.startsWith(DOI_PREFIX))
+      return null
+
+    return ret
+  }
+
 }
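Note: normalizeDoi above strips all whitespace, lowercases, rewrites the first '10.' or '/10.' occurrence to the canonical prefix, and returns null when no valid '10.'-rooted suffix survives. A Java re-sketch of the same rule, for illustration only (the committed implementation is the Scala one above):

    public class DoiNormalizationSketch {

        static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10.)";
        static final String DOI_PREFIX = "10.";

        static String normalizeDoi(final String input) {
            if (input == null)
                return null;
            final String replaced = input
                .replaceAll("(?:\\n|\\r|\\t|\\s)", "")
                .toLowerCase()
                .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
            if (replaced.trim().isEmpty() || replaced.indexOf(DOI_PREFIX) < 0)
                return null;
            final String ret = replaced.substring(replaced.indexOf(DOI_PREFIX));
            return ret.startsWith(DOI_PREFIX) ? ret : null;
        }

        public static void main(final String[] args) {
            System.out.println(normalizeDoi("https://doi.org/10.1042/ BCJ20160876")); // 10.1042/bcj20160876
            System.out.println(normalizeDoi("0.1042/BCJ20160876"));                   // null
        }
    }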
@@ -3,7 +3,7 @@ package eu.dnetlib.doiboost
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.oa.merge.AuthorMerger
 import eu.dnetlib.dhp.schema.common.ModelConstants
-import eu.dnetlib.dhp.schema.oaf.{Organization, Publication, Relation, Dataset => OafDataset}
+import eu.dnetlib.dhp.schema.oaf.{Author, Organization, Publication, Relation, Dataset => OafDataset}
 import eu.dnetlib.doiboost.mag.ConversionUtil
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
@@ -25,6 +25,7 @@ object SparkGenerateDoiBoost {
     val conf: SparkConf = new SparkConf()
     val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json")))
     parser.parseArgument(args)
+    var crossref : Boolean = true
     val spark: SparkSession =
       SparkSession
         .builder()
@@ -104,7 +105,10 @@ object SparkGenerateDoiBoost {
       val otherPub = item._2._2
       if (otherPub != null) {
         crossrefPub.mergeFrom(otherPub)
-        crossrefPub.setAuthor(AuthorMerger.mergeAuthor(crossrefPub.getAuthor, otherPub.getAuthor))
+        val mergeRes : (java.util.List[Author], java.lang.Boolean) = DoiBoostAuthorMerger.mergeAuthor(crossrefPub.getAuthor, otherPub.getAuthor, crossref)
+        crossrefPub.setAuthor(mergeRes._1)
+        crossref = mergeRes._2
+
       }
     }
     crossrefPub
@@ -16,9 +16,10 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable
 import scala.util.matching.Regex
 import eu.dnetlib.dhp.schema.scholexplorer.OafUtils
 
 import java.util
 
+import eu.dnetlib.doiboost.DoiBoostMappingUtil
 
 case class CrossrefDT(doi: String, json:String, timestamp: Long) {}
 
 case class mappingAffiliation(name: String) {}
@@ -89,7 +90,7 @@ case object Crossref2Oaf {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
 
     //MAPPING Crossref DOI into PID
-    val doi: String = (json \ "DOI").extract[String]
+    val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
     result.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
 
     //MAPPING Crossref DOI into OriginalId
@@ -101,6 +102,7 @@ case object Crossref2Oaf {
     val originalIds = new util.ArrayList(tmp.filter(id => id != null).asJava)
     result.setOriginalId(originalIds)
 
     // Add DataInfo
     result.setDataInfo(generateDataInfo())
|
@ -1,6 +1,7 @@
|
|||
package eu.dnetlib.doiboost.crossref
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.hadoop.io.{IntWritable, Text}
|
||||
import org.apache.spark.SparkConf
|
||||
|
@@ -21,7 +22,7 @@ object CrossrefDataset {
       implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
       lazy val json: json4s.JValue = parse(input)
       val ts:Long = (json \ "indexed" \ "timestamp").extract[Long]
-      val doi:String = (json \ "DOI").extract[String]
+      val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
       CrossrefDT(doi, input, ts)
     }
@@ -1,6 +1,7 @@
 package eu.dnetlib.doiboost.crossref
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.doiboost.DoiBoostMappingUtil
 import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item
 import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass
 import org.apache.hadoop.io.{IntWritable, Text}
@@ -27,7 +28,7 @@ object GenerateCrossrefDataset {
   def crossrefElement(meta: String): CrossrefDT = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(meta)
-    val doi:String = (json \ "DOI").extract[String]
+    val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
     val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long]
     CrossrefDT(doi, meta, timestamp)
|
@ -196,8 +196,8 @@ case object ConversionUtil {
|
|||
val authors = inputParams._2
|
||||
|
||||
val pub = new Publication
|
||||
pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
||||
pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava)
|
||||
pub.setPid(List(createSP(paper.Doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
||||
pub.setOriginalId(List(paper.PaperId.toString, paper.Doi).asJava)
|
||||
|
||||
//IMPORTANT
|
||||
//The old method result.setId(generateIdentifier(result, doi))
|
||||
|
@@ -258,11 +258,14 @@ case object ConversionUtil {
     val description = inputParams._2
 
     val pub = new Publication
-    pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
-    pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava)
+    pub.setPid(List(createSP(paper.Doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
+    pub.setOriginalId(List(paper.PaperId.toString, paper.Doi).asJava)
 
-    //Set identifier as 50 | doiboost____::md5(DOI)
-    pub.setId(generateIdentifier(pub, paper.Doi.toLowerCase))
+    //IMPORTANT
+    //The old method result.setId(generateIdentifier(result, doi))
+    //will be replaced using IdentifierFactory
+
+    pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub))
 
     val mainTitles = createSP(paper.PaperTitle, "main title", ModelConstants.DNET_DATACITE_TITLE)
     val originalTitles = createSP(paper.OriginalTitle, "alternative title", ModelConstants.DNET_DATACITE_TITLE)
@@ -2,6 +2,7 @@ package eu.dnetlib.doiboost.mag
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.schema.oaf.Publication
+import eu.dnetlib.doiboost.DoiBoostMappingUtil
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.rdd.RDD
@@ -12,6 +13,23 @@ import org.slf4j.{Logger, LoggerFactory}
 import scala.collection.JavaConverters._
 
 object SparkProcessMAG {
+
+  def getDistinctResults (d:Dataset[MagPapers]):Dataset[MagPapers]={
+    d.where(col("Doi").isNotNull)
+      .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING)
+      .reduceGroups((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2))
+      .map(_._2)(Encoders.product[MagPapers])
+      .map(mp => {
+        new MagPapers(mp.PaperId, mp.Rank, DoiBoostMappingUtil.normalizeDoi(mp.Doi),
+          mp.DocType, mp.PaperTitle, mp.OriginalTitle,
+          mp.BookTitle, mp.Year, mp.Date, mp.Publisher: String,
+          mp.JournalId, mp.ConferenceSeriesId, mp.ConferenceInstanceId,
+          mp.Volume, mp.Issue, mp.FirstPage, mp.LastPage,
+          mp.ReferenceCount, mp.CitationCount, mp.EstimatedCitation,
+          mp.OriginalVenue, mp.FamilyId, mp.CreatedDate)
+      })(Encoders.product[MagPapers])
+  }
 
   def main(args: Array[String]): Unit = {
 
     val logger: Logger = LoggerFactory.getLogger(getClass)
@@ -33,17 +51,11 @@ object SparkProcessMAG {
     implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication]
     implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)
 
     logger.info("Phase 1) make unique DOI in Papers:")
     val d: Dataset[MagPapers] = spark.read.load(s"$sourcePath/Papers").as[MagPapers]
 
-    // Filtering Papers with a DOI; since for the same DOI we have multiple versions of the item with different PaperIds, we keep the last one
-    val result: RDD[MagPapers] = d.where(col("Doi").isNotNull)
-      .rdd
-      .map{ p: MagPapers => Tuple2(p.Doi, p) }
-      .reduceByKey((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2))
-      .map(_._2)
-
-    val distinctPaper: Dataset[MagPapers] = spark.createDataset(result)
+    val distinctPaper: Dataset[MagPapers] = getDistinctResults(d)
 
     distinctPaper.write.mode(SaveMode.Overwrite).save(s"$workingPath/Papers_distinct")
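Note: the refactor above replaces the anonymous RDD reduceByKey pipeline with the named, testable getDistinctResults, keying the dedup on the normalized DOI. The shape of the Dataset-side pattern, sketched generically in Java (the key extractor and "latest wins" reducer stand in for normalizeDoi and choiceLatestMagArtitcle):

    import org.apache.spark.api.java.function.MapFunction;
    import org.apache.spark.api.java.function.ReduceFunction;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoder;

    import scala.Tuple2;

    public class DistinctByKeySketch {

        // Keep exactly one record per key, letting 'latest' decide each collision.
        static <T> Dataset<T> distinctByKey(
            final Dataset<T> d,
            final MapFunction<T, String> keyOf,
            final ReduceFunction<T> latest,
            final Encoder<String> keyEncoder,
            final Encoder<T> valueEncoder) {
            return d
                .groupByKey(keyOf, keyEncoder)
                .reduceGroups(latest)
                .map((MapFunction<Tuple2<String, T>, T>) t -> t._2(), valueEncoder);
        }
    }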
|
@ -84,7 +84,7 @@ object ORCIDToOAF {
|
|||
JField("type", JString(typeValue)) <- extIds
|
||||
JField("value", JString(value)) <- extIds
|
||||
if "doi".equalsIgnoreCase(typeValue)
|
||||
} yield (typeValue, value)
|
||||
} yield (typeValue, DoiBoostMappingUtil.normalizeDoi(value))
|
||||
if (doi.nonEmpty) {
|
||||
return doi.map(l =>OrcidWork(oid, l._2))
|
||||
}
|
||||
|
@@ -102,7 +102,7 @@ object ORCIDToOAF {
   def convertTOOAF(input:ORCIDItem) :Publication = {
     val doi = input.doi
     val pub:Publication = new Publication
-    pub.setPid(List(createSP(doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
+    pub.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
     pub.setDataInfo(generateDataInfo())
 
     pub.setId(IdentifierFactory.createDOIBoostIdentifier(pub))
|
@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.uw
|
|||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
|
||||
import eu.dnetlib.dhp.schema.oaf.{AccessRight, Instance, OpenAccessRoute, Publication}
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
|
@@ -53,7 +54,10 @@ object UnpayWallToOAF {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(input)
 
-    val doi = (json \"doi").extract[String]
+    val doi = DoiBoostMappingUtil.normalizeDoi((json \"doi").extract[String])
+
+    if(doi == null)
+      return null
 
     val is_oa = (json\ "is_oa").extract[Boolean]
@@ -111,7 +111,7 @@
       <case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case>
       <case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case>
       <case to="GenerateCrossrefDataset">${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'}</case>
-      <default to="ImportCrossRef"/>
+      <default to="ProcessORCID"/>
     </switch>
   </decision>
@@ -319,7 +319,7 @@
       --executor-memory=${sparkExecutorIntersectionMemory}
       --executor-cores=${sparkExecutorCores}
       --driver-memory=${sparkDriverMemory}
-      --conf spark.sql.shuffle.partitions=3840
+      --conf spark.sql.shuffle.partitions=7680
       --conf spark.extraListeners=${spark2ExtraListeners}
       --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
       --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@@ -0,0 +1,406 @@
+
+package eu.dnetlib.dhp.doiboost;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.neethi.Assertion;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.Author;
+import eu.dnetlib.dhp.schema.oaf.Publication;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+import eu.dnetlib.doiboost.DoiBoostAuthorMerger;
+import eu.dnetlib.pace.util.MapDocumentUtil;
+import scala.Tuple2;
+
+public class DoiBoostAuthorMergerTest {
+
+	private String publicationsBasePath;
+
+	private List<List<Author>> authors;
+
+	@BeforeEach
+	public void setUp() throws Exception {
+
+		publicationsBasePath = Paths
+			.get(DoiBoostAuthorMergerTest.class.getResource("/eu/dnetlib/dhp/doiboost").toURI())
+			.toFile()
+			.getAbsolutePath();
+	}
+
+	@Test
+	public void mergeTestOrcid() {
+
+		authors = readSample(publicationsBasePath + "/matching_authors_first.json", Publication.class)
+			.stream()
+			.map(p -> p._2().getAuthor())
+			.collect(Collectors.toList());
+
+		for (List<Author> authors1 : authors) {
+			System.out.println("List " + (authors.indexOf(authors1) + 1));
+			for (Author author : authors1) {
+				System.out.println(authorToString(author));
+			}
+		}
+
+		List<Author> merge = DoiBoostAuthorMerger.merge(authors, true);
+
+		System.out.println("Merge ");
+		for (Author author : merge) {
+			System.out.println(authorToString(author));
+		}
+
+		Assertions.assertEquals(10, merge.size());
+
+		Assertions.assertEquals(3, merge.stream().filter(a -> a.getPid() != null).count());
+
+		merge
+			.stream()
+			.filter(a -> a.getPid() != null)
+			.forEach(
+				a -> Assertions
+					.assertTrue(
+						a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))));
+		merge.stream().filter(a -> a.getPid() != null).forEach(a -> {
+			try {
+				System.out.println(new ObjectMapper().writeValueAsString(a));
+			} catch (JsonProcessingException e) {
+				e.printStackTrace();
+			}
+		});
+	}
+
+	public <T> List<Tuple2<String, T>> readSample(String path, Class<T> clazz) {
+		List<Tuple2<String, T>> res = new ArrayList<>();
+		BufferedReader reader;
+		try {
+			reader = new BufferedReader(new FileReader(path));
+			String line = reader.readLine();
+			while (line != null) {
+				res
+					.add(
+						new Tuple2<>(
+							MapDocumentUtil.getJPathString("$.id", line),
+							new ObjectMapper().readValue(line, clazz)));
+				// read next line
+				line = reader.readLine();
+			}
+			reader.close();
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+
+		return res;
+	}
+
+	public String authorToString(Author a) {
+
+		String print = "Fullname = ";
+		print += a.getFullname() + " pid = [";
+		if (a.getPid() != null)
+			for (StructuredProperty sp : a.getPid()) {
+				print += sp.toComparableString() + " ";
+			}
+		print += "]";
+		return print;
+	}
+
+	@Test
+	public void mergeTestMAG() {
+
+		authors = readSample(publicationsBasePath + "/matching_authors_second", Publication.class)
+			.stream()
+			.map(p -> p._2().getAuthor())
+			.collect(Collectors.toList());
+
+		for (List<Author> authors1 : authors) {
+			System.out.println("List " + (authors.indexOf(authors1) + 1));
+			for (Author author : authors1) {
+				System.out.println(authorToString(author));
+			}
+		}
+
+		List<Author> merge = DoiBoostAuthorMerger.merge(authors, true);
+
+		System.out.println("Merge ");
+		for (Author author : merge) {
+			System.out.println(authorToString(author));
+		}
+
+		Assertions.assertEquals(10, merge.size());
+
+		Assertions.assertEquals(10, merge.stream().filter(a -> a.getPid() != null).count());
+
+		merge
+			.stream()
+			.filter(a -> a.getPid() != null)
+			.forEach(
+				a -> Assertions
+					.assertTrue(
+						a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals("URL"))));
+		merge.stream().filter(a -> a.getPid() != null).forEach(a -> {
+			try {
+				System.out.println(new ObjectMapper().writeValueAsString(a));
+			} catch (JsonProcessingException e) {
+				e.printStackTrace();
+			}
+		});
+	}
+
+	@Test
+	public void mergeTestCrossrefEmpty1() throws JsonProcessingException {
+
+		authors = readSample(publicationsBasePath + "/empty_crossref_authors_first.json", Publication.class)
+			.stream()
+			.map(p -> p._2().getAuthor())
+			.collect(Collectors.toList());
+
+		List<Author> merge = DoiBoostAuthorMerger.merge(authors, true);
+
+		System.out.println("Merge ");
+		for (Author author : merge) {
+			System.out.println(authorToString(author));
+		}
+
+		Assertions.assertEquals(3, merge.size());
+
+		Assertions.assertEquals(3, merge.stream().filter(a -> a.getPid() != null).count());
+
+		merge
+			.stream()
+			.filter(a -> a.getPid() != null)
+			.forEach(
+				a -> Assertions
+					.assertTrue(
+						a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))));
+		merge.stream().filter(a -> a.getPid() != null).forEach(a -> {
+			try {
+				System.out.println(new ObjectMapper().writeValueAsString(a));
+			} catch (JsonProcessingException e) {
+				e.printStackTrace();
+			}
+		});
+
+		System.out.println(new ObjectMapper().writeValueAsString(merge));
+	}
+
+	@Test
+	public void mergeTestCrossrefEmpty2() throws JsonProcessingException {
+
+		authors = readSample(publicationsBasePath + "/empty_crossref_authors_second.json", Publication.class)
+			.stream()
+			.map(p -> p._2().getAuthor())
+			.collect(Collectors.toList());
+
+		List<Author> merge = DoiBoostAuthorMerger.merge(authors, false);
+
+		System.out.println("Merge ");
+		for (Author author : merge) {
+			System.out.println(authorToString(author));
+		}
+
+		Assertions.assertEquals(10, merge.size());
+
+		Assertions.assertEquals(10, merge.stream().filter(a -> a.getPid() != null).count());
+
+		merge
+			.stream()
+			.filter(a -> a.getPid() != null)
+			.forEach(
+				a -> Assertions
+					.assertTrue(
+						a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals("URL"))));
+		merge.stream().filter(a -> a.getPid() != null).forEach(a -> {
+			try {
+				System.out.println(new ObjectMapper().writeValueAsString(a));
+			} catch (JsonProcessingException e) {
+				e.printStackTrace();
+			}
+		});
+
+		Assertions.assertTrue(3 == merge.stream().filter(a -> a.getPid() != null)
+			.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count());
+	}
+
+	@Test
+	public void mergeTestCrossrefEmpty3() throws JsonProcessingException {
+
+		authors = readSample(publicationsBasePath + "/empty_crossref_author_third.json", Publication.class)
+			.stream()
+			.map(p -> p._2().getAuthor())
+			.collect(Collectors.toList());
+
+		List<Author> merge = DoiBoostAuthorMerger.merge(authors, true);
+
+		System.out.println("Merge ");
+		for (Author author : merge) {
+			System.out.println(authorToString(author));
+		}
+
+		Assertions.assertEquals(10, merge.size());
+
+		Assertions.assertEquals(10, merge.stream().filter(a -> a.getPid() != null).count());
+
+		merge
+			.stream()
+			.filter(a -> a.getPid() != null)
+			.forEach(
+				a -> Assertions
+					.assertTrue(
+						a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals("URL"))));
+
+		Assertions.assertTrue(3 == merge.stream().filter(a -> a.getPid() != null)
+			.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count());
+	}
+
+	@Test
+	public void mergeTestCrossrefEmpty4() throws JsonProcessingException {
+
+		authors = readSample(publicationsBasePath + "/empty_crossref_author_fourth.json", Publication.class)
+			.stream()
+			.map(p -> p._2().getAuthor())
+			.collect(Collectors.toList());
+
+		List<Author> merge = DoiBoostAuthorMerger.merge(authors, true);
+
+		System.out.println("Merge ");
+		for (Author author : merge) {
+			System.out.println(authorToString(author));
+		}
+
+		Assertions.assertEquals(3, merge.size());
+
+		Assertions.assertEquals(3, merge.stream().filter(a -> a.getPid() != null).count());
+
+		Assertions.assertTrue(3 == merge.stream().filter(a -> a.getPid() != null)
+			.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count());
+	}
+
+	@Test
+	public void shouldMergeTest1() throws JsonProcessingException {
+
+		authors = readSample(publicationsBasePath + "/should_appear_author1.json", Publication.class)
+			.stream()
+			.map(p -> p._2().getAuthor())
+			.collect(Collectors.toList());
+
+		List<Author> merge = DoiBoostAuthorMerger.merge(authors, true);
+
+		Assertions.assertTrue(6 == merge.stream().filter(a -> a.getPid() != null)
+			.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count());
+
+		Assertions.assertTrue(34 == merge.stream().filter(a -> a.getPid() != null)
+			.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))).count());
+
+		merge.stream().filter(a -> a.getRank() == 26)
+			.forEach(a ->
+				Assertions.assertTrue(a.getPid()
+					.stream()
+					.anyMatch(pid -> pid.getValue().equals("0000-0002-2445-5275")
+						&& pid.getQualifier().getClassid().equals(ModelConstants.ORCID)
+					)
+				)
+			);
+	}
+
+	@Test
+	public void shouldMergeTest2() throws JsonProcessingException {
+
+		authors = readSample(publicationsBasePath + "/should_appear_author2.json", Publication.class)
+			.stream()
+			.map(p -> p._2().getAuthor())
+			.collect(Collectors.toList());
+
+		List<Author> merge = DoiBoostAuthorMerger.merge(authors, true);
+
+		Assertions.assertTrue(5 == merge.stream().filter(a -> a.getPid() != null)
+			.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count());
+
+		Assertions.assertTrue(34 == merge.stream().filter(a -> a.getPid() != null)
+			.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))).count());
+
+		merge.stream().filter(a -> a.getFullname().equals("da luz geraldo eduardo"))
+			.forEach(a ->
+				Assertions.assertTrue(a.getPid()
+					.stream()
+					.anyMatch(pid -> pid.getValue().equals("http://orcid.org/0000-0003-2434-0387")
+						&& pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)
+					)
+				)
+			);
+	}
+
+	@Test
+	public void shouldNotMergeTest1() throws JsonProcessingException {
+
+		authors = readSample(publicationsBasePath + "/should_appear_author3.json", Publication.class)
+			.stream()
+			.map(p -> p._2().getAuthor())
+			.collect(Collectors.toList());
+
+		List<Author> merge = DoiBoostAuthorMerger.merge(authors, true);
+
+		System.out.println("Merge ");
+		for (Author author : merge) {
+			System.out.println(authorToString(author));
+		}
+
+//		Assertions.assertTrue(5 == merge.stream().filter(a -> a.getPid() != null)
+//			.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count());
+//
+//		Assertions.assertTrue(34 == merge.stream().filter(a -> a.getPid() != null)
+//			.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))).count());
+//
+//		merge.stream().filter(a -> a.getFullname().equals("da luz geraldo eduardo"))
+//			.forEach(a ->
+//				Assertions.assertTrue(a.getPid()
+//					.stream()
+//					.anyMatch(pid -> pid.getValue().equals("http://orcid.org/0000-0003-2434-0387")
+//						&& pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)
+//					)
+//				)
+//			);
+	}
+}
@@ -0,0 +1,46 @@
+package eu.dnetlib.dhp.doiboost
+
+import eu.dnetlib.doiboost.DoiBoostMappingUtil
+import org.junit.jupiter.api.Test
+
+class NormalizeDOITest {
+
+  @Test
+  def doiDSLowerCase():Unit = {
+    val doi ="10.1042/BCJ20160876"
+
+    assert(DoiBoostMappingUtil.normalizeDoi(doi).equals(doi.toLowerCase()))
+  }
+
+  @Test
+  def doiFiltered():Unit = {
+    val doi = "0.1042/BCJ20160876"
+
+    assert(DoiBoostMappingUtil.normalizeDoi(doi) == null)
+  }
+
+  @Test
+  def doiFiltered2():Unit = {
+    val doi = "https://doi.org/0.1042/BCJ20160876"
+
+    assert(DoiBoostMappingUtil.normalizeDoi(doi) == null)
+  }
+
+  @Test
+  def doiCleaned():Unit = {
+    val doi = "https://doi.org/10.1042/BCJ20160876"
+
+    assert(DoiBoostMappingUtil.normalizeDoi(doi).equals("10.1042/BCJ20160876".toLowerCase()))
+  }
+
+  @Test
+  def doiCleaned1():Unit = {
+    val doi = "https://doi.org/10.1042/ BCJ20160876"
+
+    assert(DoiBoostMappingUtil.normalizeDoi(doi).equals("10.1042/BCJ20160876".toLowerCase()))
+  }
+
+}
|
@ -461,5 +461,37 @@ class CrossrefMappingTest {
|
|||
// })
|
||||
}
|
||||
|
||||
@Test
|
||||
def testNormalizeDOI(): Unit = {
|
||||
val template = Source.fromInputStream(getClass.getResourceAsStream("article_funder_template.json")).mkString
|
||||
val line :String = "\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}],"
|
||||
val json = template.replace("%s", line)
|
||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
||||
assertTrue(resultList.nonEmpty)
|
||||
val items = resultList.filter(p => p.isInstanceOf[Publication])
|
||||
val result: Result = items.head.asInstanceOf[Publication]
|
||||
|
||||
result.getPid.asScala.foreach(pid => assertTrue(pid.getQualifier.getClassid.equals("doi")))
|
||||
assertTrue(result.getPid.size() == 1)
|
||||
result.getPid.asScala.foreach(pid => assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase())))
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
def testNormalizeDOI2(): Unit = {
|
||||
val template = Source.fromInputStream(getClass.getResourceAsStream("article.json")).mkString
|
||||
|
||||
val resultList: List[Oaf] = Crossref2Oaf.convert(template)
|
||||
assertTrue(resultList.nonEmpty)
|
||||
val items = resultList.filter(p => p.isInstanceOf[Publication])
|
||||
val result: Result = items.head.asInstanceOf[Publication]
|
||||
|
||||
result.getPid.asScala.foreach(pid => assertTrue(pid.getQualifier.getClassid.equals("doi")))
|
||||
assertTrue(result.getPid.size() == 1)
|
||||
result.getPid.asScala.foreach(pid => assertTrue(pid.getValue.equals("10.26850/1678-4618EQJ.v35.1.2010.p41-46".toLowerCase())))
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ import java.sql.Timestamp
|
|||
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication
|
||||
import org.apache.htrace.fasterxml.jackson.databind.SerializationFeature
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.{SparkConf, SparkContext}
|
||||
import org.apache.spark.api.java.function.MapFunction
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
||||
|
@@ -62,6 +62,55 @@ class MAGMappingTest {
     logger.debug(description)
 
   }
+
+  @Test
+  def normalizeDoiTest():Unit = {
+
+    import org.json4s.jackson.Serialization.write
+    import org.json4s.DefaultFormats
+
+    implicit val formats = DefaultFormats
+
+    val conf = new SparkConf().setAppName("test").setMaster("local[2]")
+    val sc = new SparkContext(conf)
+    val spark = SparkSession.builder.config(sc.getConf).getOrCreate()
+    val path = getClass.getResource("magPapers.json").getPath
+
+    import org.apache.spark.sql.Encoders
+    val schema = Encoders.product[MagPapers].schema
+
+    import spark.implicits._
+    val magPapers :Dataset[MagPapers] = spark.read.option("multiline",true).schema(schema).json(path).as[MagPapers]
+    val ret :Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers)
+    assertTrue(ret.count == 10)
+    ret.take(10).foreach(mp => assertTrue(mp.Doi.equals(mp.Doi.toLowerCase())))
+
+    spark.close()
+  }
+
+  @Test
+  def normalizeDoiTest2():Unit = {
+
+    import org.json4s.jackson.Serialization.write
+    import org.json4s.DefaultFormats
+
+    implicit val formats = DefaultFormats
+
+    val conf = new SparkConf().setAppName("test").setMaster("local[2]")
+    val sc = new SparkContext(conf)
+    val spark = SparkSession.builder.config(sc.getConf).getOrCreate()
+    val path = getClass.getResource("duplicatedMagPapers.json").getPath
+
+    import org.apache.spark.sql.Encoders
+    val schema = Encoders.product[MagPapers].schema
+
+    import spark.implicits._
+    val magPapers :Dataset[MagPapers] = spark.read.option("multiline",true).schema(schema).json(path).as[MagPapers]
+    val ret :Dataset[MagPapers] = SparkProcessMAG.getDistinctResults(magPapers)
+    assertTrue(ret.count == 8)
+    ret.take(8).foreach(mp => assertTrue(mp.Doi.equals(mp.Doi.toLowerCase())))
+    spark.close()
+    //ret.take(8).foreach(mp => println(write(mp)))
+  }
+
 }
|
@ -12,6 +12,8 @@ import org.slf4j.{Logger, LoggerFactory}
|
|||
import java.nio.file.Path
|
||||
import scala.io.Source
|
||||
|
||||
import scala.collection.JavaConversions._
|
||||
|
||||
class MappingORCIDToOAFTest {
|
||||
val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass)
|
||||
val mapper = new ObjectMapper()
|
||||
|
@@ -59,13 +61,30 @@ class MappingORCIDToOAFTest {
     assertTrue(oA == p.count())
     println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p.first()))
 
+    spark.close()
   }
 
+  @Test
+  def testExtractDat1():Unit ={
+
+    val aList: List[OrcidAuthor] = List(OrcidAuthor("0000-0002-4335-5309", Some("Lucrecia"), Some("Curto"), null, null, null ),
+      OrcidAuthor("0000-0001-7501-3330", Some("Emilio"), Some("Malchiodi"), null, null, null ), OrcidAuthor("0000-0002-5490-9186", Some("Sofia"), Some("Noli Truant"), null, null, null ))
+
+    val orcid:ORCIDItem = ORCIDItem("10.1042/BCJ20160876", aList)
+
+    val oaf = ORCIDToOAF.convertTOOAF(orcid)
+    assert(oaf.getPid.size() == 1)
+    oaf.getPid.toList.foreach(pid => assert(pid.getQualifier.getClassid.equals("doi")))
+    oaf.getPid.toList.foreach(pid => assert(pid.getValue.equals("10.1042/BCJ20160876")))
+    //println(mapper.writeValueAsString(oaf))
+  }
+
 }
|
@ -20,16 +20,26 @@ class UnpayWallMappingTest {
|
|||
|
||||
val Ilist = Source.fromInputStream(getClass.getResourceAsStream("input.json")).mkString
|
||||
|
||||
|
||||
var i:Int = 0
|
||||
for (line <-Ilist.lines) {
|
||||
val p = UnpayWallToOAF.convertToOAF(line)
|
||||
|
||||
if(p!= null) {
|
||||
assertTrue(p.getInstance().size()==1)
|
||||
if (i== 0){
|
||||
assertTrue(p.getPid.get(0).getValue.equals("10.1038/2211089b0"))
|
||||
}
|
||||
if (i== 1){
|
||||
assertTrue(p.getPid.get(0).getValue.equals("10.1021/acs.bioconjchem.8b00058.s001"))
|
||||
}
|
||||
if (i== 2){
|
||||
assertTrue(p.getPid.get(0).getValue.equals("10.1021/acs.bioconjchem.8b00086.s001"))
|
||||
}
|
||||
logger.info(s"ID : ${p.getId}")
|
||||
}
|
||||
assertNotNull(line)
|
||||
assertTrue(line.nonEmpty)
|
||||
i = i+1
|
||||
}
|
||||
|
||||
|
||||
|
@@ -39,7 +49,9 @@ class UnpayWallMappingTest {
     val item = UnpayWallToOAF.convertToOAF(l)
 
     assertEquals(item.getInstance().get(0).getAccessright.getOpenAccessRoute, OpenAccessRoute.bronze)
+
     logger.info(mapper.writeValueAsString(item))
+
   }
 }
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
 {
-  "DOI": "10.26850/1678-4618eqj.v35.1.2010.p41-46",
+  "DOI": " 10.26850/1678-4618eqj.v35.1.2010.p41-46",
   "issued": {
     "date-parts": [
       [
@@ -1,5 +1,5 @@
 {
-  "DOI": "10.26850/1678-4618eqj.v35.1.2010.p41-46",
+  "DOI": "10.26850/1678-4618EQJ.v35.1.2010.p41-46",
   "issued": {
     "date-parts": [
       [
@@ -0,0 +1,10 @@
+[{"PaperId":2866429360,"Rank":1,"Doi":"10.5465/AMBPP.2018.12619SYMPOSIUM","DocType":null,"PaperTitle":"new directions in research on conflict dynamics","OriginalTitle":"New Directions in Research on Conflict Dynamics","BookTitle":null,"Year":2018,"Date":"2018-07-09T00:00:00Z","Publisher":"Academy of Management Briarcliff Manor, NY 10510","JournalId":null,"Volume":"2018","Issue":"1","FirstPage":"12619","LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Academy of Management Proceedings","CreatedDate":"2018-07-19T00:00:00Z"},
+{"PaperId":2871494677,"Rank":2,"Doi":"10.1007/978-981-10-8971-8_33","DocType":null,"PaperTitle":"wild flame detection using weight adaptive particle filter from monocular video","OriginalTitle":"Wild Flame Detection Using Weight Adaptive Particle Filter from Monocular Video","BookTitle":null,"Year":2019,"Date":"2019-01-01T00:00:00Z","Publisher":"Springer, Singapore","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"357","LastPage":"365","ReferenceCount":14,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-07-19T00:00:00Z"},
+{"PaperId":2883520096,"Rank":3,"Doi":"10.5465/AMBPP .2018.12619SYMPOSIUM","DocType":"Journal","PaperTitle":"elaboracion de un corpus cacografico desde la disponibilidad lexica en estudiantes sevillanos un analisis para la ensenanza de la lengua","OriginalTitle":"Elaboración de un corpus cacográfico desde la disponibilidad léxica en estudiantes sevillanos. Un análisis para la enseñanza de la lengua","BookTitle":null,"Year":2018,"Date":"2018-07-13T00:00:00Z","Publisher":"Poli papers","JournalId":2738339871,"Volume":"13","Issue":"1","FirstPage":"119","LastPage":"131","ReferenceCount":28,"CitationCount":2,"EstimatedCitation":2,"OriginalVenue":"Revista de Lingüística y Lenguas Aplicadas","CreatedDate":"2018-08-03T00:00:00Z"},
+{"PaperId":2883800636,"Rank":4,"Doi":"10.1007/978-3-319-92513-4_4","DocType":null,"PaperTitle":"cognitive advantage of bilingualism and its criticisms","OriginalTitle":"Cognitive Advantage of Bilingualism and Its Criticisms","BookTitle":null,"Year":2018,"Date":"2018-01-01T00:00:00Z","Publisher":"Springer, Cham","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"67","LastPage":"89","ReferenceCount":74,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-08-03T00:00:00Z"},
+{"PaperId":2885023064,"Rank":5,"Doi":"10.1097/NNA.0000000000000647","DocType":"Journal","PaperTitle":"enhancing and advancing shared governance through a targeted decision making redesign","OriginalTitle":"Enhancing and Advancing Shared Governance Through a Targeted Decision-Making Redesign.","BookTitle":null,"Year":2018,"Date":"2018-09-01T00:00:00Z","Publisher":"J Nurs Adm","JournalId":194945867,"Volume":"48","Issue":"9","FirstPage":"445","LastPage":"451","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Journal of Nursing Administration","CreatedDate":"2018-08-22T00:00:00Z"},
+{"PaperId":2885607541,"Rank":1,"Doi":"10.1007/S10465-018-9283-7","DocType":"Journal","PaperTitle":"dance movement therapists attitudes and actions regarding lgbtqi and gender nonconforming communities","OriginalTitle":"Dance/Movement Therapists’ Attitudes and Actions Regarding LGBTQI and Gender Nonconforming Communities","BookTitle":null,"Year":2018,"Date":"2018-08-07T00:00:00Z","Publisher":"Springer US","JournalId":104993962,"Volume":"40","Issue":"2","FirstPage":"202","LastPage":"223","ReferenceCount":40,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"American Journal of Dance Therapy","CreatedDate":"2018-08-22T00:00:00Z"},
+{"PaperId":2886182429,"Rank":2,"Doi":"10.13039/501100003329","DocType":null,"PaperTitle":"caracteres de adaptacion en judia comun phaseolus vulgaris l aproximacion genetica e identificacion de qtls","OriginalTitle":"Caracteres de adaptación en judía común (Phaseolus vulgaris L.): aproximación genética e identificación de QTLs","BookTitle":null,"Year":2017,"Date":"2017-06-15T00:00:00Z","Publisher":"CSIC - Misión Biológica de Galicia (MBG)","JournalId":null,"Volume":null,"Issue":null,"FirstPage":null,"LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":null,"CreatedDate":"2018-08-22T00:00:00Z"},
+{"PaperId":2887149460,"Rank":3,"Doi":"10.1093/FEMSLE/FNY192","DocType":"Journal","PaperTitle":"small extracellular particles with big potential for horizontal gene transfer membrane vesicles and gene transfer agents","OriginalTitle":"Small extracellular particles with big potential for horizontal gene transfer: membrane vesicles and gene transfer agents.","BookTitle":null,"Year":2018,"Date":"2018-10-01T00:00:00Z","Publisher":"Narnia","JournalId":34954451,"Volume":"365","Issue":"19","FirstPage":null,"LastPage":null,"ReferenceCount":124,"CitationCount":13,"EstimatedCitation":13,"OriginalVenue":"Fems Microbiology Letters","CreatedDate":"2018-08-22T00:00:00Z"},
+{"PaperId":2887446149,"Rank":4,"Doi":"10.5465/ambpp.2018.12619symposium","DocType":"Journal","PaperTitle":"notes from the field toxigenic vibrio cholerae o141 in a traveler to florida nebraska 2017","OriginalTitle":"Notes from the Field: Toxigenic Vibrio cholerae O141 in a Traveler to Florida — Nebraska, 2017","BookTitle":null,"Year":2018,"Date":"2018-08-03T00:00:00Z","Publisher":"Centers for Disease Control MMWR Office","JournalId":183158886,"Volume":"67","Issue":"30","FirstPage":"838","LastPage":"839","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Morbidity and Mortality Weekly Report","CreatedDate":"2018-08-22T00:00:00Z"},
+{"PaperId":2889180499,"Rank":5,"Doi":"10.1007/S10924-018-1299-Z","DocType":"Journal","PaperTitle":"hybrid adsorbent materials obtained by the combination of poly ethylene alt maleic anhydride with lignin and lignosulfonate","OriginalTitle":"Hybrid Adsorbent Materials Obtained by the Combination of Poly(ethylene-alt-maleic anhydride) with Lignin and Lignosulfonate","BookTitle":null,"Year":2018,"Date":"2018-08-30T00:00:00Z","Publisher":"Springer US","JournalId":193665811,"Volume":"26","Issue":"11","FirstPage":"4293","LastPage":"4302","ReferenceCount":29,"CitationCount":5,"EstimatedCitation":5,"OriginalVenue":"Journal of Polymers and The Environment","CreatedDate":"2018-09-07T00:00:00Z"}]
@ -0,0 +1,10 @@
|
|||
[{"PaperId":2866429360,"Rank":1,"Doi":"10.5465/AMBPP.2018.12619SYMPOSIUM","DocType":null,"PaperTitle":"new directions in research on conflict dynamics","OriginalTitle":"New Directions in Research on Conflict Dynamics","BookTitle":null,"Year":2018,"Date":"2018-07-09T00:00:00Z","Publisher":"Academy of Management Briarcliff Manor, NY 10510","JournalId":null,"Volume":"2018","Issue":"1","FirstPage":"12619","LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Academy of Management Proceedings","CreatedDate":"2018-07-19T00:00:00Z"},
|
||||
{"PaperId":2871494677,"Rank":2,"Doi":"10.1007/978-981-10-8971-8_33","DocType":null,"PaperTitle":"wild flame detection using weight adaptive particle filter from monocular video","OriginalTitle":"Wild Flame Detection Using Weight Adaptive Particle Filter from Monocular Video","BookTitle":null,"Year":2019,"Date":"2019-01-01T00:00:00Z","Publisher":"Springer, Singapore","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"357","LastPage":"365","ReferenceCount":14,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-07-19T00:00:00Z"},
{"PaperId":2883520096,"Rank":3,"Doi":"10.4995/RLYLA.2018.9176","DocType":"Journal","PaperTitle":"elaboracion de un corpus cacografico desde la disponibilidad lexica en estudiantes sevillanos un analisis para la ensenanza de la lengua","OriginalTitle":"Elaboración de un corpus cacográfico desde la disponibilidad léxica en estudiantes sevillanos. Un análisis para la enseñanza de la lengua","BookTitle":null,"Year":2018,"Date":"2018-07-13T00:00:00Z","Publisher":"Poli papers","JournalId":2738339871,"Volume":"13","Issue":"1","FirstPage":"119","LastPage":"131","ReferenceCount":28,"CitationCount":2,"EstimatedCitation":2,"OriginalVenue":"Revista de Lingüística y Lenguas Aplicadas","CreatedDate":"2018-08-03T00:00:00Z"},
{"PaperId":2883800636,"Rank":4,"Doi":"10.1007/978-3-319-92513-4_4","DocType":null,"PaperTitle":"cognitive advantage of bilingualism and its criticisms","OriginalTitle":"Cognitive Advantage of Bilingualism and Its Criticisms","BookTitle":null,"Year":2018,"Date":"2018-01-01T00:00:00Z","Publisher":"Springer, Cham","JournalId":null,"Volume":null,"Issue":null,"FirstPage":"67","LastPage":"89","ReferenceCount":74,"CitationCount":1,"EstimatedCitation":1,"OriginalVenue":null,"CreatedDate":"2018-08-03T00:00:00Z"},
{"PaperId":2885023064,"Rank":5,"Doi":"10.1097/NNA.0000000000000647","DocType":"Journal","PaperTitle":"enhancing and advancing shared governance through a targeted decision making redesign","OriginalTitle":"Enhancing and Advancing Shared Governance Through a Targeted Decision-Making Redesign.","BookTitle":null,"Year":2018,"Date":"2018-09-01T00:00:00Z","Publisher":"J Nurs Adm","JournalId":194945867,"Volume":"48","Issue":"9","FirstPage":"445","LastPage":"451","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Journal of Nursing Administration","CreatedDate":"2018-08-22T00:00:00Z"},
{"PaperId":2885607541,"Rank":1,"Doi":"10.1007/S10465-018-9283-7","DocType":"Journal","PaperTitle":"dance movement therapists attitudes and actions regarding lgbtqi and gender nonconforming communities","OriginalTitle":"Dance/Movement Therapists’ Attitudes and Actions Regarding LGBTQI and Gender Nonconforming Communities","BookTitle":null,"Year":2018,"Date":"2018-08-07T00:00:00Z","Publisher":"Springer US","JournalId":104993962,"Volume":"40","Issue":"2","FirstPage":"202","LastPage":"223","ReferenceCount":40,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"American Journal of Dance Therapy","CreatedDate":"2018-08-22T00:00:00Z"},
{"PaperId":2886182429,"Rank":2,"Doi":"10.13039/501100003329","DocType":null,"PaperTitle":"caracteres de adaptacion en judia comun phaseolus vulgaris l aproximacion genetica e identificacion de qtls","OriginalTitle":"Caracteres de adaptación en judía común (Phaseolus vulgaris L.): aproximación genética e identificación de QTLs","BookTitle":null,"Year":2017,"Date":"2017-06-15T00:00:00Z","Publisher":"CSIC - Misión Biológica de Galicia (MBG)","JournalId":null,"Volume":null,"Issue":null,"FirstPage":null,"LastPage":null,"ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":null,"CreatedDate":"2018-08-22T00:00:00Z"},
{"PaperId":2887149460,"Rank":3,"Doi":"10.1093/FEMSLE/FNY192","DocType":"Journal","PaperTitle":"small extracellular particles with big potential for horizontal gene transfer membrane vesicles and gene transfer agents","OriginalTitle":"Small extracellular particles with big potential for horizontal gene transfer: membrane vesicles and gene transfer agents.","BookTitle":null,"Year":2018,"Date":"2018-10-01T00:00:00Z","Publisher":"Narnia","JournalId":34954451,"Volume":"365","Issue":"19","FirstPage":null,"LastPage":null,"ReferenceCount":124,"CitationCount":13,"EstimatedCitation":13,"OriginalVenue":"Fems Microbiology Letters","CreatedDate":"2018-08-22T00:00:00Z"},
{"PaperId":2887446149,"Rank":4,"Doi":"10.15585/MMWR.MM6730A7","DocType":"Journal","PaperTitle":"notes from the field toxigenic vibrio cholerae o141 in a traveler to florida nebraska 2017","OriginalTitle":"Notes from the Field: Toxigenic Vibrio cholerae O141 in a Traveler to Florida — Nebraska, 2017","BookTitle":null,"Year":2018,"Date":"2018-08-03T00:00:00Z","Publisher":"Centers for Disease Control MMWR Office","JournalId":183158886,"Volume":"67","Issue":"30","FirstPage":"838","LastPage":"839","ReferenceCount":0,"CitationCount":0,"EstimatedCitation":0,"OriginalVenue":"Morbidity and Mortality Weekly Report","CreatedDate":"2018-08-22T00:00:00Z"},
{"PaperId":2889180499,"Rank":5,"Doi":"10.1007/S10924-018-1299-Z","DocType":"Journal","PaperTitle":"hybrid adsorbent materials obtained by the combination of poly ethylene alt maleic anhydride with lignin and lignosulfonate","OriginalTitle":"Hybrid Adsorbent Materials Obtained by the Combination of Poly(ethylene-alt-maleic anhydride) with Lignin and Lignosulfonate","BookTitle":null,"Year":2018,"Date":"2018-08-30T00:00:00Z","Publisher":"Springer US","JournalId":193665811,"Volume":"26","Issue":"11","FirstPage":"4293","LastPage":"4302","ReferenceCount":29,"CitationCount":5,"EstimatedCitation":5,"OriginalVenue":"Journal of Polymers and The Environment","CreatedDate":"2018-09-07T00:00:00Z"}]
@@ -1,6 +1,6 @@
{"doi": "10.1038/2211089b0", "year": 1969, "genre": "journal-article", "is_oa": true, "title": "Planning: Trees in Danger", "doi_url": "https://doi.org/10.1038/2211089b0", "updated": "2020-02-06T13:51:15.164623", "oa_status": "bronze", "publisher": "Springer Nature", "z_authors": [{"name": "Our Planning Correspondent"}], "is_paratext": false, "journal_name": "Nature", "oa_locations": [{"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0028-0836,1476-4687", "journal_issn_l": "0028-0836", "published_date": "1969-03-01", "best_oa_location": {"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1021/acs.bioconjchem.8b00058.s001", "year": null, "genre": "component", "is_oa": true, "title": "Engineering Reversible CellCell Interactions with Lipid Anchored Prosthetic Receptors", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "updated": "2020-04-04T21:15:41.966773", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1021/acs.bioconjchem.8b00086.s001", "year": null, "genre": "component", "is_oa": true, "title": "Rapid, Stoichiometric, Site-Specific Modification of Aldehyde-Containing Proteins Using a Tandem Knoevenagel-Intra Michael Addition Reaction", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "updated": "2020-04-04T21:24:50.688286", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1038/221 1089b0", "year": 1969, "genre": "journal-article", "is_oa": true, "title": "Planning: Trees in Danger", "doi_url": "https://doi.org/10.1038/2211089b0", "updated": "2020-02-06T13:51:15.164623", "oa_status": "bronze", "publisher": "Springer Nature", "z_authors": [{"name": "Our Planning Correspondent"}], "is_paratext": false, "journal_name": "Nature", "oa_locations": [{"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0028-0836,1476-4687", "journal_issn_l": "0028-0836", "published_date": "1969-03-01", "best_oa_location": {"url": "http://www.nature.com/articles/2211089b0.pdf", "pmh_id": null, "is_best": true, "license": null, "updated": "2018-07-11T09:19:40.598930", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "http://www.nature.com/articles/2211089b0.pdf", "url_for_landing_page": "https://doi.org/10.1038/2211089b0", "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1021/acs.bioconjchem.8b00058. s001", "year": null, "genre": "component", "is_oa": true, "title": "Engineering Reversible CellCell Interactions with Lipid Anchored Prosthetic Receptors", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "updated": "2020-04-04T21:15:41.966773", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:13:39.352965", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00058.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1021/acs.bioconjCHEM.8b00086.s001", "year": null, "genre": "component", "is_oa": true, "title": "Rapid, Stoichiometric, Site-Specific Modification of Aldehyde-Containing Proteins Using a Tandem Knoevenagel-Intra Michael Addition Reaction", "doi_url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "updated": "2020-04-04T21:24:50.688286", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T21:22:19.694440", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acs.bioconjchem.8b00086.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1192/bjp.89.375.270", "year": 1943, "genre": "journal-article", "is_oa": false, "title": "Unusual Pituitary Activity in a Case of Anorexia Nervosa", "doi_url": "https://doi.org/10.1192/bjp.89.375.270", "updated": "2020-03-09T08:54:12.827623", "oa_status": "closed", "publisher": "Royal College of Psychiatrists", "z_authors": [{"given": "M.", "family": "Reiss", "sequence": "first"}], "is_paratext": false, "journal_name": "Journal of Mental Science", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0368-315X,2514-9946", "journal_issn_l": "0368-315X", "published_date": "1943-04-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1016/s0167-7012(99)00056-1", "year": 1999, "genre": "journal-article", "is_oa": false, "title": "Development of radiographic and microscopic techniques for the characterization of bacterial transport in intact sediment cores from Oyster, Virginia", "doi_url": "https://doi.org/10.1016/s0167-7012(99)00056-1", "updated": "2020-04-05T11:15:40.634599", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Hailiang", "family": "Dong", "sequence": "first"}, {"given": "Tullis C.", "family": "Onstott", "sequence": "additional"}, {"given": "Mary F.", "family": "DeFlaun", "sequence": "additional"}, {"given": "Mark E.", "family": "Fuller", "sequence": "additional"}, {"given": "Kathleen M.", "family": "Gillespie", "sequence": "additional"}, {"given": "James K.", "family": "Fredrickson", "sequence": "additional"}], "is_paratext": false, "journal_name": "Journal of Microbiological Methods", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0167-7012", "journal_issn_l": "0167-7012", "published_date": "1999-08-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1086/mp.1905.2.issue-3", "year": 1905, "genre": "journal-issue", "is_oa": false, "title": null, "doi_url": "https://doi.org/10.1086/mp.1905.2.issue-3", "updated": "2020-02-07T15:51:44.560109", "oa_status": "closed", "publisher": "University of Chicago Press", "z_authors": null, "is_paratext": false, "journal_name": "Modern Philology", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0026-8232,1545-6951", "journal_issn_l": "0026-8232", "published_date": "1905-01-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
@@ -38,7 +38,7 @@
{"doi": "10.1016/s1067-991x(03)70006-6", "year": 2003, "genre": "journal-article", "is_oa": false, "title": "Use of the autolaunch method of dispatching a helicopter", "doi_url": "https://doi.org/10.1016/s1067-991x(03)70006-6", "updated": "2020-03-12T07:24:35.659404", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Kathleen S.", "family": "Berns", "sequence": "first"}, {"given": "Jeffery J.", "family": "Caniglia", "sequence": "additional"}, {"given": "Daniel G.", "family": "Hankins", "sequence": "additional"}, {"given": "Scott P.", "family": "Zietlow", "sequence": "additional"}], "is_paratext": false, "journal_name": "Air Medical Journal", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "1067-991X", "journal_issn_l": "1067-991X", "published_date": "2003-05-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1016/j.clinimag.2015.12.002", "year": 2016, "genre": "journal-article", "is_oa": false, "title": "Imaging findings, diagnosis, and clinical outcomes in patients with mycotic aneurysms: single center experience", "doi_url": "https://doi.org/10.1016/j.clinimag.2015.12.002", "updated": "2020-03-12T17:56:16.049536", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Amy R.", "family": "Deipolyi", "sequence": "first"}, {"given": "Alexander", "family": "Bailin", "sequence": "additional"}, {"given": "Ali", "family": "Khademhosseini", "sequence": "additional"}, {"ORCID": "http://orcid.org/0000-0003-4984-1778", "given": "Rahmi", "family": "Oklu", "sequence": "additional", "authenticated-orcid": false}], "is_paratext": false, "journal_name": "Clinical Imaging", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0899-7071", "journal_issn_l": "0899-7071", "published_date": "2016-05-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1016/j.biocel.2013.05.012", "year": 2013, "genre": "journal-article", "is_oa": false, "title": "MAVS-mediated host cell defense is inhibited by Borna disease virus", "doi_url": "https://doi.org/10.1016/j.biocel.2013.05.012", "updated": "2020-03-09T20:49:25.975316", "oa_status": "closed", "publisher": "Elsevier BV", "z_authors": [{"given": "Yujun", "family": "Li", "sequence": "first"}, {"given": "Wuqi", "family": "Song", "sequence": "additional"}, {"given": "Jing", "family": "Wu", "sequence": "additional"}, {"given": "Qingmeng", "family": "Zhang", "sequence": "additional"}, {"given": "Junming", "family": "He", "sequence": "additional"}, {"given": "Aimei", "family": "Li", "sequence": "additional"}, {"given": "Jun", "family": "Qian", "sequence": "additional"}, {"given": "Aixia", "family": "Zhai", "sequence": "additional"}, {"given": "Yunlong", "family": "Hu", "sequence": "additional"}, {"given": "Wenping", "family": "Kao", "sequence": "additional"}, {"given": "Lanlan", "family": "Wei", "sequence": "additional"}, {"given": "Fengmin", "family": "Zhang", "sequence": "additional"}, {"given": "Dakang", "family": "Xu", "sequence": "additional"}], "is_paratext": false, "journal_name": "The International Journal of Biochemistry & Cell Biology", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "1357-2725", "journal_issn_l": "1357-2725", "published_date": "2013-08-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1021/acsami.8b01074.s004", "year": null, "genre": "component", "is_oa": false, "title": "Solution Coating of Pharmaceutical Nanothin Films and Multilayer Nanocomposites with Controlled Morphology and Polymorphism", "doi_url": "https://doi.org/10.1021/acsami.8b01074.s004", "updated": "2020-04-04T21:02:07.815195", "oa_status": "closed", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1021/acsami.8b01074 .s004", "year": null, "genre": "component", "is_oa": false, "title": "Solution Coating of Pharmaceutical Nanothin Films and Multilayer Nanocomposites with Controlled Morphology and Polymorphism", "doi_url": "https://doi.org/10.1021/acsami.8b01074.s004", "updated": "2020-04-04T21:02:07.815195", "oa_status": "closed", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
{"doi": "10.1093/nar/18.18.5552", "year": 1990, "genre": "journal-article", "is_oa": true, "title": "Nucleotide sequence of LTR-gag region of Rous sarcoma virus adapted to semi-permissive host", "doi_url": "https://doi.org/10.1093/nar/18.18.5552", "updated": "2020-02-07T07:59:06.754183", "oa_status": "green", "publisher": "Oxford University Press (OUP)", "z_authors": [{"given": "Vladimir I.", "family": "Kashuba", "sequence": "first"}, {"given": "Serge V.", "family": "Zubak", "sequence": "additional"}, {"given": "Vadim M.", "family": "Kavsan", "sequence": "additional"}, {"given": "Alla V.", "family": "Rynditch", "sequence": "additional"}, {"given": "Ivo", "family": "Hlozanek", "sequence": "additional"}], "is_paratext": false, "journal_name": "Nucleic Acids Research", "oa_locations": [{"url": "http://europepmc.org/articles/pmc332244?pdf=render", "pmh_id": "oai:pubmedcentral.nih.gov:332244", "is_best": true, "license": null, "updated": "2017-10-22T11:38:23.025497", "version": "publishedVersion", "evidence": "oa repository (via OAI-PMH doi match)", "host_type": "repository", "endpoint_id": "pubmedcentral.nih.gov", "url_for_pdf": "http://europepmc.org/articles/pmc332244?pdf=render", "url_for_landing_page": "http://europepmc.org/articles/pmc332244", "repository_institution": "pubmedcentral.nih.gov"}, {"url": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC332244", "pmh_id": null, "is_best": false, "license": null, "updated": "2020-04-24T18:18:02.810779", "version": "publishedVersion", "evidence": "oa repository (via pmcid lookup)", "host_type": "repository", "endpoint_id": null, "url_for_pdf": null, "url_for_landing_page": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC332244", "repository_institution": null}], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0305-1048,1362-4962", "journal_issn_l": "0305-1048", "published_date": "1990-01-01", "best_oa_location": {"url": "http://europepmc.org/articles/pmc332244?pdf=render", "pmh_id": "oai:pubmedcentral.nih.gov:332244", "is_best": true, "license": null, "updated": "2017-10-22T11:38:23.025497", "version": "publishedVersion", "evidence": "oa repository (via OAI-PMH doi match)", "host_type": "repository", "endpoint_id": "pubmedcentral.nih.gov", "url_for_pdf": "http://europepmc.org/articles/pmc332244?pdf=render", "url_for_landing_page": "http://europepmc.org/articles/pmc332244", "repository_institution": "pubmedcentral.nih.gov"}, "journal_is_in_doaj": false, "has_repository_copy": true}
{"doi": "10.1021/acsami.8b01294.s001", "year": null, "genre": "component", "is_oa": true, "title": "Highly Elastic Biodegradable Single-Network Hydrogel for Cell Printing", "doi_url": "https://doi.org/10.1021/acsami.8b01294.s001", "updated": "2020-04-04T22:12:53.813586", "oa_status": "bronze", "publisher": "American Chemical Society (ACS)", "z_authors": null, "is_paratext": false, "journal_name": null, "oa_locations": [{"url": "https://doi.org/10.1021/acsami.8b01294.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T22:11:06.757648", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acsami.8b01294.s001", "url_for_landing_page": null, "repository_institution": null}, {"url": "http://europepmc.org/articles/pmc5876623?pdf=render", "pmh_id": "oai:pubmedcentral.nih.gov:5876623", "is_best": false, "license": "acs-specific: authorchoice/editors choice usage agreement", "updated": "2020-02-19T13:50:59.876849", "version": "publishedVersion", "evidence": "oa repository (via OAI-PMH title match)", "host_type": "repository", "endpoint_id": "ac9de7698155b820de7", "url_for_pdf": "http://europepmc.org/articles/pmc5876623?pdf=render", "url_for_landing_page": "http://europepmc.org/articles/pmc5876623", "repository_institution": "National Institutes of Health (USA) - US National Library of Medicine"}], "data_standard": 2, "journal_is_oa": false, "journal_issns": null, "journal_issn_l": null, "published_date": null, "best_oa_location": {"url": "https://doi.org/10.1021/acsami.8b01294.s001", "pmh_id": null, "is_best": true, "license": null, "updated": "2020-04-04T22:11:06.757648", "version": "publishedVersion", "evidence": "open (via free pdf)", "host_type": "publisher", "endpoint_id": null, "url_for_pdf": "https://doi.org/10.1021/acsami.8b01294.s001", "url_for_landing_page": null, "repository_institution": null}, "journal_is_in_doaj": false, "has_repository_copy": true}
{"doi": "10.1097/scs.0b013e3181ef67ba", "year": 2010, "genre": "journal-article", "is_oa": false, "title": "Anomaly of the Internal Carotid Artery Detected During Tonsillectomy", "doi_url": "https://doi.org/10.1097/scs.0b013e3181ef67ba", "updated": "2020-02-10T19:05:26.462040", "oa_status": "closed", "publisher": "Ovid Technologies (Wolters Kluwer Health)", "z_authors": [{"given": "Serdar", "family": "Ceylan", "sequence": "first"}, {"given": "Serkan", "family": "Salman", "sequence": "additional"}, {"given": "Fatih", "family": "Bora", "sequence": "additional"}], "is_paratext": false, "journal_name": "Journal of Craniofacial Surgery", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "1049-2275", "journal_issn_l": "1049-2275", "published_date": "2010-09-01", "best_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false}
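Note the deliberately malformed DOIs in the second half of the fixtures above ("10.1038/221 1089b0", "10.1021/acs.bioconjchem.8b00058. s001", "10.1021/acsami.8b01074 .s004", and the mixed-case "10.1021/acs.bioconjCHEM.8b00086.s001"): each is a whitespace or case variant of a record that also appears in canonical form, which suggests these fixtures exercise DOI-normalization logic. A minimal sketch of such a cleaning step follows; the normalizeDoi helper is hypothetical, not part of this commit.

import java.util.Locale;

public class DoiCleaningSketch {

    // Hypothetical helper: strips internal whitespace and lower-cases the DOI,
    // so whitespace/case variants collapse to their canonical form.
    public static String normalizeDoi(final String doi) {
        if (doi == null) {
            return null;
        }
        return doi.replaceAll("\\s+", "").toLowerCase(Locale.ROOT);
    }

    public static void main(String[] args) {
        System.out.println(normalizeDoi("10.1038/221 1089b0")); // 10.1038/2211089b0
        System.out.println(normalizeDoi("10.1021/acs.bioconjCHEM.8b00086.s001")); // 10.1021/acs.bioconjchem.8b00086.s001
    }
}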
@@ -3,8 +3,12 @@ package eu.dnetlib.dhp.oa.graph.raw;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;

@@ -98,14 +102,9 @@ public class MergeClaimsApplication {
raw
.joinWith(claim, raw.col("_1").equalTo(claim.col("_1")), "full_outer")
.map(
(MapFunction<Tuple2<Tuple2<String, T>, Tuple2<String, T>>, T>) value -> {
Optional<Tuple2<String, T>> opRaw = Optional.ofNullable(value._1());
Optional<Tuple2<String, T>> opClaim = Optional.ofNullable(value._2());

return opRaw.isPresent()
? opRaw.get()._2()
: opClaim.isPresent() ? opClaim.get()._2() : null;
},
(MapFunction<Tuple2<Tuple2<String, T>, Tuple2<String, T>>, T>) value -> processClaims(
Optional.ofNullable(value._1()),
Optional.ofNullable(value._2())),
Encoders.bean(clazz))
.filter(Objects::nonNull)
.map(

@@ -117,6 +116,78 @@ public class MergeClaimsApplication {
.text(outPath);
}

private static <T extends Oaf> T processClaims(Optional<Tuple2<String, T>> opRaw,
Optional<Tuple2<String, T>> opClaim) {

// when both are present
if (opClaim.isPresent() && opRaw.isPresent()) {
T oafClaim = opClaim.get()._2();
if (oafClaim instanceof Result) {
T oafRaw = opRaw.get()._2();

// merge the context lists from both oaf objects ...
final List<Context> context = mergeContexts((Result) oafClaim, (Result) oafRaw);

// ... and set it on the result from the aggregator
((Result) oafRaw).setContext(context);
return oafRaw;
}
}

// otherwise prefer the result from the aggregator
return opRaw.isPresent()
? opRaw.get()._2()
: opClaim.map(Tuple2::_2).orElse(null);
}

private static List<Context> mergeContexts(Result oafClaim, Result oafRaw) {
return new ArrayList<>(
Stream
.concat(
Optional
.ofNullable(oafClaim.getContext())
.map(List::stream)
.orElse(Stream.empty()),
Optional
.ofNullable(oafRaw.getContext())
.map(List::stream)
.orElse(Stream.empty()))
.collect(
Collectors
.toMap(
Context::getId,
c -> c,
(c1, c2) -> {
Context c = new Context();
c.setId(c1.getId());
c
.setDataInfo(
new ArrayList<>(
Stream
.concat(
Optional
.ofNullable(c1.getDataInfo())
.map(List::stream)
.orElse(Stream.empty()),
Optional
.ofNullable(c2.getDataInfo())
.map(List::stream)
.orElse(Stream.empty()))
.collect(
Collectors
.toMap(
d -> Optional
.ofNullable(d.getProvenanceaction())
.map(Qualifier::getClassid)
.orElse(""),
d -> d,
(d1, d2) -> d1))
.values()));
return c;
}))
.values());
}

private static <T extends Oaf> Dataset<T> readFromPath(
SparkSession spark, String path, Class<T> clazz) {
return spark
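The nested Collectors.toMap calls in mergeContexts above deduplicate by key with a merge function: contexts collide on their id, and on collision their dataInfo lists are in turn deduplicated by provenance classid. A standalone sketch of that pattern on plain strings follows; the class and the "id::provenance" encoding are illustrative only, not from this commit.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class MergeByIdSketch {

    public static void main(String[] args) {
        // Two context-like lists, keyed by the id before the "::".
        List<String> fromClaim = Arrays.asList("sobigdata::claim", "dh-ch::claim");
        List<String> fromRaw = Arrays.asList("dh-ch::raw", "covid-19::raw");

        List<String> merged = new ArrayList<>(
            Stream
                .concat(fromClaim.stream(), fromRaw.stream())
                .collect(
                    Collectors
                        .toMap(
                            s -> s.substring(0, s.indexOf("::")), // key: the id
                            s -> s, // value: the element itself
                            (s1, s2) -> s1 + "+" + s2)) // merge function on id collision
                .values());

        // Prints three entries; "dh-ch" appears once, carrying both provenances.
        merged.forEach(System.out::println);
    }
}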
@@ -480,38 +480,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false);
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);

final Relation r1 = new Relation();
final Relation r2 = new Relation();

if (StringUtils.isNotBlank(validationDate)) {
r1.setValidated(true);
r1.setValidationDate(validationDate);
r2.setValidated(true);
r2.setValidationDate(validationDate);
}
r1.setCollectedfrom(COLLECTED_FROM_CLAIM);
r1.setSource(sourceId);
r1.setTarget(targetId);
r1.setDataInfo(DATA_INFO_CLAIM);
r1.setLastupdatetimestamp(lastUpdateTimestamp);

r2.setCollectedfrom(COLLECTED_FROM_CLAIM);
r2.setSource(targetId);
r2.setTarget(sourceId);
r2.setDataInfo(DATA_INFO_CLAIM);
r2.setLastupdatetimestamp(lastUpdateTimestamp);
Relation r1 = prepareRelation(sourceId, targetId, validationDate);
Relation r2 = prepareRelation(targetId, sourceId, validationDate);

final String semantics = rs.getString("semantics");

switch (semantics) {
case "resultResult_relationship_isRelatedTo":
r1.setRelType(RESULT_RESULT);
r1.setSubRelType(RELATIONSHIP);
r1.setRelClass(IS_RELATED_TO);

r2.setRelType(RESULT_RESULT);
r2.setSubRelType(RELATIONSHIP);
r2.setRelClass(IS_RELATED_TO);
r1 = setRelationSemantic(r1, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
r2 = setRelationSemantic(r2, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO);
break;
case "resultProject_outcome_produces":
if (!"project".equals(sourceType)) {

@@ -521,13 +498,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
"invalid claim, sourceId: %s, targetId: %s, semantics: %s",
|
||||
sourceId, targetId, semantics));
|
||||
}
|
||||
r1.setRelType(RESULT_PROJECT);
|
||||
r1.setSubRelType(OUTCOME);
|
||||
r1.setRelClass(PRODUCES);
|
||||
|
||||
r2.setRelType(RESULT_PROJECT);
|
||||
r2.setSubRelType(OUTCOME);
|
||||
r2.setRelClass(IS_PRODUCED_BY);
|
||||
r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES);
|
||||
r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY);
|
||||
break;
|
||||
case "resultResult_publicationDataset_isRelatedTo":
|
||||
r1 = setRelationSemantic(r1, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
|
||||
r2 = setRelationSemantic(r2, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("claim semantics not managed: " + semantics);
|
||||
|
@ -540,6 +516,27 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
}
}

private Relation prepareRelation(String sourceId, String targetId, String validationDate) {
Relation r = new Relation();
if (StringUtils.isNotBlank(validationDate)) {
r.setValidated(true);
r.setValidationDate(validationDate);
}
r.setCollectedfrom(COLLECTED_FROM_CLAIM);
r.setSource(sourceId);
r.setTarget(targetId);
r.setDataInfo(DATA_INFO_CLAIM);
r.setLastupdatetimestamp(lastUpdateTimestamp);
return r;
}

private Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) {
r.setRelType(relType);
r.setSubRelType(subRelType);
r.setRelClass(relClass);
return r;
}

private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
final Context context = new Context();
context.setId(id);
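The refactoring above replaces the duplicated per-relation setup with two helpers: prepareRelation builds one directed Relation (provenance, endpoints, validation), and setRelationSemantic stamps its relType/subRelType/relClass, so each claim row expands into a symmetric pair. A self-contained sketch of the same pattern follows; the Relation stub, constant values, and ids are illustrative stand-ins, not the dhp-schemas bean.

public class ClaimRelationSketch {

    static final String RESULT_PROJECT = "resultProject";
    static final String OUTCOME = "outcome";
    static final String PRODUCES = "produces";
    static final String IS_PRODUCED_BY = "isProducedBy";

    // Illustrative stand-in for the Relation bean used in the diff.
    static class Relation {
        String source, target, relType, subRelType, relClass, validationDate;
        boolean validated;
    }

    // Builds one directed relation, marking it validated when a date is present.
    static Relation prepareRelation(String sourceId, String targetId, String validationDate) {
        Relation r = new Relation();
        if (validationDate != null && !validationDate.trim().isEmpty()) {
            r.validated = true;
            r.validationDate = validationDate;
        }
        r.source = sourceId;
        r.target = targetId;
        return r;
    }

    // Stamps the semantic triple on an already prepared relation.
    static Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) {
        r.relType = relType;
        r.subRelType = subRelType;
        r.relClass = relClass;
        return r;
    }

    public static void main(String[] args) {
        // One project->result claim expands into two mirrored relations.
        Relation r1 = setRelationSemantic(
            prepareRelation("40|project::x", "50|result::y", "2021-01-01"),
            RESULT_PROJECT, OUTCOME, PRODUCES);
        Relation r2 = setRelationSemantic(
            prepareRelation("50|result::y", "40|project::x", "2021-01-01"),
            RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY);

        System.out.println(r1.source + " --" + r1.relClass + "--> " + r1.target);
        System.out.println(r2.source + " --" + r2.relClass + "--> " + r2.target);
    }
}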