Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta

2022-06-28 11:06:32 +02:00 · 2022-06-28 11:06:32 +02:00 · ee1f1eeca2
parent 4b6913787b cba9c2b7cc
commit ee1f1eeca2
30 changed files with 1968 additions and 114 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java
@ -1,18 +1,18 @@

 package eu.dnetlib.dhp.common;

+import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.text.Normalizer;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
+import java.util.stream.Collectors;

 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.text.WordUtils;

+import com.ctc.wstx.dtd.LargePrefixedNameSet;
 import com.google.common.base.Joiner;
 import com.google.common.base.Splitter;
-import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.hash.Hashing;

@ -29,7 +29,19 @@ public class PacePerson {
 	private List<String> fullname = Lists.newArrayList();
 	private final String original;

-	private static Set<String> particles = null;
+	private static Set<String> particles;
+
+	static {
+		try {
+			particles = new HashSet<>(IOUtils
+				.readLines(
+					PacePerson.class
+						.getResourceAsStream(
+							"/eu/dnetlib/dhp/common/name_particles.txt")));
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+	}

 	/**
 	 * Capitalizes a string
@ -37,29 +49,20 @@ public class PacePerson {
 	 * @param s the string to capitalize
 	 * @return the input string with capital letter
 	 */
-	public static final String capitalize(final String s) {
+	public static String capitalize(final String s) {
+		if (particles.contains(s)) {
+			return s;
+		}
 		return WordUtils.capitalize(s.toLowerCase(), ' ', '-');
 	}

 	/**
 	 * Adds a dot to a string with length equals to 1
 	 */
-	public static final String dotAbbreviations(final String s) {
+	public static String dotAbbreviations(final String s) {
 		return s.length() == 1 ? s + "." : s;
 	}

-	public static Set<String> loadFromClasspath(final String classpath) {
-		final Set<String> h = new HashSet<>();
-		try {
-			for (final String s : IOUtils.readLines(PacePerson.class.getResourceAsStream(classpath))) {
-				h.add(s);
-			}
-		} catch (final Throwable e) {
-			return new HashSet<>();
-		}
-		return h;
-	}
-
 	/**
 	 * The constructor of the class. It fills the fields of the class basing on the input fullname.
 	 *
@ -128,10 +131,6 @@ public class PacePerson {
 	}

 	private List<String> splitTerms(final String s) {
-		if (particles == null) {
-			particles = loadFromClasspath("/eu/dnetlib/dhp/oa/graph/pace/name_particles.txt");
-		}
-
 		final List<String> list = Lists.newArrayList();
 		for (final String part : Splitter.on(" ").omitEmptyStrings().split(s)) {
 			if (!particles.contains(part.toLowerCase())) {
@ -187,17 +186,36 @@ public class PacePerson {
 	}

 	public List<String> getCapitalFirstnames() {
-		return Lists
-			.newArrayList(
-				Iterables.transform(getNameWithAbbreviations(), PacePerson::capitalize));
+		return Optional
+			.ofNullable(getNameWithAbbreviations())
+			.map(
+				name -> name
+					.stream()
+					.map(PacePerson::capitalize)
+					.collect(Collectors.toList()))
+			.orElse(new ArrayList<>());
 	}

 	public List<String> getCapitalSurname() {
-		return Lists.newArrayList(Iterables.transform(surname, PacePerson::capitalize));
+		return Optional
+			.ofNullable(getSurname())
+			.map(
+				surname -> surname
+					.stream()
+					.map(PacePerson::capitalize)
+					.collect(Collectors.toList()))
+			.orElse(new ArrayList<>());
 	}

 	public List<String> getNameWithAbbreviations() {
-		return Lists.newArrayList(Iterables.transform(name, PacePerson::dotAbbreviations));
+		return Optional
+			.ofNullable(getName())
+			.map(
+				name -> name
+					.stream()
+					.map(PacePerson::dotAbbreviations)
+					.collect(Collectors.toList()))
+			.orElse(new ArrayList<>());
 	}

 	public boolean isAccurate() {
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/pace/name_particles.txt
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/pace/name_particles.txt
@ -1,7 +1,8 @@
 van
+von
 der
 de
 dell
 sig
 mr
-mrs
+mrs
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java
@ -3,7 +3,6 @@ package eu.dnetlib.dhp.actionmanager.ror;

 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
-import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE;
 import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
 import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
 import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
@ -39,7 +38,6 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType;
-import eu.dnetlib.dhp.actionmanager.ror.model.Relationship;
 import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
@ -51,7 +49,6 @@ import eu.dnetlib.dhp.schema.oaf.KeyValue;
 import eu.dnetlib.dhp.schema.oaf.Oaf;
 import eu.dnetlib.dhp.schema.oaf.Organization;
 import eu.dnetlib.dhp.schema.oaf.Qualifier;
-import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;
@ -168,38 +165,10 @@ public class GenerateRorActionSetJob {
 		final List<AtomicAction<? extends Oaf>> res = new ArrayList<>();
 		res.add(new AtomicAction<>(Organization.class, o));

-		for (final Relationship rorRel : r.getRelationships()) {
-			if (rorRel.getType().equalsIgnoreCase("parent")) {
-				final String orgId1 = calculateOpenaireId(r.getId());
-				final String orgId2 = calculateOpenaireId(rorRel.getId());
-				res
-					.add(
-						new AtomicAction<>(Relation.class,
-							calculateHierarchyRel(orgId1, orgId2, ModelConstants.IS_PARENT_OF)));
-				res
-					.add(
-						new AtomicAction<>(Relation.class,
-							calculateHierarchyRel(orgId2, orgId1, ModelConstants.IS_CHILD_OF)));
-			}
-		}
-
 		return res;

 	}

-	private static Relation calculateHierarchyRel(final String source, final String target, final String relClass) {
-		final Relation rel = new Relation();
-		rel.setSource(source);
-		rel.setTarget(target);
-		rel.setRelType(ORG_ORG_RELTYPE);
-		rel.setSubRelType(ModelConstants.RELATIONSHIP);
-		rel.setRelClass(relClass);
-		rel.setCollectedfrom(ROR_COLLECTED_FROM);
-		rel.setDataInfo(ROR_DATA_INFO);
-		rel.setLastupdatetimestamp(System.currentTimeMillis());
-		return rel;
-	}
-
 	private static String calculateOpenaireId(final String rorId) {
 		return String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(rorId));
 	}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
@ -19,6 +19,8 @@ import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
 import eu.dnetlib.dhp.aggregation.common.ReportingJob;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.file.FileCollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.file.FileGZipCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
@ -114,6 +116,10 @@ public class CollectorWorker extends ReportingJob {
 				return new OaiCollectorPlugin(clientParams);
 			case rest_json2xml:
 				return new RestCollectorPlugin(clientParams);
+			case file:
+				return new FileCollectorPlugin(fileSystem);
+			case fileGZip:
+				return new FileGZipCollectorPlugin(fileSystem);
 			case other:
 				final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
 					.ofNullable(api.getParams().get("other_plugin_type"))
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
@ -10,7 +10,7 @@ import eu.dnetlib.dhp.common.collection.CollectorException;
 public interface CollectorPlugin {

 	enum NAME {
-		oai, other, rest_json2xml;
+		oai, other, rest_json2xml, file, fileGZip;

 		public enum OTHER_NAME {
 			mdstore_mongodb_dump, mdstore_mongodb
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/file/AbstractSplittedRecordPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/file/AbstractSplittedRecordPlugin.java
@ -0,0 +1,80 @@
+
+package eu.dnetlib.dhp.collection.plugin.file;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Optional;
+import java.util.Spliterator;
+import java.util.Spliterators;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.utils.XMLIterator;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+
+public abstract class AbstractSplittedRecordPlugin implements CollectorPlugin {
+
+	private static final Logger log = LoggerFactory.getLogger(AbstractSplittedRecordPlugin.class);
+
+	public static final String SPLIT_ON_ELEMENT = "splitOnElement";
+
+	private final FileSystem fileSystem;
+
+	public AbstractSplittedRecordPlugin(FileSystem fileSystem) {
+		this.fileSystem = fileSystem;
+	}
+
+	@Override
+	public Stream<String> collect(ApiDescriptor api, AggregatorReport report) throws CollectorException {
+
+		// get path to file
+		final Path filePath = Optional
+			.ofNullable(api.getBaseUrl())
+			.map(Path::new)
+			.orElseThrow(() -> new CollectorException("missing baseUrl"));
+
+		log.info("baseUrl: {}", filePath);
+
+		// check that path to file exists
+		try {
+			if (!fileSystem.exists(filePath)) {
+				throw new CollectorException("path does not exist: " + filePath);
+			}
+		} catch (IOException e) {
+			throw new CollectorException(e);
+		}
+
+		// get split element
+		final String splitOnElement = Optional
+			.ofNullable(api.getParams().get(SPLIT_ON_ELEMENT))
+			.orElseThrow(
+				() -> new CollectorException(String
+					.format("missing parameter '%s', required by the AbstractSplittedRecordPlugin", SPLIT_ON_ELEMENT)));
+
+		log.info("splitOnElement: {}", splitOnElement);
+
+		final BufferedInputStream bis = getBufferedInputStream(filePath);
+
+		Iterator<String> xmlIterator = new XMLIterator(splitOnElement, bis);
+
+		return StreamSupport
+			.stream(
+				Spliterators.spliteratorUnknownSize(xmlIterator, Spliterator.ORDERED),
+				false);
+	}
+
+	abstract protected BufferedInputStream getBufferedInputStream(final Path filePath) throws CollectorException;
+
+	public FileSystem getFileSystem() {
+		return fileSystem;
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/file/FileCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/file/FileCollectorPlugin.java
@ -0,0 +1,33 @@
+
+package eu.dnetlib.dhp.collection.plugin.file;
+
+import java.io.BufferedInputStream;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.common.collection.CollectorException;
+
+public class FileCollectorPlugin extends AbstractSplittedRecordPlugin {
+
+	private static final Logger log = LoggerFactory.getLogger(FileCollectorPlugin.class);
+
+	public FileCollectorPlugin(FileSystem fileSystem) {
+		super(fileSystem);
+	}
+
+	@Override
+	protected BufferedInputStream getBufferedInputStream(final Path filePath) throws CollectorException {
+
+		log.info("filePath: {}", filePath);
+
+		try {
+			FileSystem fs = super.getFileSystem();
+			return new BufferedInputStream(fs.open(filePath));
+		} catch (Exception e) {
+			throw new CollectorException("Error reading file " + filePath, e);
+		}
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/file/FileGZipCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/file/FileGZipCollectorPlugin.java
@ -0,0 +1,35 @@
+
+package eu.dnetlib.dhp.collection.plugin.file;
+
+import java.io.BufferedInputStream;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.common.collection.CollectorException;
+
+/**
+ * Collector plugin reading a gzip-compressed XML file from the configured file system.
+ */
+public class FileGZipCollectorPlugin extends AbstractSplittedRecordPlugin {
+
+	private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPlugin.class);
+
+	public FileGZipCollectorPlugin(FileSystem fileSystem) {
+		super(fileSystem);
+	}
+
+	/**
+	 * Opens the file and wraps it in a gzip-decompressing, buffered stream.
+	 *
+	 * @param filePath the path of the gzip file to open
+	 * @return a buffered stream over the decompressed content
+	 * @throws CollectorException if the file cannot be opened or is not a valid gzip stream
+	 */
+	@Override
+	protected BufferedInputStream getBufferedInputStream(final Path filePath) throws CollectorException {
+
+		log.info("filePath: {}", filePath);
+
+		java.io.InputStream raw = null;
+		try {
+			raw = super.getFileSystem().open(filePath);
+			// the GZIPInputStream constructor reads the gzip header and fails on a non-gzip file
+			return new BufferedInputStream(new GZIPInputStream(raw));
+		} catch (Exception e) {
+			// the raw stream was opened but could not be wrapped: close it so that it does not leak
+			if (raw != null) {
+				try {
+					raw.close();
+				} catch (Exception closeError) {
+					e.addSuppressed(closeError);
+				}
+			}
+			throw new CollectorException("Error reading file " + filePath, e);
+		}
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
@ -19,7 +19,7 @@ import org.dom4j.io.XMLWriter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import eu.dnetlib.dhp.collection.XmlCleaner;
+import eu.dnetlib.dhp.collection.plugin.utils.XmlCleaner;
 import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
 import eu.dnetlib.dhp.common.collection.CollectorException;
 import eu.dnetlib.dhp.common.collection.HttpConnector2;
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
@ -30,7 +30,7 @@ import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.InputSource;

-import eu.dnetlib.dhp.collection.JsonUtils;
+import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils;
 import eu.dnetlib.dhp.common.collection.CollectorException;
 import eu.dnetlib.dhp.common.collection.HttpClientParams;

--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/JsonUtils.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/JsonUtils.java
@ -1,5 +1,5 @@

-package eu.dnetlib.dhp.collection;
+package eu.dnetlib.dhp.collection.plugin.utils;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/XMLIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/XMLIterator.java
@ -0,0 +1,177 @@
+
+package eu.dnetlib.dhp.collection.plugin.utils;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringWriter;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.util.Iterator;
+
+import javax.xml.stream.XMLEventFactory;
+import javax.xml.stream.XMLEventReader;
+import javax.xml.stream.XMLEventWriter;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLOutputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.events.StartElement;
+import javax.xml.stream.events.XMLEvent;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+public class XMLIterator implements Iterator<String> {
+
+	private static final Log log = LogFactory.getLog(XMLIterator.class);
+
+	private ThreadLocal<XMLInputFactory> inputFactory = new ThreadLocal<XMLInputFactory>() {
+
+		@Override
+		protected XMLInputFactory initialValue() {
+			return XMLInputFactory.newInstance();
+		}
+	};
+
+	private ThreadLocal<XMLOutputFactory> outputFactory = new ThreadLocal<XMLOutputFactory>() {
+
+		@Override
+		protected XMLOutputFactory initialValue() {
+			return XMLOutputFactory.newInstance();
+		}
+	};
+
+	private ThreadLocal<XMLEventFactory> eventFactory = new ThreadLocal<XMLEventFactory>() {
+
+		@Override
+		protected XMLEventFactory initialValue() {
+			return XMLEventFactory.newInstance();
+		}
+	};
+
+	public static final String UTF_8 = "UTF-8";
+
+	final XMLEventReader parser;
+
+	private XMLEvent current = null;
+
+	private String element;
+
+	private InputStream inputStream;
+
+	public XMLIterator(final String element, final InputStream inputStream) {
+		super();
+		this.element = element;
+		this.inputStream = inputStream;
+		this.parser = getParser();
+		try {
+			this.current = findElement(parser);
+		} catch (XMLStreamException e) {
+			log.warn("cannot init parser position. No element found: " + element);
+			current = null;
+		}
+	}
+
+	@Override
+	public boolean hasNext() {
+		return current != null;
+	}
+
+	@Override
+	public String next() {
+		String result = null;
+		try {
+			result = copy(parser);
+			current = findElement(parser);
+			return result;
+		} catch (XMLStreamException e) {
+			throw new RuntimeException(String.format("error copying xml, built so far: '%s'", result), e);
+		}
+	}
+
+	@Override
+	public void remove() {
+		throw new UnsupportedOperationException();
+	}
+
+	@SuppressWarnings("finally")
+	private String copy(final XMLEventReader parser) throws XMLStreamException {
+		final StringWriter result = new StringWriter();
+		try {
+			final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(result);
+			final StartElement start = current.asStartElement();
+			final StartElement newRecord = eventFactory
+				.get()
+				.createStartElement(start.getName(), start.getAttributes(), start.getNamespaces());
+
+			// new root record
+			writer.add(newRecord);
+
+			// copy the rest as it is
+			while (parser.hasNext()) {
+				final XMLEvent event = parser.nextEvent();
+
+				// TODO: replace with depth tracking instead of close tag tracking.
+				if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals(element)) {
+					writer.add(event);
+					break;
+				}
+
+				writer.add(event);
+			}
+			writer.close();
+		} finally {
+			return result.toString();
+		}
+	}
+
+	/**
+	 * Looks for the next occurrence of the splitter element.
+	 *
+	 * @param parser
+	 * @return
+	 * @throws XMLStreamException
+	 */
+	private XMLEvent findElement(final XMLEventReader parser) throws XMLStreamException {
+
+		/*
+		 * if (current != null && element.equals(current.asStartElement().getName().getLocalPart())) { return current; }
+		 */
+
+		XMLEvent peek = parser.peek();
+		if (peek != null && peek.isStartElement()) {
+			String name = peek.asStartElement().getName().getLocalPart();
+			if (element.equals(name)) {
+				return peek;
+			}
+		}
+
+		while (parser.hasNext()) {
+			final XMLEvent event = parser.nextEvent();
+			if (event != null && event.isStartElement()) {
+				String name = event.asStartElement().getName().getLocalPart();
+				if (element.equals(name)) {
+					return event;
+				}
+			}
+		}
+		return null;
+	}
+
+	private XMLEventReader getParser() {
+		try {
+			return inputFactory.get().createXMLEventReader(sanitize(inputStream));
+		} catch (XMLStreamException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	private Reader sanitize(final InputStream in) {
+		final CharsetDecoder charsetDecoder = Charset.forName(UTF_8).newDecoder();
+		charsetDecoder.onMalformedInput(CodingErrorAction.REPLACE);
+		charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+		return new InputStreamReader(in, charsetDecoder);
+	}
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/XmlCleaner.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/XmlCleaner.java
@ -1,5 +1,5 @@

-package eu.dnetlib.dhp.collection;
+package eu.dnetlib.dhp.collection.plugin.utils;

 import java.util.HashMap;
 import java.util.HashSet;
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
@ -47,13 +47,18 @@ object DataciteToOAFTransformation {
  }

  /** This method should skip record if json contains invalid text
-    * defined in gile datacite_filter
+    * defined in file datacite_filter
    *
-    * @param json
+    * @param record : unparsed datacite record
+    * @param json : parsed record
    * @return True if the record should be skipped
    */
-  def skip_record(json: String): Boolean = {
-    datacite_filter.exists(f => json.contains(f))
+  def skip_record(record: String, json: org.json4s.JValue): Boolean = {
+    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+    datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher")
+      .extractOrElse[String]("")
+      .equalsIgnoreCase("FAIRsharing")
+
  }

  @deprecated("this method will be removed", "dhp")
@ -304,12 +309,13 @@ object DataciteToOAFTransformation {
    vocabularies: VocabularyGroup,
    exportLinks: Boolean
  ): List[Oaf] = {
-    if (skip_record(input))
-      return List()

    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json = parse(input)

+    if (skip_record(input, json))
+      return List()
+
    val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
    val resourceTypeGeneral =
      (json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/file/FileCollectorPluginTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/file/FileCollectorPluginTest.java
@ -0,0 +1,61 @@
+
+package eu.dnetlib.dhp.collection.plugin.file;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.stream.Stream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import net.bytebuddy.asm.Advice;
+
+/**
+ * Checks that {@link FileCollectorPlugin} splits a plain XML file into non-empty records.
+ */
+public class FileCollectorPluginTest {
+
+	private static final Logger log = LoggerFactory.getLogger(FileCollectorPluginTest.class);
+
+	private final ApiDescriptor api = new ApiDescriptor();
+
+	private FileCollectorPlugin plugin;
+
+	private static final String SPLIT_ON_ELEMENT = "repository";
+
+	@BeforeEach
+	public void setUp() throws IOException {
+
+		final String xmlFile = this
+			.getClass()
+			.getResource("/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml")
+			.getFile();
+
+		api.setBaseUrl(xmlFile);
+
+		HashMap<String, String> params = new HashMap<>();
+		params.put("splitOnElement", SPLIT_ON_ELEMENT);
+
+		api.setParams(params);
+
+		FileSystem fs = FileSystem.get(new Configuration());
+		plugin = new FileCollectorPlugin(fs);
+	}
+
+	@Test
+	void test() throws CollectorException {
+
+		final Stream<String> stream = plugin.collect(api, new AggregatorReport());
+
+		final String[] records = stream.limit(10).toArray(String[]::new);
+
+		// without this check the test would also pass when no record at all is collected
+		Assertions.assertTrue(records.length > 0);
+
+		for (final String s : records) {
+			Assertions.assertTrue(s.length() > 0);
+			log.info(s);
+		}
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/file/FileGZipCollectorPluginTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/file/FileGZipCollectorPluginTest.java
@ -0,0 +1,68 @@
+
+package eu.dnetlib.dhp.collection.plugin.file;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.HashMap;
+import java.util.Objects;
+import java.util.stream.Stream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.junit.jupiter.api.*;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mockito;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.collection.ApiDescriptor;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+
+@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
+@ExtendWith(MockitoExtension.class)
+/**
+ * Checks that {@link FileGZipCollectorPlugin} splits a gzip-compressed XML file into non-empty records.
+ */
+public class FileGZipCollectorPluginTest {
+
+	private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPluginTest.class);
+
+	private final ApiDescriptor api = new ApiDescriptor();
+
+	private FileGZipCollectorPlugin plugin;
+
+	private static final String SPLIT_ON_ELEMENT = "repository";
+
+	@BeforeEach
+	public void setUp() throws IOException {
+
+		final String gzipFile = Objects
+			.requireNonNull(
+				this
+					.getClass()
+					.getResource("/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml.gz"))
+			.getFile();
+
+		api.setBaseUrl(gzipFile);
+
+		HashMap<String, String> params = new HashMap<>();
+		params.put("splitOnElement", SPLIT_ON_ELEMENT);
+
+		api.setParams(params);
+
+		FileSystem fs = FileSystem.get(new Configuration());
+		plugin = new FileGZipCollectorPlugin(fs);
+	}
+
+	@Test
+	void test() throws CollectorException {
+
+		final Stream<String> stream = plugin.collect(api, new AggregatorReport());
+
+		final String[] records = stream.limit(10).toArray(String[]::new);
+
+		// without this check the test would also pass when no record at all is collected
+		Assertions.assertTrue(records.length > 0);
+
+		for (final String s : records) {
+			Assertions.assertTrue(s.length() > 0);
+			log.info(s);
+		}
+	}
+}
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record_fairsharing.json
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record_fairsharing.json
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml.gz
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml.gz
--- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala
+++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala
@ -107,4 +107,19 @@ class DataciteToOAFTest extends AbstractVocabularyTest {

  }

+  /** Verifies that generateOAF produces no Oaf object for the record_fairsharing.json test record. */
+  @Test
+  def testFilter(): Unit = {
+    val record = Source
+      .fromInputStream(
+        getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record_fairsharing.json")
+      )
+      .mkString
+
+    val res: List[Oaf] = DataciteToOAFTransformation.generateOAF(record, 0L, 0L, vocabularies, true)
+
+    assertTrue(res.isEmpty)
+
+  }
+
 }
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
@ -18,14 +18,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.oaiIProvenance;
 import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
 import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;

-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-import java.util.Set;
+import java.util.*;

 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.*;
@ -35,6 +28,7 @@ import com.google.common.collect.Sets;

 import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.AccessRight;
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.Context;
@ -199,8 +193,13 @@ public abstract class AbstractMdRecordToOafMapper {
 		final List<Oaf> oafs = Lists.newArrayList(entity);

 		if (!oafs.isEmpty()) {
-			oafs.addAll(addProjectRels(doc, entity));
-			oafs.addAll(addOtherResultRels(doc, entity));
+			Set<Oaf> rels = Sets.newHashSet();
+
+			rels.addAll(addProjectRels(doc, entity));
+			rels.addAll(addOtherResultRels(doc, entity));
+			rels.addAll(addRelations(doc, entity));
+
+			oafs.addAll(rels);
 		}

 		return oafs;
@ -278,6 +277,46 @@ public abstract class AbstractMdRecordToOafMapper {
 		return res;
 	}

+	/**
+	 * Builds the relations described by the oaf:relation elements of the record. Each element having a non-blank
+	 * text (the target) and non-blank relType, subRelType, relClass and targetType attributes produces two
+	 * relations: one from the entity to the target and one, with the inverse relClass, from the target to the
+	 * entity. Elements missing any of those values are ignored.
+	 *
+	 * @param doc the metadata record
+	 * @param entity the entity mapped from the record, used as one end of every relation
+	 * @return the list of the relations, empty if none is found
+	 */
+	private List<Oaf> addRelations(Document doc, OafEntity entity) {
+
+		final List<Oaf> relations = Lists.newArrayList();
+
+		for (Object node : doc.selectNodes("//oaf:relation")) {
+			final Element relElement = (Element) node;
+
+			final String target = StringUtils.trim(relElement.getText());
+			final String relType = relElement.attributeValue("relType");
+			final String subRelType = relElement.attributeValue("subRelType");
+			final String relClass = relElement.attributeValue("relClass");
+
+			if (StringUtils.isBlank(target) || StringUtils.isBlank(relType) || StringUtils.isBlank(subRelType)
+				|| StringUtils.isBlank(relClass)) {
+				continue;
+			}
+
+			// resolved before looking at targetType, as this lookup may fail for an unknown relation
+			final String inverseRelClass = ModelSupport
+				.findInverse(ModelSupport.rel(relType, subRelType, relClass))
+				.getInverseRelClass();
+			final String validationDate = ((Node) node).valueOf("@validationDate");
+
+			final String targetType = relElement.attributeValue("targetType");
+			if (StringUtils.isBlank(targetType)) {
+				continue;
+			}
+
+			final String targetId = createOpenaireId(targetType, target, true);
+
+			// direct relation: entity -> target
+			relations
+				.add(
+					getRelation(
+						entity.getId(), targetId, relType, subRelType, relClass, entity, validationDate));
+			// inverse relation: target -> entity
+			relations
+				.add(
+					getRelation(
+						targetId, entity.getId(), relType, subRelType, inverseRelClass, entity,
+						validationDate));
+		}
+		return relations;
+	}
+
 	protected Relation getRelation(final String source,
 		final String target,
 		final String relType,
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
@ -57,14 +57,10 @@ class MappersTest {

 		final List<Oaf> list = new OafToOafMapper(vocs, false, true).processMdRecord(xml);

-		assertEquals(3, list.size());
-		assertTrue(list.get(0) instanceof Publication);
-		assertTrue(list.get(1) instanceof Relation);
-		assertTrue(list.get(2) instanceof Relation);
+		assertEquals(1, list.stream().filter(o -> o instanceof Publication).count());
+		assertEquals(4, list.stream().filter(o -> o instanceof Relation).count());

-		final Publication p = (Publication) list.get(0);
-		final Relation r1 = (Relation) list.get(1);
-		final Relation r2 = (Relation) list.get(2);
+		Publication p = (Publication) list.stream().filter(o -> o instanceof Publication).findFirst().get();

 		assertValidId(p.getId());

@ -125,26 +121,58 @@ class MappersTest {

 		assertNotNull(p.getBestaccessright());
 		assertEquals("OPEN", p.getBestaccessright().getClassid());
-		assertValidId(r1.getSource());
-		assertValidId(r1.getTarget());
-		assertValidId(r2.getSource());
-		assertValidId(r2.getTarget());
-		assertValidId(r1.getCollectedfrom().get(0).getKey());
-		assertValidId(r2.getCollectedfrom().get(0).getKey());
-		assertNotNull(r1.getDataInfo());
-		assertNotNull(r2.getDataInfo());
-		assertNotNull(r1.getDataInfo().getTrust());
-		assertNotNull(r2.getDataInfo().getTrust());
-		assertEquals(r1.getSource(), r2.getTarget());
-		assertEquals(r2.getSource(), r1.getTarget());
-		assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
-		assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
-		assertTrue(StringUtils.isNotBlank(r1.getRelType()));
-		assertTrue(StringUtils.isNotBlank(r2.getRelType()));
-		assertTrue(r1.getValidated());
-		assertTrue(r2.getValidated());
-		assertEquals("2020-01-01", r1.getValidationDate());
-		assertEquals("2020-01-01", r2.getValidationDate());
+
+		// RESULT PROJECT
+		List<Relation> resultProject = list
+			.stream()
+			.filter(o -> o instanceof Relation)
+			.map(o -> (Relation) o)
+			.filter(r -> ModelConstants.RESULT_PROJECT.equals(r.getRelType()))
+			.collect(Collectors.toList());
+
+		assertEquals(2, resultProject.size());
+		final Relation rp1 = resultProject.get(0);
+		final Relation rp2 = resultProject.get(1);
+
+		verifyRelation(rp1);
+		verifyRelation(rp2);
+
+		assertTrue(rp1.getValidated());
+		assertTrue(rp2.getValidated());
+		assertEquals("2020-01-01", rp1.getValidationDate());
+		assertEquals("2020-01-01", rp2.getValidationDate());
+
+		assertEquals(rp1.getSource(), rp2.getTarget());
+		assertEquals(rp2.getSource(), rp1.getTarget());
+
+		// AFFILIATIONS
+		List<Relation> affiliation = list
+			.stream()
+			.filter(o -> o instanceof Relation)
+			.map(o -> (Relation) o)
+			.filter(r -> ModelConstants.RESULT_ORGANIZATION.equals(r.getRelType()))
+			.collect(Collectors.toList());
+
+		assertEquals(2, affiliation.size());
+		final Relation aff1 = affiliation.get(0);
+		final Relation aff2 = affiliation.get(1);
+
+		verifyRelation(aff1);
+		verifyRelation(aff2);
+
+		assertEquals(aff1.getSource(), aff2.getTarget());
+		assertEquals(aff2.getSource(), aff1.getTarget());
+	}
+
+	/**
+	 * Asserts the properties expected from every relation checked by these tests: valid source and target
+	 * identifiers, a valid identifier for the first collectedfrom entry, a data info carrying a trust value,
+	 * and non blank relation class and relation type.
+	 *
+	 * @param r the relation to check
+	 */
+	private void verifyRelation(Relation r) {
+		// both ends of the relation
+		assertValidId(r.getSource());
+		assertValidId(r.getTarget());
+
+		// provenance
+		final String collectedFromKey = r.getCollectedfrom().get(0).getKey();
+		assertValidId(collectedFromKey);
+		assertNotNull(r.getDataInfo());
+		assertNotNull(r.getDataInfo().getTrust());
+
+		// semantics
+		assertFalse(StringUtils.isBlank(r.getRelClass()));
+		assertFalse(StringUtils.isBlank(r.getRelType()));
+	}

 	@Test
@ -734,6 +762,51 @@ class MappersTest {
 		assertFalse(p_cleaned.getTitle().isEmpty());
 	}

+	/**
+	 * Maps the {@code odf_zenodo.xml} fixture and checks title and authors of the resulting publication.
+	 * The fixture contains two creators: one whose name has no comma ("Anne van Weerden") and one whose
+	 * name has a comma ("Anne van, Weerden"); the test expects name and surname to be set only for the latter.
+	 */
+	@Test
+	void testZenodo() throws IOException, DocumentException {
+		final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_zenodo.xml")));
+		final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
+
+		System.out.println("***************");
+		System.out.println(new ObjectMapper().writeValueAsString(list));
+		System.out.println("***************");
+
+		final Publication p = (Publication) list.get(0);
+		assertValidId(p.getId());
+		assertValidId(p.getCollectedfrom().get(0).getKey());
+
+		assertNotNull(p.getTitle());
+		assertFalse(p.getTitle().isEmpty());
+		assertEquals(1, p.getTitle().size());
+		assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
+
+		assertNotNull(p.getAuthor());
+		assertEquals(2, p.getAuthor().size());
+
+		// creator name without comma: only the full name is expected
+		final Author unsplitAuthor = findAuthorByPid(p, "0000-0003-3272-8007");
+		assertTrue(StringUtils.isBlank(unsplitAuthor.getSurname()));
+		assertTrue(StringUtils.isBlank(unsplitAuthor.getName()));
+		assertEquals("Anne van Weerden", unsplitAuthor.getFullname());
+
+		// creator name with comma: name, surname and full name are all expected
+		final Author splitAuthor = findAuthorByPid(p, "0000-0003-3272-8008");
+		assertFalse(StringUtils.isBlank(splitAuthor.getSurname()));
+		assertFalse(StringUtils.isBlank(splitAuthor.getName()));
+		assertFalse(StringUtils.isBlank(splitAuthor.getFullname()));
+	}
+
+	/**
+	 * Returns the first author of the publication having a pid with the given value, failing the test with a
+	 * descriptive error (instead of a bare NoSuchElementException) when no such author exists.
+	 */
+	private Author findAuthorByPid(final Publication p, final String pidValue) {
+		return p
+			.getAuthor()
+			.stream()
+			.filter(a -> a.getPid().stream().anyMatch(pi -> pi.getValue().equals(pidValue)))
+			.findFirst()
+			.orElseThrow(() -> new AssertionError("no author found with pid " + pidValue));
+	}
+
 	@Test
 	void testOdfFromHdfs() throws IOException, DocumentException {
 		final String xml = IOUtils
@ -835,6 +908,20 @@ class MappersTest {
 		assertEquals("EUR", p.getProcessingchargecurrency().getValue());
 	}

+	/**
+	 * Smoke test for the {@code rohub.xml} fixture: the record is mapped and the result is printed as JSON.
+	 * No assertion is made on the mapped objects yet; the checks below are still disabled.
+	 */
+	@Test
+	void testROHub() throws IOException, DocumentException {
+		final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("rohub.xml")));
+		final OdfToOafMapper mapper = new OdfToOafMapper(vocs, false, true);
+		final List<Oaf> list = mapper.processMdRecord(xml);
+
+		final String separator = "***************";
+		System.out.println(separator);
+		System.out.println(new ObjectMapper().writeValueAsString(list));
+		System.out.println(separator);
+//		final Dataset p = (Dataset) list.get(0);
+//		assertValidId(p.getId());
+//		assertValidId(p.getCollectedfrom().get(0).getKey());
+//		System.out.println(p.getTitle().get(0).getValue());
+//		assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
+	}
+
 	private void assertValidId(final String id) {
 		// System.out.println(id);

--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt
@ -497,6 +497,7 @@ dnet:publication_resource @=@ 0044 @=@ Graduate diploma
 dnet:publication_resource @=@ 0044 @=@ Undergraduate diploma
 dnet:publication_resource @=@ 0000 @=@ UNKNOWN
 dnet:publication_resource @=@ 0042 @=@ EGI Virtual Appliance
+dnet:publication_resource @=@ 0048 @=@ RO-crate
 dnet:languages @=@ abk @=@ ab
 dnet:languages @=@ aar @=@ aa
 dnet:languages @=@ afr @=@ af
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt
@ -164,6 +164,7 @@ dnet:publication_resource @=@ dnet:publication_resource @=@ 0030 @=@ Sound
 dnet:publication_resource @=@ dnet:publication_resource @=@ 0044 @=@ Thesis
 dnet:publication_resource @=@ dnet:publication_resource @=@ 0000 @=@ Unknown
 dnet:publication_resource @=@ dnet:publication_resource @=@ 0042 @=@ Virtual Appliance
+dnet:publication_resource @=@ dnet:publication_resource @=@ 0048 @=@ Research Object
 ec:funding_typologies @=@ ec:funding_typologies @=@ ec:frameworkprogram @=@ frameworkprogram
 ec:funding_typologies @=@ ec:funding_typologies @=@ ec:program @=@ program
 ec:funding_typologies @=@ ec:funding_typologies @=@ ec:specificprogram @=@ specificprogram
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml
@ -60,6 +60,15 @@
    <oaf:fulltext>https://oneecosystem.pensoft.net/article/13718/</oaf:fulltext>
    <oaf:journal eissn="2367-8194" issn="">One Ecosystem</oaf:journal>
    <oaf:refereed>0001</oaf:refereed>
+    <oaf:relation relClass="hasAuthorInstitution"
+                  relType="resultOrganization"
+                  subRelType="affiliation"
+                  targetType="organization">ror_________::https://ror.org/02gdcn153</oaf:relation>
+    <oaf:relation relClass="isProducedBy"
+                  relType="resultProject"
+                  subRelType="outcome"
+                  targetType="project"
+                  validationDate="2020-01-01">corda_______::226852</oaf:relation>
  </metadata>
  <about xmlns:oai="http://www.openarchives.org/OAI/2.0/">
    <provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo.xml
@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<record xmlns:oaf="http://namespace.openaire.eu/oaf"
+        xmlns:oai="http://www.openarchives.org/OAI/2.0/"
+        xmlns:datacite="http://datacite.org/schema/kernel-3"
+        xmlns:dr="http://www.driver-repository.eu/namespace/dr"
+        xmlns:dri="http://www.driver-repository.eu/namespace/dri">
+    <header xmlns="http://www.openarchives.org/OAI/2.0/">
+        <identifier>oai:zenodo.org:3406824</identifier>
+        <datestamp>2020-01-20T16:45:20Z</datestamp>
+        <setSpec>openaire</setSpec>
+        <dr:dateOfTransformation>2022-06-07T10:21:24.06Z</dr:dateOfTransformation>
+        <dri:objIdentifier>test________::92fe3efa47883b2f3401e6a4bd92e9d7</dri:objIdentifier>
+        <dri:dateOfCollection>2020-05-21T05:26:15.93Z</dri:dateOfCollection>
+        <dri:dateOfTransformation>2020-08-01T11:06:26.977Z</dri:dateOfTransformation>
+    </header>
+    <metadata>
+        <resource xmlns="http://datacite.org/schema/kernel-4"
+                  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+                  xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.1/metadata.xsd">
+            <identifier identifierType="DOI">10.5281/zenodo.3406824</identifier>
+            <alternateIdentifiers xmlns="http://datacite.org/schema/kernel-3">
+                <alternateIdentifier alternateIdentifierType="URL">http://dx.doi.org/10.5281/zenodo.3406824</alternateIdentifier>
+            </alternateIdentifiers>
+            <creators>
+                <creator>
+                    <creatorName>Anne van Weerden</creatorName>
+                    <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3272-8007</nameIdentifier>
+                    <affiliation>Utrecht University Library</affiliation>
+                </creator>
+                <creator>
+                    <creatorName>Anne van, Weerden</creatorName>
+                    <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3272-8008</nameIdentifier>
+                    <affiliation>Utrecht University Library</affiliation>
+                </creator>
+            </creators>
+            <titles>
+                <title>Helen Bayly and Catherine Disney as influences in the life of Sir William Rowan Hamilton</title>
+            </titles>
+            <publisher>Zenodo</publisher>
+            <publicationYear>2018</publicationYear>
+            <subjects>
+                <subject>Sir William Rowan Hamilton, Lady Helena Maria Hamilton Bayly, Catherine Disney, Ireland, history, biography, nineteenth century</subject>
+            </subjects>
+            <dates>
+                <date dateType="Issued">2018-12-28</date>
+            </dates>
+            <language>en</language>
+            <resourceType resourceTypeGeneral="JournalArticle"/>
+            <relatedIdentifiers>
+                <relatedIdentifier relatedIdentifierType="DOI" relationType="IsVersionOf">10.5281/zenodo.3406823</relatedIdentifier>
+            </relatedIdentifiers>
+            <rightsList>
+                <rights rightsURI="https://creativecommons.org/licenses/by/4.0/legalcode">Creative Commons Attribution 4.0 International</rights>
+                <rights rightsURI="info:eu-repo/semantics/openAccess">Open Access</rights>
+            </rightsList>
+            <descriptions>
+                <description descriptionType="Abstract"><p>In the 1880s Robert Graves published a biography about Sir William Rowan Hamilton (1805-1865), to which in a 1980 biography Thomas Hankins added further information. From these biographies a picture emerged of a man who was unhappily married because he had lost the love of his life, which raised the question how such an unhappy man could produce so much beautiful mathematics. In this article it is stated that a main cause for the unhappy picture is that Graves ignored the influence on one another of Hamilton and his wife Helen Bayly, and Hankins that of Hamilton and his first and lost love Catherine Disney. It is then shown that if these influences are taken into account a very different view on Hamilton;s private life arises, in which he was happily married to a wife who enabled him to work as he needed to.</p></description>
+            </descriptions>
+        </resource>
+        <oaf:identifier identifierType="doi">10.5281/zenodo.3406824</oaf:identifier>
+        <dr:CobjCategory type="publication">0001</dr:CobjCategory>
+        <oaf:dateAccepted>2018-12-28</oaf:dateAccepted>
+        <oaf:accessrights>OPEN</oaf:accessrights>
+        <oaf:license>https://creativecommons.org/licenses/by/4.0/legalcode</oaf:license>
+        <oaf:language>eng</oaf:language>
+        <oaf:hostedBy name="ZENODO" id="opendoar____::2659"/>
+        <oaf:collectedFrom name="ZENODO" id="opendoar____::2659"/>
+    </metadata>
+</record>
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub.xml
@ -0,0 +1,103 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<record xmlns:datacite="http://datacite.org/schema/kernel-4"
+        xmlns:dc="http://purl.org/dc/elements/1.1/"
+        xmlns:dr="http://www.driver-repository.eu/namespace/dr"
+        xmlns:dri="http://www.driver-repository.eu/namespace/dri"
+        xmlns:oaf="http://namespace.openaire.eu/oaf"
+        xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
+        xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <header xmlns="http://www.openarchives.org/OAI/2.0/">
+        <dri:objIdentifier>eosca5322f5f::4dd1aaf93ae136b65dc9ee4e6f76eac9</dri:objIdentifier>
+        <dri:recordIdentifier>53aa90bf-c593-4e6d-923f-d4711ac4b0e1</dri:recordIdentifier>
+        <dri:dateOfCollection>2022-05-25T15:35:48.262Z</dri:dateOfCollection>
+        <oaf:datasourceprefix>eosca5322f5f</oaf:datasourceprefix>
+        <identifier>53aa90bf-c593-4e6d-923f-d4711ac4b0e1</identifier>
+        <datestamp>2022-05-25T15:35:38Z</datestamp>
+        <setSpec>rohub_data</setSpec>
+        <setSpec>ro-crate_data</setSpec>
+        <dr:dateOfTransformation>2022-05-25T15:36:11.094Z</dr:dateOfTransformation>
+    </header>
+    <metadata>
+        <oaire:resource xmlns="http://namespace.openaire.eu/schema/oaire/">
+            <datacite:identifier identifierType="landingPage">https://w3id.org/ro-id/53aa90bf-c593-4e6d-923f-d4711ac4b0e1</datacite:identifier>
+            <datacite:alternateIdentifiers>
+                <datacite:alternateIdentifier alternateIdentifierType="URL">http://api.rohub.org/api/ros/53aa90bf-c593-4e6d-923f-d4711ac4b0e1/</datacite:alternateIdentifier>
+            </datacite:alternateIdentifiers>
+            <datacite:relatedIdentifiers>
+                <datacite:relatedIdentifier relatedIdentifierType="" relationType="">
+                    https://github.com/NordicESMhub/RELIANCE/blob/main/content/science/notebooks/air_quality_lockdown.ipynb
+                </datacite:relatedIdentifier>
+                <datacite:relatedIdentifier relatedIdentifierType="URI" relationType="IsPartOf">https://github.com/NordicESMhub/RELIANCE/blob/main/content/science/notebooks/air_quality_lockdown.ipynb</datacite:relatedIdentifier>
+                <datacite:relatedIdentifier relatedIdentifierType="" relationType="">
+                    https://nordicesmhub.github.io/RELIANCE/science/notebooks/air_quality_lockdown.html
+                </datacite:relatedIdentifier>
+                <datacite:relatedIdentifier relatedIdentifierType="URI" relationType="IsPartOf">https://nordicesmhub.github.io/RELIANCE/science/notebooks/air_quality_lockdown.html</datacite:relatedIdentifier>
+            </datacite:relatedIdentifiers>
+            <creators xmlns="http://datacite.org/schema/kernel-4">
+                <creator>
+                    <creator>
+                        <creatorName>Anne Fouilloux</creatorName>
+                    </creator>
+                </creator>
+            </creators>
+            <dates xmlns="http://datacite.org/schema/kernel-4">
+                <date dateType="Created">2021-12-19T21:18:33Z</date>
+            </dates>
+            <dc:descriptions>
+                <dc:description descriptionType="Abstract">The COVID-19 pandemic has led to significant reductions in economic activity, especially during lockdowns. Several studies has shown that the concentration of nitrogen dioxyde and particulate matter levels have reduced during lockdown events. Reductions in transportation sector emissions are most likely largely responsible for the NO2 anomalies. In this study, we analyze the impact of lockdown events on the air quality using data from Copernicus Atmosphere Monitoring Service over Europe and at selected locations.</dc:description>
+            </dc:descriptions>
+            <oaire:fundingReferences>
+                <oaire:fundingReference>
+                    <oaire:funderName>European Commission</oaire:funderName>
+                    <oaire:funderIdentifier funderIdentifierType="Crossref Funder ID">10.13039/501100000781</oaire:funderIdentifier>
+                    <oaire:awardNumber awardURI="">101017502</oaire:awardNumber>
+                    <oaire:awardTitle>Research Lifecycle Management for Earth Science Communities and Copernicus Users</oaire:awardTitle>
+                </oaire:fundingReference>
+            </oaire:fundingReferences>
+            <oaire:licenseCondition uri="https://opensource.org/licenses/MIT">MIT License</oaire:licenseCondition>
+            <dc:publisher>University of Oslo</dc:publisher>
+            <dc:publicationYear>2021</dc:publicationYear>
+            <oaire:resourceType resourceTypeGeneral="other research product" uri="http://purl.org/coar/resource_type/c_1843">RO-crate</oaire:resourceType>
+            <rightsList xmlns="http://datacite.org/schema/kernel-4">
+                <rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</rights>
+            </rightsList>
+            <sizes xmlns="http://datacite.org/schema/kernel-4">
+                <size>11.971 MB</size>
+            </sizes>
+            <subjects xmlns="http://datacite.org/schema/kernel-4">
+                <subject>Applied sciences</subject>
+                <subject>Meteorology</subject>
+                <subject>EOSC::RO-crate</subject>
+            </subjects>
+            <titles xmlns="http://datacite.org/schema/kernel-4">
+                <title>Impact of the Covid-19 Lockdown on Air quality over Europe</title>
+            </titles>
+        </oaire:resource>
+        <oaf:identifier identifierType="URL">https://w3id.org/ro-id/53aa90bf-c593-4e6d-923f-d4711ac4b0e1</oaf:identifier>
+        <dr:CobjCategory type="other">0048</dr:CobjCategory>
+        <oaf:dateAccepted/>
+        <oaf:accessrights>OPEN</oaf:accessrights>
+        <oaf:license>https://opensource.org/licenses/MIT</oaf:license>
+        <oaf:language>und</oaf:language>
+        <oaf:hostedBy id="eosc________::psnc::psnc.rohub" name="ROHub"/>
+        <oaf:collectedFrom id="eosc________::psnc::psnc.rohub" name="ROHub"/>
+    </metadata>
+    <about xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
+        <provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
+            <originDescription altered="true" harvestDate="2022-05-25T15:35:48.262Z">
+                <baseURL>https%3A%2F%2Fapi.rohub.org%2Fapi%2Foai2d%2F</baseURL>
+                <identifier>53aa90bf-c593-4e6d-923f-d4711ac4b0e1</identifier>
+                <datestamp>2022-05-25T15:35:38Z</datestamp>
+                <metadataNamespace/>
+            </originDescription>
+        </provenance>
+        <oaf:datainfo>
+            <oaf:inferred>false</oaf:inferred>
+            <oaf:deletedbyinference>false</oaf:deletedbyinference>
+            <oaf:trust>0.9</oaf:trust>
+            <oaf:inferenceprovenance/>
+            <oaf:provenanceaction classid="sysimport:crosswalk"
+                                  classname="Harvested" schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
+        </oaf:datainfo>
+    </about>
+</record>
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
@ -107,11 +107,6 @@ compute stats TARGET.result_sources;
 create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 compute stats TARGET.result_topics;

-create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-compute stats TARGET.result_apc;
-
-
-
 create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
 create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
 create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql
@ -127,6 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
 SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
 FROM ${openaire_db_name}.relation r
 WHERE r.reltype = 'resultOrganization'
+  and r.target like '50|%'
  and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;

 CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
@ -93,7 +93,7 @@ where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false;
 CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
 SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
 FROM ${openaire_db_name}.relation r
-WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
+WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.target like '20|%' and r.datainfo.invisible=false;

 -- datasource sources:
 -- where the datasource info have been collected from.