imported dnet-modular-collector-service-rmi in dnet-core-components, imported dnet-modular-collector-service (and plugins) in dnet-data-services
This commit is contained in:
parent
1c192fbfee
commit
7acac5986a
|
@ -0,0 +1,12 @@
|
|||
package eu.dnetlib.data.collector.functions;

import java.util.List;

import java.util.Map;

import eu.dnetlib.data.collector.rmi.ProtocolParameterValue;

/**
 * Function used to dynamically compute the valid values of a protocol parameter
 * (e.g. listing the OAI sets offered by a repository, see ListOaiSetsFunction).
 */
public interface ParamValuesFunction {

	/**
	 * Finds the valid values for a parameter.
	 *
	 * @param baseUrl the base URL of the remote datasource
	 * @param params  the other protocol parameters already configured on the interface
	 * @return the list of admissible parameter values
	 */
	List<ProtocolParameterValue> findValues(String baseUrl, Map<String, String> params);

}
|
|
@ -0,0 +1,43 @@
|
|||
package eu.dnetlib.data.collector.plugin;

import java.util.List;

import org.springframework.beans.factory.annotation.Required;

import com.google.common.base.Function;
import com.google.common.collect.Lists;

import eu.dnetlib.data.collector.plugin.CollectorPlugin;
import eu.dnetlib.data.collector.rmi.ProtocolDescriptor;
import eu.dnetlib.data.collector.rmi.ProtocolParameter;

/**
 * Convenience base class for collector plugins: holds the injected protocol
 * descriptor and derives the protocol name and parameter names from it.
 */
public abstract class AbstractCollectorPlugin implements CollectorPlugin {

	/** Descriptor of the protocol implemented by this plugin (injected via Spring). */
	private ProtocolDescriptor protocolDescriptor;

	/** Returns the protocol name declared in the descriptor. */
	@Override
	public final String getProtocol() {
		return getProtocolDescriptor().getName();
	}

	/** Returns the names of all parameters declared in the protocol descriptor. */
	@Override
	public final List<String> listNameParameters() {
		final List<String> names = Lists.newArrayList();
		for (final ProtocolParameter param : getProtocolDescriptor().getParams()) {
			names.add(param.getName());
		}
		return names;
	}

	@Override
	public final ProtocolDescriptor getProtocolDescriptor() {
		return protocolDescriptor;
	}

	@Required
	public void setProtocolDescriptor(final ProtocolDescriptor protocolDescriptor) {
		this.protocolDescriptor = protocolDescriptor;
	}
}
|
|
@ -0,0 +1,18 @@
|
|||
package eu.dnetlib.data.collector.plugin;

import java.util.List;

import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
import eu.dnetlib.data.collector.rmi.ProtocolDescriptor;

/**
 * Contract of a collector plugin: collects records from a datasource interface
 * using a specific access protocol.
 */
public interface CollectorPlugin {

	/**
	 * Collects the records exposed by the given datasource interface.
	 *
	 * @param interfaceDescriptor the datasource interface to collect from
	 * @param fromDate lower date bound (may be null for an unbounded collection)
	 * @param untilDate upper date bound (may be null for an unbounded collection)
	 * @return the collected records
	 * @throws CollectorServiceException when the collection fails
	 */
	Iterable<String> collect(InterfaceDescriptor interfaceDescriptor, String fromDate, String untilDate) throws CollectorServiceException;

	/** Returns the descriptor of the protocol implemented by this plugin. */
	ProtocolDescriptor getProtocolDescriptor();

	/** Returns the protocol name (the name declared in the protocol descriptor). */
	String getProtocol();

	/** Returns the names of the parameters accepted by the protocol. */
	List<String> listNameParameters();
}
|
|
@ -0,0 +1,19 @@
|
|||
package eu.dnetlib.data.collector.plugin;

import java.util.LinkedList;

/**
 * List of error messages accumulated across the retries of a collection;
 * {@link #toString()} renders them as a single readable log line.
 */
public class CollectorPluginErrorLogList extends LinkedList<String> {

	private static final long serialVersionUID = -6925786561303289704L;

	/**
	 * Formats every stored message as {@code "Retry #N: message / "}.
	 *
	 * @return the concatenated log (empty string when the list is empty)
	 */
	@Override
	public String toString() {
		// StringBuilder avoids the O(n^2) cost of repeated String concatenation
		// (the original used `log += ...` plus the `new String()` antipattern).
		final StringBuilder log = new StringBuilder();
		int index = 0;
		for (final String errorMessage : this) {
			log.append(String.format("Retry #%s: %s / ", index++, errorMessage));
		}
		return log.toString();
	}

}
|
|
@ -0,0 +1,30 @@
|
|||
package eu.dnetlib.data.collector.rmi;

import java.util.List;
import java.util.Map;

import javax.jws.WebParam;
import javax.jws.WebService;
import javax.xml.ws.wsaddressing.W3CEndpointReference;

import eu.dnetlib.common.rmi.BaseService;

/**
 * SOAP service exposing the collection of records from remote datasources.
 */
@WebService(targetNamespace = "http://services.dnetlib.eu/")
public interface CollectorService extends BaseService {

	/**
	 * Collects all the records exposed by the given datasource interface.
	 *
	 * @param interfaceDescriptor the datasource interface to collect from
	 * @return an EPR of the resultset delivering the collected records
	 * @throws CollectorServiceException when the collection fails
	 */
	W3CEndpointReference collect(@WebParam(name = "interface") final InterfaceDescriptor interfaceDescriptor) throws CollectorServiceException;

	/**
	 * Collects the records in the given date range (bounds may be null for an
	 * unbounded collection).
	 */
	W3CEndpointReference dateRangeCollect(
			@WebParam(name = "interface") final InterfaceDescriptor interfaceDescriptor,
			@WebParam(name = "from") final String from,
			@WebParam(name = "until") final String until) throws CollectorServiceException;

	/** Lists the protocols supported by the registered collector plugins. */
	List<ProtocolDescriptor> listProtocols();

	/**
	 * Lists the valid values of a protocol parameter, computed by the parameter's
	 * populate function (empty list when the parameter declares none).
	 */
	List<ProtocolParameterValue> listValidValuesForParam(
			@WebParam(name = "protocol") String protocol,
			@WebParam(name = "baseUrl") String baseUrl,
			@WebParam(name = "param") String param,
			@WebParam(name = "otherParams") Map<String, String> otherParams) throws CollectorServiceException;

}
|
|
@ -0,0 +1,25 @@
|
|||
package eu.dnetlib.data.collector.rmi;

import eu.dnetlib.common.rmi.RMIException;

/**
 * Checked exception thrown by the collector service and its plugins.
 */
public class CollectorServiceException extends RMIException {

	/** Serialization id. */
	private static final long serialVersionUID = 7523999812098059764L;

	public CollectorServiceException(String string) {
		super(string);
	}


	public CollectorServiceException(String string, Throwable exception) {
		super(string, exception);
	}

	public CollectorServiceException(Throwable exception) {
		super(exception);
	}

}
|
|
@ -0,0 +1,22 @@
|
|||
package eu.dnetlib.data.collector.rmi;

/**
 * Unchecked counterpart of {@code CollectorServiceException}, for failures
 * raised in contexts that cannot declare a checked exception.
 */
public class CollectorServiceRuntimeException extends RuntimeException {

	/** Serialization id. */
	private static final long serialVersionUID = 6317717870955037359L;

	public CollectorServiceRuntimeException(final String string) {
		super(string);
	}

	public CollectorServiceRuntimeException(final String string, final Throwable exception) {
		super(string, exception);
	}

	public CollectorServiceRuntimeException(final Throwable exception) {
		super(exception);
	}

}
|
|
@ -0,0 +1,70 @@
|
|||
package eu.dnetlib.data.collector.rmi;

import java.util.HashMap;

import javax.xml.bind.annotation.XmlRootElement;

import org.dom4j.Node;
import org.springframework.beans.factory.annotation.Required;

import com.google.common.collect.Maps;

/**
 * Bean describing a datasource interface: identifier, base URL, access protocol
 * name and the protocol-specific parameters.
 */
@XmlRootElement
public class InterfaceDescriptor {

	// interface identifier
	private String id;

	// base URL of the remote datasource
	private String baseUrl;

	// name of the access protocol
	private String protocol;

	// protocol-specific parameters (name -> value)
	// NOTE(review): concrete HashMap presumably required by JAXB binding — confirm
	private HashMap<String, String> params = Maps.newHashMap();

	public String getBaseUrl() {
		return baseUrl;
	}

	public void setBaseUrl(final String baseUrl) {
		this.baseUrl = baseUrl;
	}

	public String getId() {
		return id;
	}

	@Required
	public void setId(final String id) {
		this.id = id;
	}

	public HashMap<String, String> getParams() {
		return params;
	}

	public void setParams(final HashMap<String, String> params) {
		this.params = params;
	}

	public String getProtocol() {
		return protocol;
	}

	public void setProtocol(final String protocol) {
		this.protocol = protocol;
	}

	/**
	 * Builds a descriptor from an XML profile fragment: reads the {@code id}
	 * attribute, the {@code BASE_URL} element, the {@code ACCESS_PROTOCOL}
	 * element and every attribute of {@code ACCESS_PROTOCOL} as a parameter.
	 *
	 * @param node the dom4j node of the interface profile fragment
	 * @return the populated descriptor
	 */
	public static InterfaceDescriptor newInstance(final Node node) {
		final InterfaceDescriptor ifc = new InterfaceDescriptor();
		ifc.setId(node.valueOf("./@id"));
		ifc.setBaseUrl(node.valueOf("./BASE_URL"));
		ifc.setProtocol(node.valueOf("./ACCESS_PROTOCOL"));

		// each attribute of ACCESS_PROTOCOL becomes a protocol parameter
		for (Object o : node.selectNodes("./ACCESS_PROTOCOL/@*")) {
			final Node n = (Node) o;
			ifc.getParams().put(n.getName(), n.getText());
		}

		return ifc;
	}

}
|
|
@ -0,0 +1,39 @@
|
|||
package eu.dnetlib.data.collector.rmi;

import java.util.ArrayList;
import java.util.List;

import javax.xml.bind.annotation.XmlRootElement;

import org.springframework.beans.factory.annotation.Required;

/**
 * Bean describing an access protocol: its name and the list of its parameters.
 */
@XmlRootElement
public class ProtocolDescriptor {

	// protocol name (e.g. the value matched case-insensitively by the plugin enumerator)
	private String name;
	// parameters accepted by the protocol
	private List<ProtocolParameter> params = new ArrayList<ProtocolParameter>();

	public ProtocolDescriptor() {}

	public ProtocolDescriptor(final String name, final List<ProtocolParameter> params) {
		this.name = name;
		this.params = params;
	}

	public String getName() {
		return name;
	}

	@Required
	public void setName(final String name) {
		this.name = name;
	}

	public List<ProtocolParameter> getParams() {
		return params;
	}

	public void setParams(final List<ProtocolParameter> params) {
		this.params = params;
	}
}
|
|
@ -0,0 +1,87 @@
|
|||
package eu.dnetlib.data.collector.rmi;

import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlTransient;

import org.springframework.beans.factory.annotation.Required;

import eu.dnetlib.data.collector.functions.ParamValuesFunction;

/**
 * Bean describing a single protocol parameter: name, optionality, type,
 * an optional validation regex and an optional function that computes the
 * admissible values at runtime.
 */
@XmlRootElement
public class ProtocolParameter {

	// parameter name
	private String name;
	// true when the parameter may be omitted
	private boolean optional = false;
	// parameter type, defaults to free text
	private ProtocolParameterType type = ProtocolParameterType.TEXT;
	// optional validation regex (null when absent)
	private String regex = null;
	// function computing the valid values; transient because it is not serializable
	private transient ParamValuesFunction populateFunction = null;
	// serialized flag mirroring (populateFunction != null), kept in sync by the
	// constructor and by setPopulateFunction so remote clients can test it
	private boolean functionPopulated = false;

	public ProtocolParameter() {}

	public ProtocolParameter(final String name, final boolean optional, final ProtocolParameterType type, final String regex) {
		this(name, optional, type, regex, null);
	}

	public ProtocolParameter(final String name, final boolean optional, final ProtocolParameterType type, final String regex,
			final ParamValuesFunction populateFunction) {
		this.name = name;
		this.optional = optional;
		this.type = type;
		this.regex = regex;
		this.populateFunction = populateFunction;
		this.functionPopulated = this.populateFunction != null;
	}

	public String getName() {
		return name;
	}

	@Required
	public void setName(final String name) {
		this.name = name;
	}

	public boolean isOptional() {
		return optional;
	}

	public void setOptional(final boolean optional) {
		this.optional = optional;
	}

	public ProtocolParameterType getType() {
		return type;
	}

	public void setType(final ProtocolParameterType type) {
		this.type = type;
	}

	public String getRegex() {
		return regex;
	}

	public void setRegex(final String regex) {
		this.regex = regex;
	}

	// excluded from the XML representation: the function only exists server side
	@XmlTransient
	public ParamValuesFunction getPopulateFunction() {
		return populateFunction;
	}

	public void setPopulateFunction(final ParamValuesFunction populateFunction) {
		this.populateFunction = populateFunction;
		this.functionPopulated = this.populateFunction != null;
	}

	public boolean isFunctionPopulated() {
		return functionPopulated;
	}

	public void setFunctionPopulated(final boolean functionPopulated) {
		this.functionPopulated = functionPopulated;
	}

}
|
|
@ -0,0 +1,8 @@
|
|||
package eu.dnetlib.data.collector.rmi;

import javax.xml.bind.annotation.XmlEnum;

/** Admissible types of a protocol parameter. */
@XmlEnum
public enum ProtocolParameterType {
	TEXT, NUMBER, LIST, BOOLEAN
}
|
|
@ -0,0 +1,34 @@
|
|||
package eu.dnetlib.data.collector.rmi;
|
||||
|
||||
import javax.xml.bind.annotation.XmlRootElement;
|
||||
|
||||
@XmlRootElement
|
||||
public class ProtocolParameterValue {
|
||||
|
||||
private String id;
|
||||
private String name;
|
||||
|
||||
public ProtocolParameterValue() {}
|
||||
|
||||
public ProtocolParameterValue(final String id, final String name) {
|
||||
this.id = id;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(final String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public void setName(final String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
}
|
|
@ -23,11 +23,42 @@
|
|||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.json</groupId>
|
||||
<artifactId>json</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.ximpleware</groupId>
|
||||
<artifactId>vtd-xml</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.jcraft</groupId>
|
||||
<artifactId>jsch</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-compress</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-net</groupId>
|
||||
<artifactId>commons-net</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-csv</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-ooxml</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.jsoup</groupId>
|
||||
<artifactId>jsoup</artifactId>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
package eu.dnetlib.data.collector;

import java.util.Collection;

import org.springframework.beans.BeansException;
import org.springframework.beans.factory.BeanFactory;
import org.springframework.beans.factory.BeanFactoryAware;
import org.springframework.beans.factory.ListableBeanFactory;

import eu.dnetlib.data.collector.plugin.CollectorPlugin;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;

/**
 * Enumerates the CollectorPlugin beans registered in the Spring context and
 * resolves the plugin responsible for a given protocol.
 */
public class CollectorPluginEnumerator implements BeanFactoryAware {

	// private static final Log log = LogFactory.getLog(CollectorPluginEnumerator.class); // NOPMD by marko on 11/24/08 5:02 PM

	/**
	 * bean factory.
	 */
	private ListableBeanFactory beanFactory;

	/**
	 * Get all beans implementing the CollectorPlugin interface.
	 *
	 * @return the set of eu.dnetlib.data.collector.plugin.CollectorPlugin(s)
	 */
	public Collection<CollectorPlugin> getAll() {
		return beanFactory.getBeansOfType(CollectorPlugin.class).values();
	}

	@Override
	public void setBeanFactory(final BeanFactory beanFactory) throws BeansException {
		// cast is safe in practice: application contexts are ListableBeanFactory
		this.beanFactory = (ListableBeanFactory) beanFactory;
	}

	public ListableBeanFactory getBeanFactory() {
		return beanFactory;
	}

	/**
	 * Get given CollectorPlugin or throws exception.
	 *
	 * @param protocol the given protocol (matched case-insensitively)
	 * @return a CollectorPlugin compatible with the given protocol
	 * @throws CollectorServiceException when no suitable plugin is found
	 */
	public CollectorPlugin get(final String protocol) throws CollectorServiceException {
		for (CollectorPlugin cp : getAll()) {
			if (protocol.equalsIgnoreCase(cp.getProtocol())) {
				return cp;
			}
		}
		throw new CollectorServiceException("plugin not found for protocol: " + protocol);
	}
}
|
|
@ -0,0 +1,77 @@
|
|||
package eu.dnetlib.data.collector;

import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.annotation.Resource;
import javax.xml.ws.wsaddressing.W3CEndpointReference;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import eu.dnetlib.data.collector.plugin.CollectorPlugin;
import eu.dnetlib.data.collector.rmi.CollectorService;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
import eu.dnetlib.data.collector.rmi.ProtocolDescriptor;
import eu.dnetlib.data.collector.rmi.ProtocolParameter;
import eu.dnetlib.data.collector.rmi.ProtocolParameterValue;
import eu.dnetlib.enabling.resultset.IterableResultSetFactory;
import eu.dnetlib.enabling.tools.AbstractBaseService;

/**
 * Implementation of the CollectorService: delegates the collection to the
 * plugin registered for the interface protocol and publishes the collected
 * records through a resultset.
 */
public class CollectorServiceImpl extends AbstractBaseService implements CollectorService {

	// resolves the plugin for a given protocol
	@Resource
	private CollectorPluginEnumerator collectorPluginEnumerator;

	// wraps the collected records into a resultset EPR
	@Resource
	private IterableResultSetFactory iterableResultSetFactory;

	/** Collects without date bounds (delegates with null from/until). */
	@Override
	public W3CEndpointReference collect(final InterfaceDescriptor ifDescriptor) throws CollectorServiceException {
		return dateRangeCollect(ifDescriptor, null, null);
	}

	/**
	 * Collects the records in the given date range: validates the interface
	 * parameters against the plugin, then exposes the records as a resultset.
	 */
	@Override
	public W3CEndpointReference dateRangeCollect(
			final InterfaceDescriptor ifDescriptor, final String from, final String until)
			throws CollectorServiceException {
		final CollectorPlugin plugin = collectorPluginEnumerator.get(ifDescriptor.getProtocol());

		// every parameter on the interface must be declared by the plugin
		if (!verifyParams(ifDescriptor.getParams().keySet(), Sets.newHashSet(plugin.listNameParameters()))) { throw new CollectorServiceException(
				"Invalid parameters, valid: " + plugin.listNameParameters() + ", current: " + ifDescriptor.getParams().keySet()); }

		final Iterable<String> iter = plugin.collect(ifDescriptor, from, until);

		return iterableResultSetFactory.createIterableResultSet(iter);
	}

	/** Lists the protocol descriptors of all registered plugins. */
	@Override
	public List<ProtocolDescriptor> listProtocols() {
		final List<ProtocolDescriptor> list = Lists.newArrayList();
		for (CollectorPlugin plugin : collectorPluginEnumerator.getAll()) {
			list.add(plugin.getProtocolDescriptor());
		}
		return list;
	}

	/**
	 * Computes the valid values of the named parameter via its populate
	 * function; returns an empty list when the parameter has no function.
	 */
	@Override
	public List<ProtocolParameterValue> listValidValuesForParam(final String protocol,
			final String baseUrl,
			final String param,
			final Map<String, String> otherParams) throws CollectorServiceException {
		final CollectorPlugin plugin = collectorPluginEnumerator.get(protocol);

		for (ProtocolParameter pp : plugin.getProtocolDescriptor().getParams()) {
			if (pp.getName().equals(param) && pp.isFunctionPopulated()) { return pp.getPopulateFunction().findValues(baseUrl, otherParams); }
		}

		return Lists.newArrayList();
	}

	// true when every current parameter name is among the valid ones
	private boolean verifyParams(final Set<String> curr, final Set<String> valid) {
		return valid.containsAll(curr);
	}

}
|
|
@ -0,0 +1,56 @@
|
|||
package eu.dnetlib.data.collector.functions;

import java.io.StringReader;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.springframework.beans.factory.annotation.Required;

import com.google.common.base.Function;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;

import eu.dnetlib.data.collector.plugins.oaisets.OaiSetsIteratorFactory;
import eu.dnetlib.data.collector.rmi.ProtocolParameterValue;

/**
 * ParamValuesFunction listing the OAI sets of a repository: each XML fragment
 * returned by the sets iterator is mapped to a ProtocolParameterValue with
 * setSpec as id and "setSpec - name: ..." as label (or just setSpec when the
 * set name is blank or equal to the spec).
 */
public class ListOaiSetsFunction implements ParamValuesFunction {

	private OaiSetsIteratorFactory oaiSetsIteratorFactory;

	@Override
	public List<ProtocolParameterValue> findValues(final String baseUrl, final Map<String, String> params) {
		// NOTE(review): this single SAXReader instance is shared by every apply()
		// call of the transforming function — confirm the iteration stays single-threaded.
		final SAXReader reader = new SAXReader();

		final Iterator<ProtocolParameterValue> iter = Iterators.transform(oaiSetsIteratorFactory.newIterator(baseUrl),
				new Function<String, ProtocolParameterValue>() {

					@Override
					public ProtocolParameterValue apply(final String s) {
						try {
							final Document doc = reader.read(new StringReader(s));
							final String id = doc.valueOf("//*[local-name()='setSpec']");
							final String name = doc.valueOf("//*[local-name()='setName']");
							// label falls back to the bare setSpec when no distinct name exists
							return new ProtocolParameterValue(id,
									(StringUtils.isBlank(name) || name.equalsIgnoreCase(id)) ? id : id + " - name: \"" + name + "\"");
						} catch (final DocumentException e) {
							throw new RuntimeException("Error in ListSets", e);
						}
					}
				});
		// materialize eagerly so parsing errors surface here, not at the caller
		return Lists.newArrayList(iter);
	}

	public OaiSetsIteratorFactory getOaiSetsIteratorFactory() {
		return oaiSetsIteratorFactory;
	}

	@Required
	public void setOaiSetsIteratorFactory(final OaiSetsIteratorFactory oaiSetsIteratorFactory) {
		this.oaiSetsIteratorFactory = oaiSetsIteratorFactory;
	}
}
|
|
@ -0,0 +1,38 @@
|
|||
package eu.dnetlib.data.collector.plugins;

import java.io.BufferedInputStream;
import java.util.Iterator;

import org.apache.commons.lang3.StringUtils;

import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
import eu.dnetlib.miscutils.iterators.xml.XMLIterator;

/**
 * Base class for collector plugins that split a single XML stream into records
 * on a configurable element name (the "splitOnElement" parameter).
 */
public abstract class AbstractSplittedRecordPlugin extends AbstractCollectorPlugin {

	@Override
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
			throws CollectorServiceException {

		final String baseUrl = interfaceDescriptor.getBaseUrl();
		final String element = interfaceDescriptor.getParams().get("splitOnElement");

		// both the URL and the splitting element are mandatory
		if (StringUtils.isBlank(baseUrl)) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
		if (StringUtils.isBlank(element)) { throw new CollectorServiceException("Param 'splitOnElement' is null or empty"); }

		// NOTE(review): the stream is opened once, so the returned Iterable can be
		// consumed a single time — confirm callers never iterate it twice.
		final BufferedInputStream bis = getBufferedInputStream(baseUrl);

		return () -> new XMLIterator(element, bis);
	}

	/**
	 * Opens the stream that will be split into records.
	 *
	 * @param baseUrl the location of the XML source
	 * @throws CollectorServiceException when the stream cannot be opened
	 */
	abstract protected BufferedInputStream getBufferedInputStream(final String baseUrl) throws CollectorServiceException;

}
|
|
@ -0,0 +1,19 @@
|
|||
package eu.dnetlib.data.collector.plugins;

import java.io.BufferedInputStream;
import java.net.URL;

import eu.dnetlib.data.collector.rmi.CollectorServiceException;

/**
 * Collector plugin reading the XML source from a classpath resource;
 * the path component of the base URL is used as the resource name.
 */
public class ClasspathCollectorPlugin extends AbstractSplittedRecordPlugin {

	@Override
	protected BufferedInputStream getBufferedInputStream(final String baseUrl) throws CollectorServiceException {
		try {
			// getResourceAsStream() returns null for a missing resource; the resulting
			// NPE is caught below and wrapped with the offending URL
			return new BufferedInputStream(getClass().getResourceAsStream(new URL(baseUrl).getPath()));
		} catch (Exception e) {
			// BUG FIX: preserve the original exception as the cause (it was dropped before)
			throw new CollectorServiceException("Error downloading url: " + baseUrl, e);
		}
	}

}
|
|
@ -0,0 +1,149 @@
|
|||
package eu.dnetlib.data.collector.plugins;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;

import org.apache.commons.io.input.BOMInputStream;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;

import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;

/**
 * Please use eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin instead
 */
@Deprecated
public class FileCSVCollectorPlugin extends AbstractCollectorPlugin {

	private static final Log log = LogFactory.getLog(FileCSVCollectorPlugin.class);

	// Iterator converting each CSV line into a <csvRecord> XML document.
	class FileCSVIterator implements Iterator<String> {

		// next record (as XML), null once the file is exhausted
		private String next;

		private BufferedReader reader;

		// column separator (already unescaped by the caller)
		private String separator;
		// optional quote character to strip from values (blank = no stripping)
		private String quote;

		public FileCSVIterator(final BufferedReader reader, final String separator, final String quote) {
			this.reader = reader;
			this.separator = separator;
			this.quote = quote;
			// read ahead so hasNext() can be answered immediately
			next = calculateNext();
		}

		@Override
		public boolean hasNext() {
			return next != null;
		}

		@Override
		public String next() {
			final String s = next;
			next = calculateNext();
			return s;
		}

		// Reads one line and converts it to XML; returns null (and closes the reader) at EOF.
		private String calculateNext() {
			try {
				final Document document = DocumentHelper.createDocument();
				final Element root = document.addElement("csvRecord");

				String newLine = reader.readLine();

				// FOR SOME FILES IT RETURN NULL ALSO IF THE FILE IS NOT READY DONE
				if (newLine == null) {
					newLine = reader.readLine();
				}
				if (newLine == null) {
					log.info("there is no line, closing RESULT SET");

					reader.close();
					return null;
				}
				final String[] currentRow = newLine.split(separator);

				if (currentRow != null) {

					for (int i = 0; i < currentRow.length; i++) {
						// column name from the header row when available, otherwise "columnN"
						final String hAttribute = (headers != null) && (i < headers.length) ? headers[i] : "column" + i;

						final Element row = root.addElement("column");
						// mark the column configured as record identifier
						if (i == identifierNumber) {
							row.addAttribute("isID", "true");
						}
						final String value = StringUtils.isBlank(quote) ? currentRow[i] : StringUtils.strip(currentRow[i], quote);

						row.addAttribute("name", hAttribute).addText(value);
					}
					return document.asXML();
				}
			} catch (final IOException e) {
				log.error("Error calculating next csv element", e);
			}
			return null;
		}

		@Override
		public void remove() {
			throw new UnsupportedOperationException();
		}

	}

	// header names parsed from the first row (only when the "header" param is "true")
	private String[] headers = null;
	// index of the column flagged as record identifier (the "identifier" param)
	private int identifierNumber;

	/**
	 * Reads the CSV file referenced by the interface base URL (treated as a
	 * file path) and returns an iterable of XML records, one per row.
	 * Recognized params: header, separator, quote, identifier.
	 */
	@Override
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
			throws CollectorServiceException {
		final String header = interfaceDescriptor.getParams().get("header");
		final String separator = StringEscapeUtils.unescapeJava(interfaceDescriptor.getParams().get("separator"));
		final String quote = interfaceDescriptor.getParams().get("quote");

		identifierNumber = Integer.parseInt(interfaceDescriptor.getParams().get("identifier"));
		URL u = null;
		try {
			u = new URL(interfaceDescriptor.getBaseUrl());
		} catch (final MalformedURLException e1) {
			throw new CollectorServiceException(e1);
		}
		final String baseUrl = u.getPath();

		log.info("base URL = " + baseUrl);

		try {

			// BOMInputStream transparently skips a possible UTF byte-order mark
			final BufferedReader br = new BufferedReader(new InputStreamReader(new BOMInputStream(new FileInputStream(baseUrl))));

			if ((header != null) && "true".equals(header.toLowerCase())) {
				final String[] tmpHeader = br.readLine().split(separator);
				if (StringUtils.isNotBlank(quote)) {
					// strip the quote character from each header name
					int i = 0;
					headers = new String[tmpHeader.length];
					for (final String h : tmpHeader) {
						headers[i] = StringUtils.strip(h, quote);
						i++;
					}
				} else headers = tmpHeader;
			}
			return () -> new FileCSVIterator(br, separator, quote);
		} catch (final Exception e) {
			throw new CollectorServiceException(e);
		}
	}

}
|
|
@ -0,0 +1,20 @@
|
|||
package eu.dnetlib.data.collector.plugins;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.net.URL;

import eu.dnetlib.data.collector.rmi.CollectorServiceException;

/**
 * Collector plugin reading the XML source from a local file;
 * the path component of the base URL is used as the file path.
 */
public class FileCollectorPlugin extends AbstractSplittedRecordPlugin {

	@Override
	protected BufferedInputStream getBufferedInputStream(final String baseUrl) throws CollectorServiceException {
		try {
			final String path = new URL(baseUrl).getPath();
			return new BufferedInputStream(new FileInputStream(path));
		} catch (Exception e) {
			throw new CollectorServiceException("Error reading file " + baseUrl, e);
		}
	}

}
|
|
@ -0,0 +1,23 @@
|
|||
package eu.dnetlib.data.collector.plugins;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.net.URL;
import java.util.zip.GZIPInputStream;

import eu.dnetlib.data.collector.rmi.CollectorServiceException;

/**
 * Collector plugin reading the XML source from a local gzip-compressed file;
 * the path component of the base URL is used as the file path.
 */
public class FileGZipCollectorPlugin extends AbstractSplittedRecordPlugin {

	@Override
	protected BufferedInputStream getBufferedInputStream(final String baseUrl) throws CollectorServiceException {

		try {
			GZIPInputStream stream = new GZIPInputStream(new FileInputStream(new URL(baseUrl).getPath()));
			return new BufferedInputStream(stream);
		} catch (Exception e) {
			// include the URL in the message for consistency with FileCollectorPlugin
			// (the original wrapped the cause but gave no context)
			throw new CollectorServiceException("Error reading gzip file " + baseUrl, e);
		}
	}

}
|
|
@ -0,0 +1,170 @@
|
|||
package eu.dnetlib.data.collector.plugins;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URL;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
import com.google.common.collect.Iterators;
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVParser;
|
||||
import org.apache.commons.io.input.BOMInputStream;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentHelper;
|
||||
import org.dom4j.Element;
|
||||
|
||||
/**
|
||||
* The Class HttpCSVCollectorPlugin.
|
||||
*/
|
||||
/**
 * Collector plugin that downloads a CSV file over HTTP and exposes each CSV
 * record as a small XML document: a {@code <csvRecord>} root with one
 * {@code <column>} element per CSV header.
 */
public class HttpCSVCollectorPlugin extends AbstractCollectorPlugin {

	private static final Log log = LogFactory.getLog(HttpCSVCollectorPlugin.class);

	// UTF-8 byte-order mark; the download path strips it via BOMInputStream.
	public static final String UTF8_BOM = "\uFEFF";

	/**
	 * Iterable over the records of the remote CSV file referenced by the
	 * interface descriptor's baseUrl.
	 */
	class HTTPCSVIterator implements Iterable<String> {

		/** Carries baseUrl plus the CSV params: "separator", "identifier", "quote". */
		private InterfaceDescriptor descriptor;

		/**
		 * Instantiates a new HTTPCSV iterator.
		 *
		 * @param descriptor
		 *            the descriptor
		 */
		public HTTPCSVIterator(final InterfaceDescriptor descriptor) {
			this.descriptor = descriptor;
		}

		/**
		 * Downloads the CSV into a temp file (skipping lines with unbalanced
		 * double quotes), then lazily maps each parsed record to an XML string.
		 *
		 * NOTE(review): on any failure this returns null instead of throwing,
		 * which will NPE in a for-each loop — TODO confirm callers tolerate a
		 * null iterator.
		 *
		 * @return the iterator, or null on error
		 */
		@SuppressWarnings("resource")
		@Override
		public Iterator<String> iterator() {

			try {
				final String separator = descriptor.getParams().get("separator");
				final String identifier = descriptor.getParams().get("identifier");
				final String quote = descriptor.getParams().get("quote");
				final URL url = new URL(descriptor.getBaseUrl());
				long nLines = 0;

				// FIX
				// This code should skip the lines with invalid quotes
				final File tempFile = File.createTempFile("csv-", ".tmp");
				try (InputStream is = url.openConnection().getInputStream();
						BOMInputStream bomIs = new BOMInputStream(is);
						BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs));
						FileWriter fw = new FileWriter(tempFile)) {

					String line;
					while ((line = reader.readLine()) != null) {
						// Quote validation is applied only when the configured quote char is '"'.
						if (StringUtils.isBlank(quote) || (quote.charAt(0) != '"') || verifyQuotes(line, separator.charAt(0))) {
							fw.write(line);
							fw.write("\n");
							nLines++;
						}
					}
				}
				// END FIX

				// "\\t" (escaped tab) or a blank separator both mean a real tab delimiter.
				final CSVFormat format = CSVFormat.EXCEL
						.withHeader()
						.withDelimiter(separator.equals("\\t") || StringUtils.isBlank(separator) ? '\t' : separator.charAt(0))
						.withQuote(StringUtils.isBlank(quote) ? null : quote.charAt(0))
						.withTrim();

				final CSVParser parser = new CSVParser(new FileReader(tempFile), format);
				final Set<String> headers = parser.getHeaderMap().keySet();

				// First kept line is the header row, so records = lines - 1.
				final long nRecords = nLines - 1;

				return Iterators.transform(parser.iterator(), input -> {
					try {
						final Document document = DocumentHelper.createDocument();
						final Element root = document.addElement("csvRecord");
						for (final String key : headers) {
							final Element row = root.addElement("column");
							row.addAttribute("name", key).addText(XmlCleaner.cleanAllEntities(input.get(key)));
							// Mark the configured identifier column so downstream code can find the record id.
							if (key.equals(identifier)) {
								row.addAttribute("isID", "true");
							}
						}

						return document.asXML();
					} finally {
						log.debug(tempFile.getAbsolutePath());
						// Delete the temp file after the last record is emitted.
						// NOTE(review): the temp file leaks if iteration stops early — TODO confirm acceptable.
						if (parser.getRecordNumber() == nRecords) {
							log.debug("DELETING " + tempFile.getAbsolutePath());
							tempFile.delete();
						}
					}
				});
			} catch (final Exception e) {
				log.error("Error iterating csv lines", e);
				return null;
			}
		}

	}

	/*
	 * (non-Javadoc)
	 *
	 * @see eu.dnetlib.data.collector.plugin.CollectorPlugin#collect(eu.dnetlib.data.collector.rmi.InterfaceDescriptor, java.lang.String,
	 * java.lang.String)
	 */
	@Override
	public Iterable<String> collect(final InterfaceDescriptor descriptor, final String fromDate, final String untilDate) throws CollectorServiceException {

		return new HTTPCSVIterator(descriptor);
	}

	/**
	 * Checks that every double quote in the line opens a quoted field right
	 * after a separator (or at line start) and closes it right before a
	 * separator (or at line end), treating "" inside a field as an escaped
	 * quote. Lines that fail the check are logged and rejected.
	 *
	 * @param line the raw CSV line
	 * @param separator the field separator character
	 * @return true when the quoting is balanced, false otherwise
	 */
	public boolean verifyQuotes(final String line, final char separator) {
		final char[] cs = line.trim().toCharArray();
		boolean inField = false;
		boolean skipNext = false;
		for (int i = 0; i < cs.length; i++) {
			if (skipNext) {
				skipNext = false;
			} else if (inField) {
				if ((cs[i] == '\"') && ((i == (cs.length - 1)) || (cs[i + 1] == separator))) {
					inField = false;
				} else if ((cs[i] == '\"') && (i < (cs.length - 1))) {
					if ((cs[i + 1] == '\"')) {
						// Escaped quote ("") inside a quoted field: consume both characters.
						skipNext = true;
					} else {
						log.warn("Skipped invalid line: " + line);
						return false;
					}
				}
			} else {
				if ((cs[i] == '\"') && ((i == 0) || (cs[i - 1] == separator))) {
					inField = true;
				}
			}
		}

		// A field left open at end of line is invalid.
		if (inField) {
			log.warn("Skipped invalid line: " + line);
			return false;
		}

		return true;
	}

}
|
|
@ -0,0 +1,39 @@
|
|||
package eu.dnetlib.data.collector.plugins;
|
||||
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.http.HttpStatus;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
public class HttpCollectorPlugin extends AbstractSplittedRecordPlugin {
|
||||
|
||||
@Override
|
||||
protected BufferedInputStream getBufferedInputStream(final String baseUrl) throws CollectorServiceException {
|
||||
final HttpGet method = new HttpGet(baseUrl);
|
||||
|
||||
try(CloseableHttpResponse response = HttpClients.createDefault().execute(method)) {
|
||||
|
||||
int responseCode = response.getStatusLine().getStatusCode();
|
||||
|
||||
if (HttpStatus.SC_OK != responseCode) {
|
||||
throw new CollectorServiceException("Error " + responseCode + " dowloading url: " + baseUrl);
|
||||
}
|
||||
|
||||
byte[] content = IOUtils.toByteArray(response.getEntity().getContent());
|
||||
|
||||
try(InputStream in = new ByteArrayInputStream(content)) {
|
||||
return new BufferedInputStream(in);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new CollectorServiceException("Error dowloading url: " + baseUrl);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,224 @@
|
|||
package eu.dnetlib.data.collector.plugins;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.*;
|
||||
import java.security.GeneralSecurityException;
|
||||
import java.security.cert.X509Certificate;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import javax.net.ssl.HttpsURLConnection;
|
||||
import javax.net.ssl.SSLContext;
|
||||
import javax.net.ssl.TrustManager;
|
||||
import javax.net.ssl.X509TrustManager;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.CollectorPluginErrorLogList;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.math.NumberUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
* @author jochen, michele, andrea
|
||||
*/
|
||||
public class HttpConnector {
|
||||
|
||||
private static final Log log = LogFactory.getLog(HttpConnector.class);
|
||||
|
||||
private int maxNumberOfRetry = 6;
|
||||
private int defaultDelay = 120; // seconds
|
||||
private int readTimeOut = 120; // seconds
|
||||
|
||||
private String responseType = null;
|
||||
|
||||
private String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
|
||||
|
||||
public HttpConnector() {
|
||||
CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
|
||||
}
|
||||
|
||||
/**
|
||||
* Given the URL returns the content via HTTP GET
|
||||
*
|
||||
* @param requestUrl the URL
|
||||
* @return the content of the downloaded resource
|
||||
* @throws CollectorServiceException when retrying more than maxNumberOfRetry times
|
||||
*/
|
||||
public String getInputSource(final String requestUrl) throws CollectorServiceException {
|
||||
return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
|
||||
}
|
||||
|
||||
/**
|
||||
* Given the URL returns the content as a stream via HTTP GET
|
||||
*
|
||||
* @param requestUrl the URL
|
||||
* @return the content of the downloaded resource as InputStream
|
||||
* @throws CollectorServiceException when retrying more than maxNumberOfRetry times
|
||||
*/
|
||||
public InputStream getInputSourceAsStream(final String requestUrl) throws CollectorServiceException {
|
||||
return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
|
||||
}
|
||||
|
||||
private String attemptDownlaodAsString(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
|
||||
throws CollectorServiceException {
|
||||
try {
|
||||
InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
|
||||
try {
|
||||
return IOUtils.toString(s);
|
||||
} catch (IOException e) {
|
||||
log.error("error while retrieving from http-connection occured: " + requestUrl, e);
|
||||
Thread.sleep(defaultDelay * 1000);
|
||||
errorList.add(e.getMessage());
|
||||
return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList);
|
||||
}
|
||||
finally{
|
||||
IOUtils.closeQuietly(s);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
throw new CollectorServiceException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private InputStream attemptDownload(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
|
||||
throws CollectorServiceException {
|
||||
|
||||
if (retryNumber > maxNumberOfRetry) { throw new CollectorServiceException("Max number of retries exceeded. Cause: \n " + errorList); }
|
||||
|
||||
log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
|
||||
try {
|
||||
InputStream input = null;
|
||||
|
||||
try {
|
||||
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
|
||||
urlConn.setInstanceFollowRedirects(false);
|
||||
urlConn.setReadTimeout(readTimeOut * 1000);
|
||||
urlConn.addRequestProperty("User-Agent", userAgent);
|
||||
|
||||
if (log.isDebugEnabled()) {
|
||||
logHeaderFields(urlConn);
|
||||
}
|
||||
|
||||
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
|
||||
if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
|
||||
log.warn("waiting and repeating request after " + retryAfter + " sec.");
|
||||
Thread.sleep(retryAfter * 1000);
|
||||
errorList.add("503 Service Unavailable");
|
||||
urlConn.disconnect();
|
||||
return attemptDownload(requestUrl, retryNumber + 1, errorList);
|
||||
} else if ((urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM) || (urlConn.getResponseCode()
|
||||
== HttpURLConnection.HTTP_MOVED_TEMP)) {
|
||||
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
|
||||
log.debug("The requested url has been moved to " + newUrl);
|
||||
errorList.add(String.format("%s %s. Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl));
|
||||
urlConn.disconnect();
|
||||
return attemptDownload(newUrl, retryNumber + 1, errorList);
|
||||
} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
|
||||
log.error(String.format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
|
||||
Thread.sleep(defaultDelay * 1000);
|
||||
errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
|
||||
urlConn.disconnect();
|
||||
return attemptDownload(requestUrl, retryNumber + 1, errorList);
|
||||
} else {
|
||||
input = urlConn.getInputStream();
|
||||
responseType = urlConn.getContentType();
|
||||
return input;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("error while retrieving from http-connection occured: " + requestUrl, e);
|
||||
Thread.sleep(defaultDelay * 1000);
|
||||
errorList.add(e.getMessage());
|
||||
return attemptDownload(requestUrl, retryNumber + 1, errorList);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
throw new CollectorServiceException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
|
||||
log.debug("StatusCode: " + urlConn.getResponseMessage());
|
||||
|
||||
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
|
||||
if (e.getKey() != null) {
|
||||
for (String v : e.getValue()) {
|
||||
log.debug(" key: " + e.getKey() + " - value: " + v);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
|
||||
for (String key : headerMap.keySet()) {
|
||||
if ((key != null) && key.toLowerCase().equals("retry-after") && (headerMap.get(key).size() > 0) && NumberUtils.isCreatable(headerMap.get(key).get(0))) {
|
||||
return Integer
|
||||
.parseInt(headerMap.get(key).get(0)) + 10;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorServiceException {
|
||||
for (String key : headerMap.keySet()) {
|
||||
if ((key != null) && key.toLowerCase().equals("location") && (headerMap.get(key).size() > 0)) { return headerMap.get(key).get(0); }
|
||||
}
|
||||
throw new CollectorServiceException("The requested url has been MOVED, but 'location' param is MISSING");
|
||||
}
|
||||
|
||||
/**
|
||||
* register for https scheme; this is a workaround and not intended for the use in trusted environments
|
||||
*/
|
||||
public void initTrustManager() {
|
||||
final X509TrustManager tm = new X509TrustManager() {
|
||||
|
||||
@Override
|
||||
public void checkClientTrusted(final X509Certificate[] xcs, final String string) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkServerTrusted(final X509Certificate[] xcs, final String string) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public X509Certificate[] getAcceptedIssuers() {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
try {
|
||||
final SSLContext ctx = SSLContext.getInstance("TLS");
|
||||
ctx.init(null, new TrustManager[] { tm }, null);
|
||||
HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
|
||||
} catch (GeneralSecurityException e) {
|
||||
log.fatal(e);
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public int getMaxNumberOfRetry() {
|
||||
return maxNumberOfRetry;
|
||||
}
|
||||
|
||||
public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
|
||||
this.maxNumberOfRetry = maxNumberOfRetry;
|
||||
}
|
||||
|
||||
public int getDefaultDelay() {
|
||||
return defaultDelay;
|
||||
}
|
||||
|
||||
public void setDefaultDelay(final int defaultDelay) {
|
||||
this.defaultDelay = defaultDelay;
|
||||
}
|
||||
|
||||
public int getReadTimeOut() {
|
||||
return readTimeOut;
|
||||
}
|
||||
|
||||
public void setReadTimeOut(final int readTimeOut) {
|
||||
this.readTimeOut = readTimeOut;
|
||||
}
|
||||
|
||||
public String getResponseType() {
|
||||
return responseType;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
package eu.dnetlib.data.collector.plugins.archive.targz;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
/**
|
||||
* Collector pluging for collecting a .tar.gz folder of records
|
||||
*
|
||||
* @author andrea
|
||||
*
|
||||
*/
|
||||
public class TarGzCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
|
||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||
if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
|
||||
return new TarGzIterable(interfaceDescriptor);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
package eu.dnetlib.data.collector.plugins.archive.targz;
|
||||
|
||||
import java.io.File;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Iterator;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.collect.Iterators;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
/**
|
||||
* The Class TarGzIterable.
|
||||
*
|
||||
* @author Andrea
|
||||
*/
|
||||
public class TarGzIterable implements Iterable<String> {
|
||||
|
||||
/** The path to tar.gz archive. */
|
||||
private File tarGzFile;
|
||||
|
||||
public TarGzIterable(final InterfaceDescriptor interfaceDescriptor) throws CollectorServiceException {
|
||||
try {
|
||||
final String tarGzPath = interfaceDescriptor.getBaseUrl();
|
||||
URL tarGzUrl = new URL(tarGzPath);
|
||||
this.tarGzFile = new File(tarGzUrl.getPath());
|
||||
if (!tarGzFile.exists()) { throw new CollectorServiceException(String.format("The base ULR %s, does not exist", tarGzFile.getPath())); }
|
||||
} catch (MalformedURLException e) {
|
||||
throw new CollectorServiceException("TarGz collector failed! ", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
final TarGzIterator tgzIterator = new TarGzIterator(tarGzFile.getAbsolutePath());
|
||||
return Iterators.transform(tgzIterator, new Function<String, String>() {
|
||||
|
||||
@Override
|
||||
public String apply(final String inputRecord) {
|
||||
return XmlCleaner.cleanAllEntities(inputRecord.startsWith("\uFEFF") ? inputRecord.substring(1) : inputRecord);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
package eu.dnetlib.data.collector.plugins.archive.targz;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
public class TarGzIterator implements Iterator<String> {
|
||||
|
||||
/** The Constant log. */
|
||||
private static final Log log = LogFactory.getLog(TarGzIterator.class);
|
||||
|
||||
private TarArchiveInputStream tarInputStream;
|
||||
private String current;
|
||||
|
||||
public TarGzIterator(final String tarGzPath) {
|
||||
try {
|
||||
this.tarInputStream = new TarArchiveInputStream(new BufferedInputStream(new GZIPInputStream(new FileInputStream(tarGzPath))));
|
||||
this.current = findNext();
|
||||
} catch (FileNotFoundException e) {
|
||||
log.error("Tar.gz file not found: " + tarGzPath, e);
|
||||
} catch (IOException e) {
|
||||
log.error("Problem opening tar.gz file " + tarGzPath, e);
|
||||
}
|
||||
}
|
||||
|
||||
public TarGzIterator(final File tarGzFile) {
|
||||
try {
|
||||
this.tarInputStream = new TarArchiveInputStream(new BufferedInputStream(new GZIPInputStream(new FileInputStream(tarGzFile))));
|
||||
this.current = findNext();
|
||||
} catch (FileNotFoundException e) {
|
||||
log.error("Tar.gz file not found: " + tarGzFile.getAbsolutePath(), e);
|
||||
} catch (IOException e) {
|
||||
log.error("Problem opening tar.gz file " + tarGzFile.getAbsolutePath(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return current != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
String ret = new String(current);
|
||||
current = findNext();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {}
|
||||
|
||||
private synchronized String findNext() {
|
||||
TarArchiveEntry entry = null;
|
||||
try {
|
||||
while (null != (entry = tarInputStream.getNextTarEntry()) && !entry.isFile()) {
|
||||
log.debug("Skipping TAR entry " + entry.getName());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("Error during tar.gz extraction", e);
|
||||
}
|
||||
|
||||
if (entry == null) {
|
||||
return null;
|
||||
} else {
|
||||
log.debug("Extracting " + entry.getName());
|
||||
byte[] content = new byte[(int) entry.getSize()];
|
||||
try {
|
||||
tarInputStream.read(content, 0, content.length);
|
||||
return new String(content);
|
||||
} catch (IOException e) {
|
||||
log.error("Impossible to extract file " + entry.getName(), e);
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
package eu.dnetlib.data.collector.plugins.archive.zip;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
/**
|
||||
* Collector pluging for collecting a zipped folder of records
|
||||
*
|
||||
* @author Andrea
|
||||
*
|
||||
*/
|
||||
public class ZipCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
|
||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||
if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
|
||||
return new ZipIterable(interfaceDescriptor);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
package eu.dnetlib.data.collector.plugins.archive.zip;
|
||||
|
||||
import java.io.File;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Iterator;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.collect.Iterators;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Andrea
|
||||
*
|
||||
*/
|
||||
public class ZipIterable implements Iterable<String> {
|
||||
|
||||
/** The path to .zip archive. */
|
||||
private File zipFile;
|
||||
|
||||
public ZipIterable(final InterfaceDescriptor interfaceDescriptor) throws CollectorServiceException {
|
||||
try {
|
||||
final String zipPath = interfaceDescriptor.getBaseUrl();
|
||||
URL zipUrl = new URL(zipPath);
|
||||
this.zipFile = new File(zipUrl.getPath());
|
||||
if (!zipFile.exists()) { throw new CollectorServiceException(String.format("The base ULR %s, does not exist", zipFile.getPath())); }
|
||||
} catch (MalformedURLException e) {
|
||||
throw new CollectorServiceException("Zip collector failed! ", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
final ZipIterator zipIterator = new ZipIterator(zipFile.getAbsolutePath());
|
||||
return Iterators.transform(zipIterator, new Function<String, String>() {
|
||||
|
||||
@Override
|
||||
public String apply(final String inputRecord) {
|
||||
return XmlCleaner.cleanAllEntities(inputRecord.startsWith("\uFEFF") ? inputRecord.substring(1) : inputRecord);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
package eu.dnetlib.data.collector.plugins.archive.zip;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Enumeration;
|
||||
import java.util.Iterator;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipFile;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
public class ZipIterator implements Iterator<String> {
|
||||
|
||||
/** The Constant log. */
|
||||
private static final Log log = LogFactory.getLog(ZipIterator.class);
|
||||
|
||||
ZipFile zipFile;
|
||||
Enumeration<? extends ZipEntry> entries;
|
||||
private String current;
|
||||
|
||||
public ZipIterator(final String zipPath) {
|
||||
try {
|
||||
this.zipFile = new ZipFile(zipPath);
|
||||
this.entries = zipFile.entries();
|
||||
this.current = findNext();
|
||||
} catch (IOException e) {
|
||||
log.error("Problems opening the .zip file " + zipPath, e);
|
||||
}
|
||||
}
|
||||
|
||||
public ZipIterator(final File file) {
|
||||
try {
|
||||
this.zipFile = new ZipFile(file);
|
||||
this.entries = zipFile.entries();
|
||||
this.current = findNext();
|
||||
} catch (IOException e) {
|
||||
log.error("Problems opening the .zip file " + zipFile.getName(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return current != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
String ret = new String(current);
|
||||
current = findNext();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {}
|
||||
|
||||
private synchronized String findNext() {
|
||||
ZipEntry entry = null;
|
||||
while (entries.hasMoreElements() && (entry = entries.nextElement()).isDirectory()) {
|
||||
log.debug("Skipping Zip entry " + entry.getName());
|
||||
}
|
||||
|
||||
if (entry == null) {
|
||||
return null;
|
||||
} else {
|
||||
log.debug("Extracting " + entry.getName());
|
||||
try {
|
||||
InputStream stream = zipFile.getInputStream(entry);
|
||||
return IOUtils.toString(stream);
|
||||
} catch (IOException e) {
|
||||
log.error("Problems extracting entry " + entry.getName(), e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
package eu.dnetlib.data.collector.plugins.datacite;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Date;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.plugin.CollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
public class DataciteCollectorPlugin extends AbstractCollectorPlugin implements CollectorPlugin {
|
||||
|
||||
private static final Log log = LogFactory.getLog(DataciteCollectorPlugin.class);
|
||||
|
||||
private DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd");
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(InterfaceDescriptor interfaceDescriptor, String fromDate, String untilDate) throws CollectorServiceException {
|
||||
|
||||
String baseurl = interfaceDescriptor.getBaseUrl();
|
||||
if (StringUtils.isBlank(baseurl)) throw new CollectorServiceException("baseUrl cannot be empty");
|
||||
long timestamp = 0;
|
||||
if (StringUtils.isNotBlank(fromDate)) {
|
||||
try {
|
||||
Date date = org.apache.commons.lang.time.DateUtils.parseDate(
|
||||
fromDate,
|
||||
new String[] { "yyyy-MM-dd", "yyyy-MM-dd'T'HH:mm:ssXXX", "yyyy-MM-dd'T'HH:mm:ss.SSSX", "yyyy-MM-dd'T'HH:mm:ssZ",
|
||||
"yyyy-MM-dd'T'HH:mm:ss.SX" });
|
||||
//timestamp =parsed.getTime() /1000;
|
||||
timestamp = date.toInstant().toEpochMilli() / 1000;
|
||||
log.info("Querying for Datacite records from timestamp " + timestamp + " (date was " + fromDate + ")");
|
||||
|
||||
} catch (ParseException e) {
|
||||
throw new CollectorServiceException(e);
|
||||
}
|
||||
}
|
||||
final long finalTimestamp = timestamp;
|
||||
return () -> {
|
||||
try {
|
||||
return new DataciteESIterator(finalTimestamp, baseurl);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
package eu.dnetlib.data.collector.plugins.datacite;
|
||||
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Iterator;
|
||||
import java.util.Objects;
|
||||
import java.util.Queue;
|
||||
import java.util.zip.DataFormatException;
|
||||
import java.util.zip.Inflater;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.GsonBuilder;
|
||||
import eu.dnetlib.data.collector.plugins.datacite.schema.DataciteSchema;
|
||||
import eu.dnetlib.data.collector.plugins.datacite.schema.Result;
|
||||
import org.apache.commons.codec.binary.Base64;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/**
 * Iterator over Datacite records served by a scroll-style scan endpoint:
 * {@code <baseURL>/new_scan[?timestamp=...]} starts a scan, then
 * {@code <baseURL>/scan/<scrollId>} fetches the following pages. Each result
 * body is Base64-encoded, DEFLATE-compressed record content.
 */
public class DataciteESIterator implements Iterator<String> {


	// Lower bound (epoch seconds) passed to the initial scan request; 0 means full scan.
	private final long timestamp;

	// Scroll cursor returned by the service, updated on every page.
	private String scrollId;

	// Records of the current page, consumed from the head.
	private Queue<String> currentPage;

	private final Gson g = new GsonBuilder().create();

	// Default endpoint; overwritten by the constructor argument.
	private String baseURL = "http://ip-90-147-167-25.ct1.garrservices.it:5000";

	private static final String START_PATH = "new_scan";
	private static final String NEXT_PATH = "scan/%s";


	/**
	 * Starts a new scan immediately (performs the first HTTP request).
	 */
	public DataciteESIterator(long timestamp, String baseUrl) throws Exception {
		this.timestamp = timestamp;
		this.baseURL = baseUrl;
		currentPage = new ArrayDeque<>();
		startRequest();
	}

	/**
	 * Decodes a result body (Base64 + DEFLATE) into the record string, or
	 * returns null when the payload is not valid compressed data.
	 */
	private static String decompression(final Result r) {
		try {
			byte[] byteArray = Base64.decodeBase64(r.getBody().getBytes());
			Inflater decompresser = new Inflater();
			decompresser.setInput(byteArray);
			ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
			byte[] buffer = new byte[8192];
			while (!decompresser.finished()) {
				int size = decompresser.inflate(buffer);
				bos.write(buffer, 0, size);
			}
			byte[] unzippeddata = bos.toByteArray();
			decompresser.end();

			// NOTE(review): platform default charset — presumably the service emits UTF-8; verify.
			return new String(unzippeddata);
		} catch (DataFormatException e) {
			return null;
		}

	}

	/**
	 * Parses a JSON page, updates the scroll id and enqueues the decoded
	 * records. A blank or "[]" response (end of scan) is ignored, leaving the
	 * queue empty so iteration stops.
	 */
	private void fillQueue(final String hits) {
		if (StringUtils.isBlank(hits) || "[]".equalsIgnoreCase(hits.trim()))
			return;
		try {
			DataciteSchema datacitepage = g.fromJson(hits, DataciteSchema.class);
			this.scrollId = datacitepage.getScrollId();
			datacitepage.getResult().stream().map(DataciteESIterator::decompression).filter(Objects::nonNull).forEach(this.currentPage::add);
		} catch (Throwable e) {
			// NOTE(review): parse failures are only printed and otherwise swallowed,
			// silently ending the iteration — TODO confirm this is intended.
			System.out.println(hits);
			e.printStackTrace();
		}
	}

	// Issues the initial new_scan request, optionally bounded by the timestamp.
	private void startRequest() throws Exception {
		String url = baseURL+"/"+START_PATH;
		final URL startUrl = new URL(timestamp >0 ? url + "?timestamp="+timestamp : url);
		fillQueue(IOUtils.toString(startUrl.openStream()));
	}

	// Fetches the next page using the current scroll id.
	private void getNextPage() throws IOException {
		String url = baseURL+"/"+NEXT_PATH;
		final URL startUrl = new URL(String.format(url,scrollId));
		fillQueue(IOUtils.toString(startUrl.openStream()));
	}


	@Override
	public boolean hasNext() {
		return currentPage.size() >0;
	}

	/**
	 * Returns the next record; after handing out the last record of a page it
	 * eagerly prefetches the following page so hasNext() stays accurate.
	 *
	 * @return the next record, or null when the queue is already empty
	 */
	@Override
	public String next() {

		if (currentPage.size() == 0) {

			return null;
		}

		String nextItem = currentPage.remove();
		if (currentPage.size() == 0) {
			try {
				getNextPage();
			} catch (Throwable e) {
				throw new RuntimeException(e);
			}
		}

		return nextItem;
	}

	public String getBaseURL() {
		return baseURL;
	}

	public void setBaseURL(final String baseURL) {
		this.baseURL = baseURL;
	}
}
|
|
@ -0,0 +1,55 @@
|
|||
|
||||
package eu.dnetlib.data.collector.plugins.datacite.schema;
|
||||
|
||||
import java.util.List;
|
||||
import com.google.gson.annotations.Expose;
|
||||
import com.google.gson.annotations.SerializedName;
|
||||
|
||||
/**
 * Gson-mapped page of the Datacite scan service response: a scroll cursor
 * plus the list of {@link Result} entries for this page.
 */
public class DataciteSchema {

	// NOTE(review): presumably the number of results in this page — confirm against the service.
	@SerializedName("counter")
	@Expose
	private Integer counter;
	@SerializedName("result")
	@Expose
	private List<Result> result = null;
	// Opaque cursor used to request the following page (see DataciteESIterator.getNextPage).
	@SerializedName("scroll_id")
	@Expose
	private String scrollId;
	// NOTE(review): presumably the total number of results in the scan — confirm against the service.
	@SerializedName("total")
	@Expose
	private Integer total;

	public Integer getCounter() {
		return counter;
	}

	public void setCounter(Integer counter) {
		this.counter = counter;
	}

	public List<Result> getResult() {
		return result;
	}

	public void setResult(List<Result> result) {
		this.result = result;
	}

	public String getScrollId() {
		return scrollId;
	}

	public void setScrollId(String scrollId) {
		this.scrollId = scrollId;
	}

	public Integer getTotal() {
		return total;
	}

	public void setTotal(Integer total) {
		this.total = total;
	}

}
|
|
@ -0,0 +1,54 @@
|
|||
|
||||
package eu.dnetlib.data.collector.plugins.datacite.schema;
|
||||
|
||||
import com.google.gson.annotations.Expose;
|
||||
import com.google.gson.annotations.SerializedName;
|
||||
|
||||
public class Result {
|
||||
|
||||
@SerializedName("body")
|
||||
@Expose
|
||||
private String body;
|
||||
@SerializedName("id")
|
||||
@Expose
|
||||
private String id;
|
||||
@SerializedName("originalId")
|
||||
@Expose
|
||||
private String originalId;
|
||||
@SerializedName("timestamp")
|
||||
@Expose
|
||||
private Integer timestamp;
|
||||
|
||||
public String getBody() {
|
||||
return body;
|
||||
}
|
||||
|
||||
public void setBody(String body) {
|
||||
this.body = body;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getOriginalId() {
|
||||
return originalId;
|
||||
}
|
||||
|
||||
public void setOriginalId(String originalId) {
|
||||
this.originalId = originalId;
|
||||
}
|
||||
|
||||
public Integer getTimestamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
public void setTimestamp(Integer timestamp) {
|
||||
this.timestamp = timestamp;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,115 @@
|
|||
package eu.dnetlib.data.collector.plugins.datasets;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
* The Class DatasetsByProjectIterator.
|
||||
*/
|
||||
public class DatasetsByJournalIterator implements Iterable<String>, Iterator<String> {
|
||||
|
||||
/** The current iterator. */
|
||||
private Iterator<String> currentIterator;
|
||||
|
||||
/** The current project. */
|
||||
private PangaeaJournalInfo currentJournal;
|
||||
|
||||
private Iterator<PangaeaJournalInfo> inputIterator;
|
||||
|
||||
/** The logger. */
|
||||
private static final Log log = LogFactory.getLog(DatasetsByProjectIterator.class);
|
||||
|
||||
public DatasetsByJournalIterator(final Iterator<PangaeaJournalInfo> iterator) {
|
||||
this.inputIterator = iterator;
|
||||
this.currentJournal = extractNextLine();
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.util.Iterator#hasNext()
|
||||
*/
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
// CASE WHEN WE REACH THE LAST ITEM ON CSV
|
||||
// OR WE HAD SOME PROBLEM ON GET NEXT CSV ITEM
|
||||
if (this.currentJournal == null) { return false; }
|
||||
// IN THIS CASE WE HAVE ANOTHER DATASETS
|
||||
// FOR THE CURRENT PROJECT AND RETURN TRUE
|
||||
if (currentIterator != null && currentIterator.hasNext()) { return true; }
|
||||
// OTHERWISE WE FINISHED TO ITERATE THE CURRENT
|
||||
// SETS OF DATASETS FOR A PARTICULAR PROJECT
|
||||
// SO WE HAVE TO RETRIEVE THE NEXT ITERATOR WITH
|
||||
// ITEMS
|
||||
this.currentJournal = extractNextLine();
|
||||
|
||||
while (this.currentJournal != null) {
|
||||
currentIterator = getNextIterator();
|
||||
// IF THE NEXT ITERATOR HAS ITEMS RETURN YES
|
||||
// OTHERWISE THE CICLE CONTINUE
|
||||
if (currentIterator.hasNext()) { return true; }
|
||||
this.currentJournal = extractNextLine();
|
||||
}
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.util.Iterator#next()
|
||||
*/
|
||||
@Override
|
||||
public String next() {
|
||||
return this.currentIterator.next();
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.util.Iterator#remove()
|
||||
*/
|
||||
@Override
|
||||
public void remove() {}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.lang.Iterable#iterator()
|
||||
*/
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
if (this.currentJournal != null) {
|
||||
currentIterator = getNextIterator();
|
||||
return this;
|
||||
}
|
||||
return null;
|
||||
|
||||
}
|
||||
|
||||
private Iterator<String> getNextIterator() {
|
||||
QueryField q = new QueryField();
|
||||
RequestField r = new RequestField();
|
||||
r.setQuery(q);
|
||||
q.getTerm().put("ft-techkeyword", this.currentJournal.getJournalId());
|
||||
|
||||
return new DatasetsIterator(r, "", this.currentJournal).iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract next line.
|
||||
*
|
||||
* @return the map
|
||||
* @throws IOException
|
||||
* Signals that an I/O exception has occurred.
|
||||
*/
|
||||
private PangaeaJournalInfo extractNextLine() {
|
||||
|
||||
if (this.inputIterator.hasNext() == false) { return null; }
|
||||
return this.inputIterator.next();
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,158 @@
|
|||
package eu.dnetlib.data.collector.plugins.datasets;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
/**
|
||||
* The Class DatasetsByProjectIterator.
|
||||
*/
|
||||
public class DatasetsByProjectIterator implements Iterable<String>, Iterator<String> {
|
||||
|
||||
private static final String SPLIT_REGEX = ";";
|
||||
|
||||
/** The project id key. */
|
||||
public static String PROJECT_ID_KEY = "id";
|
||||
|
||||
/** The project name key. */
|
||||
public static String PROJECT_NAME_KEY = "name";
|
||||
|
||||
/** The project corda id key. */
|
||||
public static String PROJECT_CORDA_ID_KEY = "corda_id";
|
||||
|
||||
/** The current iterator. */
|
||||
private Iterator<String> currentIterator;
|
||||
|
||||
/** The csv reader. */
|
||||
private BufferedReader csvReader;
|
||||
|
||||
/** The current project. */
|
||||
private Map<String, String> currentProject;
|
||||
|
||||
/** The logger. */
|
||||
private static final Log log = LogFactory.getLog(DatasetsByProjectIterator.class);
|
||||
|
||||
/**
|
||||
* Instantiates a new datasets by project iterator.
|
||||
*
|
||||
* @param csvInputStream
|
||||
* the csv input stream
|
||||
* @throws IOException
|
||||
* Signals that an I/O exception has occurred.
|
||||
*/
|
||||
public DatasetsByProjectIterator(final InputStreamReader csvInputStream) throws IOException {
|
||||
this.csvReader = new BufferedReader(csvInputStream);
|
||||
this.currentProject = extractNextLine();
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.util.Iterator#hasNext()
|
||||
*/
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
// CASE WHEN WE REACH THE LAST ITEM ON CSV
|
||||
// OR WE HAD SOME PROBLEM ON GET NEXT CSV ITEM
|
||||
if (this.currentProject == null) { return false; }
|
||||
// IN THIS CASE WE HAVE ANOTHER DATASETS
|
||||
// FOR THE CURRENT PROJECT AND RETURN TRUE
|
||||
if (currentIterator != null && currentIterator.hasNext()) { return true; }
|
||||
// OTHERWISE WE FINISHED TO ITERATE THE CURRENT
|
||||
// SETS OF DATASETS FOR A PARTICULAR PROJECT
|
||||
// SO WE HAVE TO RETRIEVE THE NEXT ITERATOR WITH
|
||||
// ITEMS
|
||||
this.currentProject = extractNextLine();
|
||||
|
||||
while (this.currentProject != null) {
|
||||
currentIterator = getNextIterator();
|
||||
// IF THE NEXT ITERATOR HAS ITEMS RETURN YES
|
||||
// OTHERWISE THE CICLE CONTINUE
|
||||
if (currentIterator.hasNext()) { return true; }
|
||||
this.currentProject = extractNextLine();
|
||||
}
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.util.Iterator#next()
|
||||
*/
|
||||
@Override
|
||||
public String next() {
|
||||
return this.currentIterator.next();
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.util.Iterator#remove()
|
||||
*/
|
||||
@Override
|
||||
public void remove() {}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.lang.Iterable#iterator()
|
||||
*/
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
if (this.currentProject != null) {
|
||||
currentIterator = getNextIterator();
|
||||
return this;
|
||||
}
|
||||
return null;
|
||||
|
||||
}
|
||||
|
||||
private Iterator<String> getNextIterator() {
|
||||
QueryField q = new QueryField();
|
||||
RequestField r = new RequestField();
|
||||
r.setQuery(q);
|
||||
q.getTerm().put("ft-techkeyword", this.currentProject.get(PROJECT_ID_KEY));
|
||||
return new DatasetsIterator(r, this.currentProject.get(PROJECT_CORDA_ID_KEY), null).iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract next line.
|
||||
*
|
||||
* @return the map
|
||||
* @throws IOException
|
||||
* Signals that an I/O exception has occurred.
|
||||
*/
|
||||
private Map<String, String> extractNextLine() {
|
||||
String line;
|
||||
try {
|
||||
line = this.csvReader.readLine();
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
// WE REACH THE END OF THE CSV
|
||||
if (line == null) { return null; }
|
||||
log.debug("splitting line: " + line);
|
||||
String[] values = line.split(SPLIT_REGEX);
|
||||
if (values == null || values.length != 4) {
|
||||
log.error("Error on splitting line, the length must be 4");
|
||||
return null;
|
||||
}
|
||||
int id = Integer.parseInt(values[0]);
|
||||
String project_name = values[2];
|
||||
String cordaId = values[3];
|
||||
Map<String, String> splittedMap = Maps.newHashMap();
|
||||
splittedMap.put(PROJECT_CORDA_ID_KEY, cordaId);
|
||||
splittedMap.put(PROJECT_ID_KEY, "project" + id);
|
||||
splittedMap.put(PROJECT_NAME_KEY, project_name);
|
||||
log.debug(String.format("found project %s with id Corda: %s and id for API: %s", project_name, cordaId, "project" + id));
|
||||
return splittedMap;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
package eu.dnetlib.data.collector.plugins.datasets;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
public class DatasetsByProjectPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
try {
|
||||
URL url = new URL(interfaceDescriptor.getBaseUrl());
|
||||
url.openConnection();
|
||||
InputStreamReader reader = new InputStreamReader(url.openStream());
|
||||
DatasetsByProjectIterator iterator = new DatasetsByProjectIterator(reader);
|
||||
return iterator;
|
||||
} catch (IOException e) {
|
||||
throw new CollectorServiceException("OOOPS something bad happen on creating iterator ", e);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,274 @@
|
|||
package eu.dnetlib.data.collector.plugins.datasets;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringEscapeUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpPost;
|
||||
import org.apache.http.entity.StringEntity;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.GsonBuilder;
|
||||
|
||||
/**
|
||||
* The Class JournalIterator.
|
||||
*/
|
||||
public class DatasetsIterator implements Iterable<String>, Iterator<String> {
|
||||
|
||||
/** The logger. */
|
||||
private static final Log log = LogFactory.getLog(DatasetsIterator.class);
|
||||
|
||||
/** The base url template. */
|
||||
private static String BASE_URL_TEMPLATE = "http://ws.pangaea.de/es/pangaea/panmd/_search?_source=xml&size=%d&from=%d";
|
||||
|
||||
/** The journal id. */
|
||||
private String journalId = "";
|
||||
|
||||
/** The journal name. */
|
||||
private String journalName = "";
|
||||
|
||||
/** The journal issn. */
|
||||
private String journalISSN = "";
|
||||
|
||||
/** The openaire datasource. */
|
||||
private String openaireDatasource = "";
|
||||
|
||||
/** The total. */
|
||||
private long total;
|
||||
|
||||
/** The from. */
|
||||
private int from;
|
||||
|
||||
/** The current iterator. */
|
||||
private int currentIterator;
|
||||
|
||||
/** The current response. */
|
||||
private ElasticSearchResponse currentResponse;
|
||||
|
||||
/** The request. */
|
||||
private RequestField request;
|
||||
|
||||
/** The default size. */
|
||||
private static int DEFAULT_SIZE = 10;
|
||||
|
||||
private String projectCordaId;
|
||||
|
||||
private static String RECORD_TEMPLATE = "<datasetsRecord><oaf:projectid xmlns:oaf=\"http://namespace.openaire.eu/oaf\">%s</oaf:projectid>"
|
||||
+ "<journal name='%s' issn='%s' datasourceid = '%s'/><metadata>%s</metadata></datasetsRecord>";
|
||||
|
||||
/**
|
||||
* Instantiates a new journal iterator.
|
||||
*
|
||||
* @param request
|
||||
* the request
|
||||
*/
|
||||
public DatasetsIterator(final RequestField request, final String projectCordaId, final PangaeaJournalInfo info) {
|
||||
this.request = request;
|
||||
this.setProjectCordaId(projectCordaId);
|
||||
|
||||
if (info != null) {
|
||||
this.setJournalId(info.getJournalId());
|
||||
this.setJournalName(StringEscapeUtils.escapeXml(info.getJournalName()));
|
||||
this.setJournalISSN(info.getJournalISSN());
|
||||
this.setOpenaireDatasource(info.getDatasourceId());
|
||||
}
|
||||
log.debug("Start Iterator");
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute query.
|
||||
*
|
||||
* @param from
|
||||
* the from
|
||||
* @param size
|
||||
* the size
|
||||
* @return the string
|
||||
*/
|
||||
private String executeQuery(final int from, final int size) {
|
||||
log.debug("executing query " + this.request.getQuery().getTerm());
|
||||
log.debug(String.format("from:%d size:%d", from, size));
|
||||
CloseableHttpResponse response = null;
|
||||
InputStream responseBody = null;
|
||||
CloseableHttpClient httpclient = HttpClients.createDefault();
|
||||
try {
|
||||
|
||||
HttpPost post = new HttpPost(String.format(BASE_URL_TEMPLATE, size, from));
|
||||
Gson g = new GsonBuilder().disableHtmlEscaping().create();
|
||||
StringEntity entry = new StringEntity(g.toJson(this.request));
|
||||
post.setEntity(entry);
|
||||
long start = System.currentTimeMillis();
|
||||
response = httpclient.execute(post);
|
||||
int statusCode = response.getStatusLine().getStatusCode();
|
||||
if (statusCode == 200) {
|
||||
responseBody = response.getEntity().getContent();
|
||||
String s = IOUtils.toString(responseBody);
|
||||
log.debug("Request done in " + (System.currentTimeMillis() - start) + " ms");
|
||||
responseBody.close();
|
||||
return s;
|
||||
}
|
||||
return null;
|
||||
} catch (Exception e) {
|
||||
log.error("Error on executing query :" + request.getQuery().getTerm(), e);
|
||||
return null;
|
||||
} finally {
|
||||
try {
|
||||
responseBody.close();
|
||||
response.close();
|
||||
httpclient.close();
|
||||
} catch (IOException e) {
|
||||
log.error("Can't close connections gracefully", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the journal id.
|
||||
*
|
||||
* @return the journalId
|
||||
*/
|
||||
public String getJournalId() {
|
||||
return journalId;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the journal id.
|
||||
*
|
||||
* @param journalId
|
||||
* the journalId to set
|
||||
*/
|
||||
public void setJournalId(final String journalId) {
|
||||
this.journalId = journalId;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.util.Iterator#hasNext()
|
||||
*/
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return (from + currentIterator) < total;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.util.Iterator#next()
|
||||
*/
|
||||
@Override
|
||||
public String next() {
|
||||
String xml = String.format(RECORD_TEMPLATE, this.projectCordaId, this.journalName, this.journalISSN, this.openaireDatasource, currentResponse
|
||||
.getXmlRecords().get(currentIterator));
|
||||
currentIterator++;
|
||||
if (currentIterator == DEFAULT_SIZE) {
|
||||
getNextItem();
|
||||
}
|
||||
return xml;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.util.Iterator#remove()
|
||||
*/
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.lang.Iterable#iterator()
|
||||
*/
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
from = 0;
|
||||
total = 0;
|
||||
getNextItem();
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next item.
|
||||
*
|
||||
* @return the next item
|
||||
*/
|
||||
private void getNextItem() {
|
||||
from += currentIterator;
|
||||
currentResponse = ElasticSearchResponse.createNewResponse(executeQuery(from, DEFAULT_SIZE));
|
||||
total = currentResponse == null ? 0 : currentResponse.getTotal();
|
||||
log.debug("from : " + from + " total of the request is " + total);
|
||||
currentIterator = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the projectCordaId
|
||||
*/
|
||||
public String getProjectCordaId() {
|
||||
return projectCordaId;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param projectCordaId
|
||||
* the projectCordaId to set
|
||||
*/
|
||||
public void setProjectCordaId(final String projectCordaId) {
|
||||
this.projectCordaId = projectCordaId;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the journalName
|
||||
*/
|
||||
public String getJournalName() {
|
||||
return journalName;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param journalName
|
||||
* the journalName to set
|
||||
*/
|
||||
public void setJournalName(final String journalName) {
|
||||
this.journalName = journalName;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the journalISSN
|
||||
*/
|
||||
public String getJournalISSN() {
|
||||
return journalISSN;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param journalISSN
|
||||
* the journalISSN to set
|
||||
*/
|
||||
public void setJournalISSN(final String journalISSN) {
|
||||
this.journalISSN = journalISSN;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the openaireDatasource
|
||||
*/
|
||||
public String getOpenaireDatasource() {
|
||||
return openaireDatasource;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param openaireDatasource
|
||||
* the openaireDatasource to set
|
||||
*/
|
||||
public void setOpenaireDatasource(final String openaireDatasource) {
|
||||
this.openaireDatasource = openaireDatasource;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
package eu.dnetlib.data.collector.plugins.datasets;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.google.gson.JsonArray;
|
||||
import com.google.gson.JsonElement;
|
||||
import com.google.gson.JsonObject;
|
||||
import com.google.gson.JsonParser;
|
||||
|
||||
public class ElasticSearchResponse {
|
||||
|
||||
/** The logger. */
|
||||
private static final Log log = LogFactory.getLog(ElasticSearchResponse.class);
|
||||
private long total;
|
||||
private List<String> xmlRecords;
|
||||
|
||||
public static ElasticSearchResponse createNewResponse(final String response) {
|
||||
ElasticSearchResponse item = new ElasticSearchResponse();
|
||||
|
||||
if (response == null) {
|
||||
log.fatal("Error: null elasticsearch reponse");
|
||||
return null;
|
||||
|
||||
}
|
||||
JsonElement jElement = new JsonParser().parse(response);
|
||||
JsonObject jobject = jElement.getAsJsonObject();
|
||||
if (jobject.has("hits")) {
|
||||
|
||||
item.setTotal(jobject.get("hits").getAsJsonObject().get("total").getAsLong());
|
||||
|
||||
JsonElement hits = ((JsonObject) jobject.get("hits")).get("hits");
|
||||
|
||||
JsonArray hitsObject = hits.getAsJsonArray();
|
||||
|
||||
List<String> records = new ArrayList<String>();
|
||||
|
||||
for (JsonElement elem : hitsObject) {
|
||||
JsonObject _source = (JsonObject) ((JsonObject) elem).get("_source");
|
||||
String xml = _source.get("xml").getAsString();
|
||||
records.add(xml);
|
||||
}
|
||||
item.setXmlRecords(records);
|
||||
return item;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the xmlRecords
|
||||
*/
|
||||
public List<String> getXmlRecords() {
|
||||
return xmlRecords;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param xmlRecords
|
||||
* the xmlRecords to set
|
||||
*/
|
||||
public void setXmlRecords(final List<String> xmlRecords) {
|
||||
this.xmlRecords = xmlRecords;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the total
|
||||
*/
|
||||
public long getTotal() {
|
||||
return total;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param total
|
||||
* the total to set
|
||||
*/
|
||||
public void setTotal(final long total) {
|
||||
this.total = total;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
package eu.dnetlib.data.collector.plugins.datasets;
|
||||
|
||||
/**
 * The Class PangaeaJournalInfo: descriptive metadata about a Pangaea journal.
 */
|
||||
public class PangaeaJournalInfo {

	/** The journal name. */
	private String journalName;

	/** The journal id (used as the "ft-techkeyword" query term). */
	private String journalId;

	/** The OpenAIRE datasource id associated with this journal. */
	private String datasourceId;

	/** The journal issn. */
	private String journalISSN;

	/**
	 * Gets the journal name.
	 *
	 * @return the journal name
	 */
	public String getJournalName() {
		return journalName;
	}

	/**
	 * Sets the journal name.
	 *
	 * @param journalName
	 *            the new journal name
	 */
	public void setJournalName(final String journalName) {
		this.journalName = journalName;
	}

	/**
	 * Gets the journal id.
	 *
	 * @return the journal id
	 */
	public String getJournalId() {
		return journalId;
	}

	/**
	 * Sets the journal id.
	 *
	 * @param journalId
	 *            the new journal id
	 */
	public void setJournalId(final String journalId) {
		this.journalId = journalId;
	}

	/**
	 * Gets the datasource id.
	 *
	 * @return the datasource id
	 */
	public String getDatasourceId() {
		return datasourceId;
	}

	/**
	 * Sets the datasource id.
	 *
	 * @param datasourceId
	 *            the new datasource id
	 */
	public void setDatasourceId(final String datasourceId) {
		this.datasourceId = datasourceId;
	}

	/**
	 * Gets the journal ISSN.
	 *
	 * @return the journalISSN
	 */
	public String getJournalISSN() {
		return journalISSN;
	}

	/**
	 * Sets the journal ISSN.
	 *
	 * @param journalISSN
	 *            the journalISSN to set
	 */
	public void setJournalISSN(final String journalISSN) {
		this.journalISSN = journalISSN;
	}

}
|
|
@ -0,0 +1,29 @@
|
|||
package eu.dnetlib.data.collector.plugins.datasets;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * The "query" clause of the elasticsearch request payload: a map of term
 * filters, keyed by field name.
 */
public class QueryField {

	private Map<String, String> term;

	/**
	 * Creates an empty query.
	 */
	public QueryField() {
		// Fix: assign the field directly instead of calling the overridable
		// setTerm(...) from the constructor (unsafe if the class is subclassed).
		this.term = new HashMap<String, String>();
	}

	/**
	 * @return the term map (a live reference: additions are reflected in the query)
	 */
	public Map<String, String> getTerm() {
		return term;
	}

	/**
	 * @param term
	 *            the term to set
	 */
	public void setTerm(final Map<String, String> term) {
		this.term = term;
	}

}
|
|
@ -0,0 +1,21 @@
|
|||
package eu.dnetlib.data.collector.plugins.datasets;
|
||||
|
||||
public class RequestField {
|
||||
|
||||
private QueryField query;
|
||||
|
||||
/**
|
||||
* @return the query
|
||||
*/
|
||||
public QueryField getQuery() {
|
||||
return query;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param query the query to set
|
||||
*/
|
||||
public void setQuery(QueryField query) {
|
||||
this.query = query;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
package eu.dnetlib.data.collector.plugins.datasources;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.plugins.HttpConnector;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
/**
|
||||
* Plugin to collect metadata record about data repositories from re3data.
|
||||
* <p>
|
||||
* Documentation on re3data API: http://service.re3data.org/api/doc.
|
||||
* </p>
|
||||
* <p>
|
||||
* BaseURL: http://service.re3data.org
|
||||
* </p>
|
||||
* <p>
|
||||
* API to get the list of repos: baseURL + /api/v1/repositories
|
||||
* </p>
|
||||
* <p>
|
||||
* API to get a repository: baseURL + content of link/@href of the above list
|
||||
* </p>
|
||||
*
|
||||
* @author alessia
|
||||
*
|
||||
*/
|
||||
public class Re3DataCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
private String repositoryListPath = "/api/v1/repositories";
|
||||
|
||||
@Autowired
|
||||
private HttpConnector httpConnector;
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
String repositoryListURL = interfaceDescriptor.getBaseUrl() + repositoryListPath;
|
||||
String input;
|
||||
try {
|
||||
input = httpConnector.getInputSource(repositoryListURL);
|
||||
return new Re3DataRepositoriesIterator(IOUtils.toInputStream(input, "UTF-8"), interfaceDescriptor.getBaseUrl(), getHttpConnector());
|
||||
} catch (IOException e) {
|
||||
throw new CollectorServiceException(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public String getRepositoryListPath() {
|
||||
return repositoryListPath;
|
||||
}
|
||||
|
||||
public void setRepositoryListPath(final String repositoryListPath) {
|
||||
this.repositoryListPath = repositoryListPath;
|
||||
}
|
||||
|
||||
public HttpConnector getHttpConnector() {
|
||||
return httpConnector;
|
||||
}
|
||||
|
||||
public void setHttpConnector(final HttpConnector httpConnector) {
|
||||
this.httpConnector = httpConnector;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,151 @@
|
|||
package eu.dnetlib.data.collector.plugins.datasources;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.HttpConnector;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
|
||||
|
||||
public class Re3DataRepositoriesIterator implements Iterator<String>, Iterable<String> {
|
||||
|
||||
private static final Log log = LogFactory.getLog(Re3DataRepositoriesIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
|
||||
|
||||
private String baseURL;
|
||||
private XMLStreamReader reader;
|
||||
private int countedRepos = 0;
|
||||
private String currentRepoPath = null;
|
||||
|
||||
private HttpConnector httpConnector;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return currentRepoPath != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
if (currentRepoPath == null) throw new NoSuchElementException();
|
||||
|
||||
try {
|
||||
String repoInfo = getRepositoryInfo(currentRepoPath);
|
||||
return repoInfo;
|
||||
} finally {
|
||||
currentRepoPath = moveToNextRepo();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Re3DataRepositoriesIterator(final InputStream xmlInputStream, final String baseUrl, final HttpConnector httpConnector) throws CollectorServiceException {
|
||||
this.httpConnector = httpConnector;
|
||||
XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
try {
|
||||
reader = factory.createXMLStreamReader(xmlInputStream);
|
||||
} catch (XMLStreamException e) {
|
||||
throw new CollectorServiceException(e);
|
||||
}
|
||||
baseURL = baseUrl;
|
||||
|
||||
// try to fetch the 1st
|
||||
currentRepoPath = moveToNextRepo();
|
||||
}
|
||||
|
||||
private String getNextRepositoryPath() {
|
||||
return reader.getAttributeValue(null, "href");
|
||||
}
|
||||
|
||||
private String moveToNextRepo() {
|
||||
try {
|
||||
while (reader.hasNext()) {
|
||||
int event = reader.next();
|
||||
if (event == XMLStreamConstants.START_ELEMENT) {
|
||||
String elementName = reader.getLocalName();
|
||||
if (elementName.equals("link")) {
|
||||
String repoPath = getNextRepositoryPath();
|
||||
log.debug(String.format("Found %s repositories. The last has link %s", ++countedRepos, repoPath));
|
||||
return repoPath;
|
||||
}
|
||||
}
|
||||
}
|
||||
log.info("Seems there are no more repository to iterate on. Total: " + countedRepos);
|
||||
return null;
|
||||
} catch (XMLStreamException e) {
|
||||
throw new CollectorServiceRuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private String getRepositoryInfo(final String repositoryPath) throws CollectorServiceRuntimeException {
|
||||
|
||||
String targetURL = repositoryPath;
|
||||
if(!repositoryPath.startsWith(baseURL))
|
||||
targetURL = baseURL + repositoryPath;
|
||||
try {
|
||||
log.info(targetURL);
|
||||
String inputSource = getHttpConnector().getInputSource(targetURL);
|
||||
|
||||
return XmlCleaner.cleanAllEntities(inputSource);
|
||||
} catch (CollectorServiceException e) {
|
||||
throw new CollectorServiceRuntimeException("OOOPS something bad happen getting repo info from " + targetURL, e);
|
||||
}
|
||||
}
|
||||
|
||||
// public String testAccess(){
|
||||
// return getRepositoryInfo("/api/v1/repository/r3d100012823");
|
||||
// }
|
||||
	/** @return the base URL of the registry service (prepended to relative paths). */
	public String getBaseURL() {
		return baseURL;
	}

	public void setBaseURL(final String baseURL) {
		this.baseURL = baseURL;
	}

	/** @return the number of repositories seen so far by the iteration. */
	public int getCountedRepos() {
		return countedRepos;
	}

	public void setCountedRepos(final int countedRepos) {
		this.countedRepos = countedRepos;
	}

	/** @return the StAX reader positioned on the repository list document. */
	public XMLStreamReader getReader() {
		return reader;
	}

	public void setReader(final XMLStreamReader reader) {
		this.reader = reader;
	}

	/** @return the path of the repository the iteration currently points to. */
	public String getCurrentRepoPath() {
		return currentRepoPath;
	}

	public void setCurrentRepoPath(final String currentRepoPath) {
		this.currentRepoPath = currentRepoPath;
	}

	/** @return the HTTP connector used to fetch repository pages. */
	public HttpConnector getHttpConnector() {
		return httpConnector;
	}

	public void setHttpConnector(final HttpConnector httpConnector) {
		this.httpConnector = httpConnector;
	}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
package eu.dnetlib.data.collector.plugins.excel;
|
||||
|
||||
/**
|
||||
* Created by miriam on 10/05/2017.
|
||||
*/
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.util.ArrayList;
|
||||
import org.apache.commons.csv.CSVPrinter;
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
|
||||
public class CSVFileWriter {
|
||||
private static final String NEW_LINE_SEPARATOR = "\n";
|
||||
|
||||
private Object [] file_header ;
|
||||
private ArrayList<ArrayList<String>> projects = new ArrayList<ArrayList<String>>();
|
||||
|
||||
public void setHeader(String[] header){
|
||||
this.file_header = header;
|
||||
}
|
||||
|
||||
public void addProject(ArrayList<String> project) {
|
||||
projects.add(project);
|
||||
|
||||
}
|
||||
|
||||
public void writeFile(String csv_file_path){
|
||||
BufferedWriter writer = null;
|
||||
CSVPrinter csvFilePrinter = null;
|
||||
|
||||
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR);
|
||||
|
||||
try{
|
||||
writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(csv_file_path),"UTF-8"));
|
||||
|
||||
csvFilePrinter = new CSVPrinter(writer,csvFileFormat);
|
||||
csvFilePrinter.printRecord(file_header);
|
||||
|
||||
for(ArrayList<String> project:projects){
|
||||
csvFilePrinter.printRecord(project);
|
||||
}
|
||||
}catch(Exception e){
|
||||
e.printStackTrace();
|
||||
}finally{
|
||||
try{
|
||||
writer.flush();
|
||||
writer.close();
|
||||
csvFilePrinter.close();
|
||||
}catch(IOException ioe){
|
||||
ioe.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,256 @@
|
|||
package eu.dnetlib.data.collector.plugins.excel;
|
||||
|
||||
/**
|
||||
* Created by miriam on 10/05/2017.
|
||||
*/
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.DataFormatter;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.json.*;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
||||
public class Read {
|
||||
|
||||
private static final Log log = LogFactory.getLog(Read.class);
|
||||
|
||||
/** The descriptor. */
|
||||
private InterfaceDescriptor descriptor;
|
||||
|
||||
|
||||
/*private final String EXCEL_FILE_URL ="https://pf.fwf.ac.at/en/research-in-practice/project-finder.xlsx?&&&search%5Bcall%5D=&search%5Bdecision_board_ids%5D=&search%5Bend_date%5D=&search%5Binstitute_name%5D=&search%5Blead_firstname%5D=&search%5Blead_lastname%5D=&search%5Bper_page%5D=10&search%5Bproject_number%5D=&search%5Bproject_title%5D=&search%5Bscience_discipline_id%5D=&search%5Bstart_date%5D=&search%5Bstatus_id%5D=&search%5Bwhat%5D=&action=index&controller=projects&locale=en&per_page=10";
|
||||
private final String CSV_FILE_PATH = "//Users//miriam//Documents//svn//mirima//FWF//projects_search2017.05.09.5.csv";
|
||||
private final String argument = "{\"replace\":{\"header\":[{\"from\":\"&\",\"to\":\"and\"}],\"body\":[{\"from\":\"\\n\",\"to\":\" \"}]}," +
|
||||
"\"replace_currency\":[{\"from\":\"$\",\"to\":\"€\"}],"
|
||||
+ "\"col_currency\":10}"; */
|
||||
private Sheet sheet;
|
||||
private CSVFileWriter csv_writer = new CSVFileWriter();
|
||||
private HashMap<String,String> map_header = new HashMap<String,String>();
|
||||
private HashMap<String,String> map_body = new HashMap<String,String>();
|
||||
private int header_row;
|
||||
private String file_to_save ;
|
||||
private boolean replace_currency = false;
|
||||
private String from_currency, to_currency;
|
||||
private boolean remove_empty, remove_tmp_file;
|
||||
private String remove_id;
|
||||
private int column_id;
|
||||
private int currency_column;
|
||||
private int sheet_number;
|
||||
private String tmp_file;
|
||||
private String argument;
|
||||
private String identifier;
|
||||
|
||||
private HttpCSVCollectorPlugin collector;
|
||||
|
||||
public HttpCSVCollectorPlugin getCollector() {
|
||||
return collector;
|
||||
}
|
||||
|
||||
public void setCollector(HttpCSVCollectorPlugin collector) {
|
||||
this.collector = collector;
|
||||
}
|
||||
|
||||
public Read(InterfaceDescriptor descriptor){
|
||||
this.descriptor = descriptor;
|
||||
|
||||
}
|
||||
|
||||
private static String getCellValue( Cell cell)
|
||||
{
|
||||
DataFormatter formatter = new DataFormatter();
|
||||
String formattedCellValue = formatter.formatCellValue(cell);
|
||||
return formattedCellValue;
|
||||
|
||||
}
|
||||
|
||||
private void copyFile() throws IOException{
|
||||
FileUtils.copyURLToFile(new URL(descriptor.getBaseUrl()), new File(tmp_file));
|
||||
|
||||
}
|
||||
|
||||
private void parseDescriptor(){
|
||||
HashMap<String, String> params = descriptor.getParams();
|
||||
argument = params.get("argument");
|
||||
header_row = Integer.parseInt(params.get("header_row"));
|
||||
tmp_file = params.get("tmp_file");
|
||||
remove_empty = (params.get("remove_empty_lines") == "yes");
|
||||
remove_id = params.get("remove_lines_with_id");
|
||||
column_id = Integer.parseInt(params.get("col_id"));
|
||||
remove_tmp_file = (params.get("remove_tmp_file") == "yes");
|
||||
sheet_number = Integer.parseInt(params.get("sheet_number"));
|
||||
file_to_save = params.get("file_to_save");
|
||||
}
|
||||
private void init() throws IOException{
|
||||
parseDescriptor();
|
||||
log.info("Parsing the arguments");
|
||||
parseArguments();
|
||||
log.info("Copying the file in temp local file");
|
||||
copyFile();
|
||||
log.info("Extracting the sheet " + sheet_number);
|
||||
FileInputStream fis = new FileInputStream(tmp_file);
|
||||
Workbook workbook = new XSSFWorkbook(fis);
|
||||
sheet = workbook.getSheetAt(sheet_number);
|
||||
fis.close();
|
||||
if(remove_tmp_file) {
|
||||
File f = new File(tmp_file);
|
||||
f.delete();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void fillMap(JSONObject json, HashMap<String,String> map, String elem){
|
||||
try{
|
||||
final JSONArray arr = json.getJSONObject("replace").getJSONArray(elem);
|
||||
for(Object entry: arr)
|
||||
map.put(((JSONObject)entry).getString("from"), ((JSONObject)entry).getString("to"));
|
||||
}catch(Throwable e){
|
||||
log.error("Problems filling the map for " + elem);
|
||||
throw(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void parseArguments() {
|
||||
if (StringUtils.isNotEmpty(argument)){
|
||||
try{
|
||||
final JSONObject json = new JSONObject(argument);
|
||||
if(json.has("header"))
|
||||
fillMap(json, map_header,"header");
|
||||
if (json.has("body"))
|
||||
fillMap(json,map_body,"body");
|
||||
|
||||
if(json.has("replace_currency"))
|
||||
{
|
||||
replace_currency = true ;
|
||||
from_currency = json.getJSONArray("replace_currency").getJSONObject(0).getString("from");
|
||||
to_currency = json.getJSONArray("replace_currency").getJSONObject(0).getString("to");
|
||||
|
||||
}
|
||||
|
||||
if (json.has("col_currency"))
|
||||
currency_column = json.getInt("col_currency");
|
||||
}catch(Throwable e){
|
||||
log.error("Problems while parsing the argument parameter.");
|
||||
throw (e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
private String applyReplace(String row, HashMap<String,String>replace){
|
||||
for(String key: replace.keySet()){
|
||||
if(row.contains(key))
|
||||
row = row.replace(key, replace.get(key));
|
||||
}
|
||||
return row;
|
||||
}
|
||||
|
||||
private void getHeader(){
|
||||
Row row = sheet.getRow(header_row);
|
||||
Iterator<Cell> cellIterator = row.cellIterator();
|
||||
Cell cell;
|
||||
String project = "";
|
||||
int count = 0;
|
||||
while (cellIterator.hasNext()){
|
||||
cell = cellIterator.next();
|
||||
final String stringCellValue = cell.getStringCellValue();
|
||||
project += applyReplace(stringCellValue,map_header) + ";";
|
||||
if(count++ == column_id) identifier = applyReplace(stringCellValue,map_header);
|
||||
}
|
||||
project = project.substring(0, project.length() -1 );
|
||||
csv_writer.setHeader(project.split(";"));
|
||||
|
||||
}
|
||||
|
||||
private void getData(){
|
||||
Row row;
|
||||
Cell cell;
|
||||
String tmp;
|
||||
Iterator<Cell>cellIterator;
|
||||
for(int row_number = header_row + 1; row_number < sheet.getLastRowNum(); row_number++){
|
||||
row = sheet.getRow(row_number);
|
||||
if (row != null) {
|
||||
cellIterator = row.cellIterator();
|
||||
|
||||
int col_number = 0;
|
||||
|
||||
boolean discard_row = false;
|
||||
ArrayList<String> al = new ArrayList<String>();
|
||||
while (cellIterator.hasNext() && !discard_row) {
|
||||
cell = cellIterator.next();
|
||||
tmp = getCellValue(cell).trim();
|
||||
tmp = tmp.replace("\n"," ");
|
||||
if (col_number == column_id &&
|
||||
((remove_empty && tmp.trim().equals("")) ||
|
||||
(!remove_id.equals("") && tmp.equals(remove_id))))
|
||||
discard_row = true;
|
||||
|
||||
if (replace_currency && col_number == currency_column)
|
||||
tmp = tmp.replace(from_currency, to_currency);
|
||||
|
||||
al.add(applyReplace(tmp, map_body));
|
||||
col_number++;
|
||||
}
|
||||
if (!discard_row) {
|
||||
csv_writer.addProject(al);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void writeCSVFile(){
|
||||
|
||||
csv_writer.writeFile(file_to_save);
|
||||
}
|
||||
|
||||
private InterfaceDescriptor prepareHTTPCSVDescriptor(){
|
||||
InterfaceDescriptor dex = new InterfaceDescriptor();
|
||||
dex.setBaseUrl("file://"+file_to_save);
|
||||
HashMap<String, String> params = new HashMap<String, String>();
|
||||
params.put("separator", descriptor.getParams().get("separator"));
|
||||
params.put("identifier",identifier);
|
||||
params.put("quote",descriptor.getParams().get("quote"));
|
||||
dex.setParams(params);
|
||||
return dex;
|
||||
}
|
||||
|
||||
public Iterable<String> parseFile() throws Exception{
|
||||
|
||||
|
||||
init();
|
||||
log.info("Getting header elements");
|
||||
getHeader();
|
||||
log.info("Getting sheet data");
|
||||
getData();
|
||||
log.info("Writing the csv file");
|
||||
writeCSVFile();
|
||||
log.info("Preparing to parse csv");
|
||||
|
||||
return collector.collect(prepareHTTPCSVDescriptor(),"","");
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
package eu.dnetlib.data.collector.plugins.excel;
|
||||
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Required;
|
||||
|
||||
/**
|
||||
* Created by miriam on 10/05/2017.
|
||||
*/
|
||||
public class ReadExcelPlugin extends AbstractCollectorPlugin{
|
||||
|
||||
private static final Log log = LogFactory.getLog(ReadExcelPlugin.class);
|
||||
@Autowired
|
||||
HttpCSVCollectorPlugin httpCSVCollectorPlugin;
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
Read r = new Read(interfaceDescriptor);
|
||||
r.setCollector(httpCSVCollectorPlugin);
|
||||
|
||||
try {
|
||||
return r.parseFile();
|
||||
}catch(Exception e){
|
||||
log.error("Error importing excel file");
|
||||
throw new CollectorServiceException(e);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
/**
|
||||
*
|
||||
*/
|
||||
package eu.dnetlib.data.collector.plugins.filesfrommetadata;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
|
||||
/**
|
||||
* @author sandro
|
||||
*
|
||||
*/
|
||||
public class FilesFromMetadataCollectorPlugin extends AbstractCollectorPlugin {

	/**
	 * {@inheritDoc}
	 * @see eu.dnetlib.data.collector.plugin.CollectorPlugin#collect(eu.dnetlib.data.collector.rmi.InterfaceDescriptor, java.lang.String, java.lang.String)
	 */
	@Override
	public Iterable<String> collect(final InterfaceDescriptor arg0, final String arg1, final String arg2) throws CollectorServiceException {
		// TODO Auto-generated method stub
		// NOTE(review): unimplemented stub — returning null will NPE any caller
		// that iterates the result; implement or throw an explicit exception.
		return null;
	}

}
|
|
@ -0,0 +1,61 @@
|
|||
package eu.dnetlib.data.collector.plugins.filesfrommetadata;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.collect.Lists;
|
||||
import eu.dnetlib.data.collector.functions.ParamValuesFunction;
|
||||
import eu.dnetlib.data.collector.rmi.ProtocolParameterValue;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
|
||||
/**
|
||||
* Created by alessia on 17/12/15.
|
||||
*/
|
||||
public class PopulateFileDownloadBasePath implements ParamValuesFunction {
|
||||
|
||||
private static final Log log = LogFactory.getLog(PopulateFileDownloadBasePath.class);
|
||||
@Autowired
|
||||
private UniqueServiceLocator serviceLocator;
|
||||
|
||||
@Value("${services.objectstore.basePathList.xquery}")
|
||||
private String xQueryForObjectStoreBasePath;
|
||||
|
||||
@Override
|
||||
public List<ProtocolParameterValue> findValues(final String s, final Map<String, String> map) {
|
||||
try {
|
||||
return Lists.transform(serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xQueryForObjectStoreBasePath),
|
||||
new Function<String, ProtocolParameterValue>() {
|
||||
@Override
|
||||
public ProtocolParameterValue apply(final String s) {
|
||||
return new ProtocolParameterValue(s, s);
|
||||
}
|
||||
});
|
||||
} catch (ISLookUpException e) {
|
||||
log.error("Cannot read Object store service properties", e);
|
||||
}
|
||||
return Lists.newArrayList();
|
||||
}
|
||||
|
||||
public UniqueServiceLocator getServiceLocator() {
|
||||
return serviceLocator;
|
||||
}
|
||||
|
||||
public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
|
||||
this.serviceLocator = serviceLocator;
|
||||
}
|
||||
|
||||
public String getxQueryForObjectStoreBasePath() {
|
||||
return xQueryForObjectStoreBasePath;
|
||||
}
|
||||
|
||||
public void setxQueryForObjectStoreBasePath(final String xQueryForObjectStoreBasePath) {
|
||||
this.xQueryForObjectStoreBasePath = xQueryForObjectStoreBasePath;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,89 @@
|
|||
package eu.dnetlib.data.collector.plugins.filesystem;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.google.common.collect.Iterators;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
/**
|
||||
* Class enabling lazy and recursive iteration of a filesystem tree. The iterator iterates over file paths.
|
||||
*
|
||||
* @author Andrea
|
||||
*
|
||||
*/
|
||||
public class FileSystemIterator implements Iterator<String> {
|
||||
|
||||
/** The logger */
|
||||
private static final Log log = LogFactory.getLog(FileSystemIterator.class);
|
||||
|
||||
private Set<String> extensions = Sets.newHashSet();
|
||||
private Iterator<Path> pathIterator;
|
||||
private String current;
|
||||
|
||||
public FileSystemIterator(final String baseDir, final String extensions) {
|
||||
if(StringUtils.isNotBlank(extensions)) {
|
||||
this.extensions = Sets.newHashSet(extensions.split(","));
|
||||
}
|
||||
try {
|
||||
this.pathIterator = Files.newDirectoryStream(Paths.get(baseDir)).iterator();
|
||||
this.current = walkTillNext();
|
||||
} catch (IOException e) {
|
||||
log.error("Cannot initialize File System Iterator. Is this path correct? " + baseDir);
|
||||
throw new RuntimeException("Filesystem collection error.", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return current != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String next() {
|
||||
String pivot = new String(current);
|
||||
current = walkTillNext();
|
||||
log.debug("Returning: " + pivot);
|
||||
return pivot;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {}
|
||||
|
||||
/**
|
||||
* Walk the filesystem recursively until it finds a candidate. Strategies: a) For any directory found during the walk, an iterator is
|
||||
* built and concat to the main one; b) Any file is checked against admitted extensions
|
||||
*
|
||||
* @return the next element to be returned by next call of this.next()
|
||||
*/
|
||||
private synchronized String walkTillNext() {
|
||||
while (pathIterator.hasNext()) {
|
||||
Path nextFilePath = pathIterator.next();
|
||||
if (Files.isDirectory(nextFilePath)) {
|
||||
// concat
|
||||
try {
|
||||
pathIterator = Iterators.concat(pathIterator, Files.newDirectoryStream(nextFilePath).iterator());
|
||||
log.debug("Adding folder iterator: " + nextFilePath.toString());
|
||||
} catch (IOException e) {
|
||||
log.error("Cannot create folder iterator! Is this path correct? " + nextFilePath.toString());
|
||||
return null;
|
||||
}
|
||||
} else {
|
||||
if (extensions.isEmpty() || extensions.contains(FilenameUtils.getExtension(nextFilePath.toString()))) {
|
||||
log.debug("Returning: " + nextFilePath.toString());
|
||||
return nextFilePath.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
package eu.dnetlib.data.collector.plugins.filesystem;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author andrea
|
||||
*
|
||||
*/
|
||||
public class FilesystemCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
|
||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||
if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
|
||||
return new FilesystemIterable(interfaceDescriptor);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,139 @@
|
|||
package eu.dnetlib.data.collector.plugins.filesystem;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import com.google.common.collect.Iterators;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.ximpleware.*;
|
||||
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.json.JSONObject;
|
||||
import org.json.XML;
|
||||
|
||||
/**
|
||||
* The Class FilesystemIterable.
|
||||
*
|
||||
* @author Sandro, Michele, Andrea
|
||||
*/
|
||||
public class FilesystemIterable implements Iterable<String> {

	/**
	 * The Constant log.
	 */
	private static final Log log = LogFactory.getLog(FilesystemIterable.class);

	/**
	 * The base dir under which files are collected.
	 */
	private File baseDir;

	/**
	 * Comma-separated list of admitted file extensions (may be null = all).
	 */
	private String extensions;

	/**
	 * File format (json / xml); defaults to xml.
	 **/
	private String fileFormat = "xml";

	private List<String> supportedFormats = Lists.newArrayList("xml", "json");

	// when true, a <header><objIdentifier> derived from the file name is injected into each record
	private boolean setObjIdentifierFromFileName = false;

	/**
	 * Instantiates a new filesystem iterable.
	 *
	 * @param descriptor the descriptor; its baseUrl must be a file:// URL pointing to an existing directory
	 * @throws CollectorServiceException the collector service exception
	 */
	public FilesystemIterable(final InterfaceDescriptor descriptor) throws CollectorServiceException {
		try {
			final String baseUrl = descriptor.getBaseUrl();
			URL basePath = new URL(baseUrl);
			this.baseDir = new File(basePath.getPath());
			if (!baseDir.exists()) { throw new CollectorServiceException(String.format("The base ULR %s, does not exist", basePath.getPath())); }
			this.extensions = descriptor.getParams().get("extensions");
			if (descriptor.getParams().containsKey("fileFormat")) fileFormat = descriptor.getParams().get("fileFormat");
			if (!supportedFormats.contains(fileFormat))
				throw new CollectorServiceException("File format " + fileFormat + " not supported. Supported formats are: " + StringUtils
						.join(supportedFormats, ','));
			if (descriptor.getParams().containsKey("setObjIdentifierFromFileName")) {
				setObjIdentifierFromFileName = Boolean.parseBoolean(descriptor.getParams().get("setObjIdentifierFromFileName"));
			}
		} catch (MalformedURLException e) {
			throw new CollectorServiceException("Filesystem collector failed! ", e);
		}
	}

	/**
	 * {@inheritDoc}
	 *
	 * Lazily transforms each file path produced by {@link FileSystemIterator}
	 * into a record string: JSON files are wrapped into a <record> XML envelope,
	 * XML files are entity-cleaned (and BOM-stripped). On any failure an empty
	 * string is returned for that file and the error is logged.
	 *
	 * @see java.lang.Iterable#iterator()
	 */
	@Override
	public Iterator<String> iterator() {
		final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), extensions);
		return Iterators.transform(fsi, inputFileName -> {
			FileInputStream fileInputStream = null;
			try {
				fileInputStream = new FileInputStream(inputFileName);
				// NOTE(review): IOUtils.toString without a charset uses the platform
				// default encoding — confirm inputs are encoded accordingly.
				final String s = IOUtils.toString(fileInputStream);
				if (fileFormat.equalsIgnoreCase("json")) {
					JSONObject json = new JSONObject(s);
					JSONObject obj = new JSONObject();
					if (setObjIdentifierFromFileName) {
						obj.put("header", new JSONObject().put("objIdentifier", FilenameUtils.getBaseName(inputFileName)));
					}
					obj.put("metadata", json);
					log.debug(obj.toString());
					return XML.toString(obj, "record");
				}
				// strip a leading UTF-8 BOM before entity cleaning
				String cleanedXML = XmlCleaner.cleanAllEntities(s.startsWith("\uFEFF") ? s.substring(1) : s);
				if (setObjIdentifierFromFileName) {
					return addObjIdentifier(cleanedXML, FilenameUtils.getBaseName(inputFileName));
				} else return cleanedXML;
			} catch (VTDException e) {
				log.error("Cannot process with VTD to set the objIdentifier " + inputFileName);
				return "";
			} catch (Exception e) {
				log.error("Unable to read " + inputFileName);
				return "";
			} finally {
				if (fileInputStream != null) {
					try {
						fileInputStream.close();
					} catch (IOException e) {
						log.error("Unable to close inputstream for " + inputFileName);
					}
				}
			}
		});
	}

	/**
	 * Wraps the given XML document into
	 * {@code <record><header><objIdentifier>...</objIdentifier></header><metadata>...</metadata></record>}
	 * using VTD-XML, leaving the original document untouched when it has no root element.
	 *
	 * @param xml the source document
	 * @param objidentifier the identifier to inject
	 * @return the wrapped document, UTF-8 encoded
	 */
	private String addObjIdentifier(String xml, String objidentifier) throws VTDException, IOException {
		VTDGen vg = new VTDGen(); // Instantiate VTDGen
		XMLModifier xm = new XMLModifier(); //Instantiate XMLModifier
		vg.setDoc(xml.getBytes("UTF-8"));
		vg.parse(false);
		VTDNav vn = vg.getNav();
		xm.bind(vn);
		if (vn.toElement(VTDNav.ROOT)) {
			xm.insertBeforeElement("<record><header><objIdentifier>" + objidentifier + "</objIdentifier></header><metadata>");
			xm.insertAfterElement("</metadata></record>");
		}
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
		xm.output(baos);
		return baos.toString("UTF-8");
	}
}
|
|
@ -0,0 +1,66 @@
|
|||
package eu.dnetlib.data.collector.plugins.ftp;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.common.collect.Sets;
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.springframework.beans.factory.annotation.Required;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Author: Andrea Mannocci
|
||||
*
|
||||
*/
|
||||
public class FtpCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
private FtpIteratorFactory ftpIteratorFactory;
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
|
||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||
final String username = interfaceDescriptor.getParams().get("username");
|
||||
final String password = interfaceDescriptor.getParams().get("password");
|
||||
final String recursive = interfaceDescriptor.getParams().get("recursive");
|
||||
final String extensions = interfaceDescriptor.getParams().get("extensions");
|
||||
|
||||
if ((baseUrl == null) || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
|
||||
if ((username == null) || username.isEmpty()) { throw new CollectorServiceException("Param 'username' is null or empty"); }
|
||||
if ((password == null) || password.isEmpty()) { throw new CollectorServiceException("Param 'password' is null or empty"); }
|
||||
if ((recursive == null) || recursive.isEmpty()) { throw new CollectorServiceException("Param 'recursive' is null or empty"); }
|
||||
if ((extensions == null) || extensions.isEmpty()) { throw new CollectorServiceException("Param 'extensions' is null or empty"); }
|
||||
|
||||
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); }
|
||||
|
||||
return new Iterable<String>() {
|
||||
|
||||
boolean isRecursive = "true".equals(recursive);
|
||||
|
||||
Set<String> extensionsSet = parseSet(extensions);
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return getFtpIteratorFactory().newIterator(baseUrl, username, password, isRecursive, extensionsSet, fromDate);
|
||||
}
|
||||
|
||||
private Set<String> parseSet(final String extensions) {
|
||||
return Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(extensions));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public FtpIteratorFactory getFtpIteratorFactory() {
|
||||
return ftpIteratorFactory;
|
||||
}
|
||||
|
||||
@Required
|
||||
public void setFtpIteratorFactory(final FtpIteratorFactory ftpIteratorFactory) {
|
||||
this.ftpIteratorFactory = ftpIteratorFactory;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,208 @@
|
|||
package eu.dnetlib.data.collector.plugins.ftp;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
|
||||
import org.apache.commons.io.output.ByteArrayOutputStream;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.commons.net.ftp.FTPClient;
|
||||
import org.apache.commons.net.ftp.FTPFile;
|
||||
import org.apache.commons.net.ftp.FTPReply;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Author: Andrea Mannocci
|
||||
*
|
||||
*/
|
||||
public class FtpIterator implements Iterator<String> {
|
||||
|
||||
private static final Log log = LogFactory.getLog(FtpIterator.class);
|
||||
|
||||
private static final int MAX_RETRIES = 5;
|
||||
private static final int DEFAULT_TIMEOUT = 30000;
|
||||
private static final long BACKOFF_MILLIS = 10000;
|
||||
|
||||
private FTPClient ftpClient;
|
||||
private String ftpServerAddress;
|
||||
private String remoteFtpBasePath;
|
||||
private String username;
|
||||
private String password;
|
||||
private boolean isRecursive;
|
||||
private Set<String> extensionsSet;
|
||||
private boolean incremental;
|
||||
private DateTime fromDate = null;
|
||||
private DateTimeFormatter simpleDateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd");
|
||||
|
||||
private Queue<String> queue;
|
||||
|
||||
public FtpIterator(final String baseUrl, final String username, final String password, final boolean isRecursive,
|
||||
final Set<String> extensionsSet, String fromDate) {
|
||||
this.username = username;
|
||||
this.password = password;
|
||||
this.isRecursive = isRecursive;
|
||||
this.extensionsSet = extensionsSet;
|
||||
this.incremental = StringUtils.isNotBlank(fromDate);
|
||||
if (incremental) {
|
||||
//I expect fromDate in the format 'yyyy-MM-dd'. See class eu.dnetlib.msro.workflows.nodes.collect.FindDateRangeForIncrementalHarvestingJobNode .
|
||||
this.fromDate = DateTime.parse(fromDate, simpleDateTimeFormatter);
|
||||
log.debug("fromDate string: " + fromDate + " -- parsed: " + this.fromDate.toString());
|
||||
}
|
||||
try {
|
||||
URL server = new URL(baseUrl);
|
||||
this.ftpServerAddress = server.getHost();
|
||||
this.remoteFtpBasePath = server.getPath();
|
||||
} catch (MalformedURLException e1) {
|
||||
throw new CollectorServiceRuntimeException("Malformed URL exception " + baseUrl);
|
||||
}
|
||||
|
||||
connectToFtpServer();
|
||||
initializeQueue();
|
||||
}
|
||||
|
||||
private void connectToFtpServer() {
|
||||
ftpClient = new FTPClient();
|
||||
ftpClient.setDefaultTimeout(DEFAULT_TIMEOUT);
|
||||
ftpClient.setDataTimeout(DEFAULT_TIMEOUT);
|
||||
ftpClient.setConnectTimeout(DEFAULT_TIMEOUT);
|
||||
try {
|
||||
ftpClient.connect(ftpServerAddress);
|
||||
|
||||
// try to login
|
||||
if (!ftpClient.login(username, password)) {
|
||||
ftpClient.logout();
|
||||
throw new CollectorServiceRuntimeException("Unable to login to FTP server " + ftpServerAddress);
|
||||
}
|
||||
int reply = ftpClient.getReplyCode();
|
||||
if (!FTPReply.isPositiveCompletion(reply)) {
|
||||
ftpClient.disconnect();
|
||||
throw new CollectorServiceRuntimeException("Unable to connect to FTP server " + ftpServerAddress);
|
||||
}
|
||||
|
||||
ftpClient.enterLocalPassiveMode();
|
||||
log.info("Connected to FTP server " + ftpServerAddress);
|
||||
log.info(String.format("FTP collecting from %s with recursion = %s", remoteFtpBasePath, isRecursive));
|
||||
} catch (IOException e) {
|
||||
throw new CollectorServiceRuntimeException("Unable to connect to FTP server " + ftpServerAddress);
|
||||
}
|
||||
}
|
||||
|
||||
private void disconnectFromFtpServer() {
|
||||
try {
|
||||
if (ftpClient.isConnected()) {
|
||||
ftpClient.logout();
|
||||
ftpClient.disconnect();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("Failed to logout & disconnect from the FTP server", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void initializeQueue() {
|
||||
queue = new LinkedList<String>();
|
||||
listDirectoryRecursive(remoteFtpBasePath, "");
|
||||
}
|
||||
|
||||
private void listDirectoryRecursive(final String parentDir, final String currentDir) {
|
||||
String dirToList = parentDir;
|
||||
if (!currentDir.equals("")) {
|
||||
dirToList += "/" + currentDir;
|
||||
}
|
||||
FTPFile[] subFiles;
|
||||
try {
|
||||
subFiles = ftpClient.listFiles(dirToList);
|
||||
if ((subFiles != null) && (subFiles.length > 0)) {
|
||||
for (FTPFile aFile : subFiles) {
|
||||
String currentFileName = aFile.getName();
|
||||
|
||||
if (currentFileName.equals(".") || currentFileName.equals("..")) {
|
||||
// skip parent directory and directory itself
|
||||
continue;
|
||||
}
|
||||
if (aFile.isDirectory()) {
|
||||
if (isRecursive) {
|
||||
listDirectoryRecursive(dirToList, currentFileName);
|
||||
}
|
||||
} else {
|
||||
// test the file for extensions compliance and, just in case, add it to the list.
|
||||
for (String ext : extensionsSet) {
|
||||
if (currentFileName.endsWith(ext)) {
|
||||
//incremental mode: let's check the last update date
|
||||
if(incremental){
|
||||
Calendar timestamp = aFile.getTimestamp();
|
||||
DateTime lastModificationDate = new DateTime(timestamp);
|
||||
if(lastModificationDate.isAfter(fromDate)){
|
||||
queue.add(dirToList + "/" + currentFileName);
|
||||
log.debug(currentFileName + " has changed and must be re-collected");
|
||||
} else {
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug(currentFileName + " has not changed since last collection");
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
//not incremental: just add it to the queue
|
||||
queue.add(dirToList + "/" + currentFileName);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new CollectorServiceRuntimeException("Unable to list FTP remote folder", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (queue.isEmpty()) {
|
||||
disconnectFromFtpServer();
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
String nextRemotePath = queue.remove();
|
||||
int nRepeat = 0;
|
||||
while (nRepeat < MAX_RETRIES) {
|
||||
try {
|
||||
OutputStream baos = new ByteArrayOutputStream();
|
||||
if (!ftpClient.isConnected()) {
|
||||
connectToFtpServer();
|
||||
}
|
||||
ftpClient.retrieveFile(nextRemotePath, baos);
|
||||
|
||||
log.debug(String.format("Collected file from FTP: %s%s", ftpServerAddress, nextRemotePath));
|
||||
return baos.toString();
|
||||
} catch (IOException e) {
|
||||
nRepeat++;
|
||||
log.warn(String.format("An error occurred [%s] for %s%s, retrying.. [retried %s time(s)]", e.getMessage(), ftpServerAddress, nextRemotePath,
|
||||
nRepeat));
|
||||
disconnectFromFtpServer();
|
||||
try {
|
||||
Thread.sleep(BACKOFF_MILLIS);
|
||||
} catch (InterruptedException e1) {
|
||||
log.error(e1);
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new CollectorServiceRuntimeException(String.format("Impossible to retrieve FTP file %s after %s retries. Aborting FTP collection.", nextRemotePath, nRepeat));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
package eu.dnetlib.data.collector.plugins.ftp;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Author: Andrea Mannocci
|
||||
*
|
||||
*/
|
||||
public class FtpIteratorFactory {
|
||||
|
||||
public Iterator<String> newIterator(final String baseUrl,
|
||||
final String username,
|
||||
final String password,
|
||||
final boolean isRecursive,
|
||||
final Set<String> extensionsSet, final String fromDate) {
|
||||
return new FtpIterator(baseUrl, username, password, isRecursive, extensionsSet, fromDate);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
package eu.dnetlib.data.collector.plugins.httpfilename;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.HttpConnector;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
|
||||
|
||||
/**
|
||||
* Created by miriam on 07/05/2018.
|
||||
*/
|
||||
public class Connector extends HttpConnector implements ConnectorInterface {
|
||||
private String response;
|
||||
|
||||
@Override
|
||||
public void get(final String requestUrl) throws CollectorServiceException {
|
||||
response = getInputSource(requestUrl);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getResponse() {
|
||||
return response;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isStatusOk() {
|
||||
return (response != null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean responseTypeContains(String string) {
|
||||
String responseType = getResponseType();
|
||||
if (responseType != null)
|
||||
return responseType.contains(string);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
package eu.dnetlib.data.collector.plugins.httpfilename;
|
||||
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
|
||||
/**
|
||||
* Created by miriam on 07/05/2018.
|
||||
*/
|
||||
/**
 * Minimal contract of the HTTP connector used by the httpfilename collector plugin.
 *
 * Created by miriam on 07/05/2018.
 */
public interface ConnectorInterface {

	/** Performs a GET on the given url, storing the response for later retrieval. */
	void get(final String requestUrl) throws CollectorServiceException;

	/** @return the body of the last response (may be null). */
	String getResponse();

	/** @return true when a response body is available. */
	boolean isStatusOk();

	/** @return true when the content type of the last response contains the given string. */
	boolean responseTypeContains(String string);

}
|
|
@ -0,0 +1,190 @@
|
|||
package eu.dnetlib.data.collector.plugins.httpfilename;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.json.JSONObject;
|
||||
import org.json.XML;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
/**
|
||||
* Created by miriam on 04/05/2018.
|
||||
*/
|
||||
public class HTTPWithFileNameCollectorIterable implements Iterable<String> {
|
||||
|
||||
private static final Log log = LogFactory.getLog(HTTPWithFileNameCollectorIterable.class);
|
||||
|
||||
private static final String JUNK = "<resource><url>%s</url><DOI>JUNK</DOI></resource>";
|
||||
public static final String APP_JSON = "application/json";
|
||||
public static final String APP_XML = "application/xml";
|
||||
public static final String TEXT_HTML = "text/html";
|
||||
private final ArrayBlockingQueue<String> queue = new ArrayBlockingQueue<String>(100);
|
||||
|
||||
|
||||
|
||||
|
||||
private String filterParam;
|
||||
|
||||
int total = 0;
|
||||
int filtered = 0;
|
||||
|
||||
public HTTPWithFileNameCollectorIterable(String startUrl, String filter){
|
||||
|
||||
this.filterParam = filter;
|
||||
Thread ft = new Thread(new FillMetaQueue(startUrl) );
|
||||
ft.start();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return new HttpWithFileNameCollectorIterator(queue);
|
||||
}
|
||||
|
||||
private class FillMetaQueue implements Runnable {
|
||||
final Connector c = new Connector();
|
||||
|
||||
private final List<String> metas = Collections.synchronizedList(new ArrayList<String>());
|
||||
private final List<String> urls = Collections.synchronizedList(new ArrayList<>());
|
||||
|
||||
public FillMetaQueue(String startUrl){
|
||||
if(!startUrl.isEmpty()){
|
||||
urls.add(startUrl);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void fillQueue() {
|
||||
String url;
|
||||
|
||||
while((metas.size()>0 || urls.size() > 0 )) {
|
||||
log.debug("metas.size() = " + metas.size() + " urls.size() = " + urls.size() + " queue.size() = " +queue.size());
|
||||
if (metas.size() > 0) {
|
||||
url = metas.remove(0);
|
||||
try {
|
||||
c.get(url);
|
||||
} catch (CollectorServiceException e) {
|
||||
log.info("Impossible to collect url: " + url + " error: " + e.getMessage());
|
||||
}
|
||||
if(c.isStatusOk()){
|
||||
try {
|
||||
String ret = c.getResponse();
|
||||
if (ret != null && ret.length()>0) {
|
||||
if (!containsFilter(ret))
|
||||
queue.put(addFilePath(ret, url, url.endsWith(".json")));
|
||||
//queue.offer(addFilePath(ret, url, url.endsWith(".json")), HttpWithFileNameCollectorIterator.waitTime, TimeUnit.SECONDS);
|
||||
else
|
||||
filtered++;
|
||||
total++;
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
log.info("not inserted in queue element associate to url " + url + " error: " + e.getMessage() );
|
||||
|
||||
}
|
||||
}
|
||||
} else {
|
||||
url = urls.remove(0);
|
||||
try {
|
||||
c.get(url);
|
||||
} catch (CollectorServiceException e) {
|
||||
log.info("Impossible to collect url: " + url + " error: " + e.getMessage());
|
||||
}
|
||||
if(c.isStatusOk()) {
|
||||
if (c.responseTypeContains(TEXT_HTML)){
|
||||
recurFolder(c.getResponse(), url);
|
||||
} else if(c.responseTypeContains(APP_JSON) || c.responseTypeContains(APP_XML)){
|
||||
try {
|
||||
final String element = addFilePath(c.getResponse(), url, c.responseTypeContains(APP_JSON));
|
||||
//queue.offer(element, HttpWithFileNameCollectorIterator.waitTime, TimeUnit.SECONDS);
|
||||
queue.put(element);
|
||||
} catch (InterruptedException e) {
|
||||
log.info("not inserted in queue element associate to url " + url + " error: " + e.getMessage() );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
try {
|
||||
//queue.offer(HttpWithFileNameCollectorIterator.TERMINATOR, HttpWithFileNameCollectorIterator.waitTime, TimeUnit.SECONDS);
|
||||
queue.put(HttpWithFileNameCollectorIterator.TERMINATOR);
|
||||
} catch (InterruptedException e) {
|
||||
throw new IllegalStateException(String.format("could not add element to queue for more than %s%s", HttpWithFileNameCollectorIterator.waitTime, TimeUnit.SECONDS), e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private boolean containsFilter(String meta){
|
||||
if (filterParam == null || filterParam.isEmpty())
|
||||
return false;
|
||||
String[] filter = filterParam.split(";");
|
||||
for(String item:filter){
|
||||
if (meta.contains(item))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private String addFilePath(String meta, String url, boolean isJson){
|
||||
String path = url.replace("metadata", "pdf");
|
||||
|
||||
try {
|
||||
if(isJson)
|
||||
meta = meta.substring(0, meta.length() - 1) + ",'downloadFileUrl':'" + path.substring(0, path.indexOf(".json")) + ".pdf'}";
|
||||
else {
|
||||
|
||||
if (meta.contains("<!DOCTYPE")) {
|
||||
meta = meta.substring(meta.indexOf("<!DOCTYPE"));
|
||||
meta = meta.substring(meta.indexOf(">") + 1);
|
||||
}
|
||||
int index = meta.lastIndexOf("</");
|
||||
meta = meta.substring(0, index) + "<downloadFileUrl>" + path.substring(0, path.indexOf(".xml")) + ".pdf</downloadFileUrl>" + meta.substring(index);
|
||||
}
|
||||
} catch(Exception ex) {
|
||||
log.info("not file with extension .json or .xml");
|
||||
}
|
||||
|
||||
|
||||
if(isJson) {
|
||||
try {
|
||||
return XML.toString(new JSONObject("{'resource':" + meta + "}"));
|
||||
} catch(Exception e) {
|
||||
log.fatal("Impossible to transform json object to xml \n" + meta + "\n " + e.getMessage() + "\n" + url);
|
||||
// throw new RuntimeException();
|
||||
final String junk = String.format(JUNK, url);
|
||||
log.warn("returning " + junk);
|
||||
return junk;
|
||||
}
|
||||
}
|
||||
return meta;
|
||||
}
|
||||
|
||||
private void recurFolder(String text, String url){
|
||||
Document doc = Jsoup.parse(text);
|
||||
Elements links = doc.select("a");
|
||||
for(Element e:links){
|
||||
if (!e.text().equals("../")){
|
||||
String file = e.attr("href");
|
||||
if(file.endsWith(".json") || file.endsWith(".xml"))
|
||||
metas.add(url+file);
|
||||
else
|
||||
urls.add(url+file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
fillQueue();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
package eu.dnetlib.data.collector.plugins.httpfilename;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
/**
|
||||
* Created by miriam on 04/05/2018.
|
||||
*/
|
||||
public class HTTPWithFileNameCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(InterfaceDescriptor interfaceDescriptor, String s, String s1) throws CollectorServiceException {
|
||||
return new HTTPWithFileNameCollectorIterable(interfaceDescriptor.getBaseUrl(), interfaceDescriptor.getParams().get("filter"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
package eu.dnetlib.data.collector.plugins.httpfilename;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Created by miriam on 25/06/2018.
|
||||
*/
|
||||
public class HttpWithFileNameCollectorIterator implements Iterator<String> {
|
||||
public static final String TERMINATOR = "FINITO";
|
||||
private static final Log log = LogFactory.getLog(HttpWithFileNameCollectorIterator.class);
|
||||
|
||||
private final ArrayBlockingQueue<String> queue;
|
||||
|
||||
public static final long waitTime = 60L;
|
||||
|
||||
private String last = "<resource><DOI>JUNK</DOI></resource>";
|
||||
|
||||
public HttpWithFileNameCollectorIterator(ArrayBlockingQueue<String> queue) {
|
||||
this.queue = queue;
|
||||
extractFromQueue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
|
||||
|
||||
//return !(Objects.equals(last, TERMINATOR) || Objects.equals(last,null));
|
||||
return !(Objects.equals(last, TERMINATOR));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
try{
|
||||
|
||||
return last;
|
||||
|
||||
}finally{
|
||||
extractFromQueue();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void extractFromQueue() {
|
||||
|
||||
|
||||
try {
|
||||
last = queue.take();
|
||||
//last = queue.poll(waitTime, TimeUnit.SECONDS);
|
||||
}catch(InterruptedException e){
|
||||
log.warn("Interrupted while waiting for element to consume");
|
||||
throw new NoSuchElementException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
package eu.dnetlib.data.collector.plugins.httplist;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.plugins.HttpConnector;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
public class HttpListCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Autowired
|
||||
private HttpConnector httpConnector;
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||
final String listAddress = interfaceDescriptor.getParams().get("listUrl");
|
||||
|
||||
return () -> new HttpListIterator(baseUrl, listAddress, httpConnector);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
package eu.dnetlib.data.collector.plugins.httplist;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.HttpConnector;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.Iterator;
|
||||
|
||||
public class HttpListIterator implements Iterator<String> {
|
||||
|
||||
private HttpConnector httpConnector;
|
||||
|
||||
private String baseUrl;
|
||||
private String currentLine;
|
||||
private BufferedReader reader;
|
||||
|
||||
public HttpListIterator(final String baseUrl, final String listAddress, final HttpConnector httpConnector) {
|
||||
try {
|
||||
this.baseUrl = baseUrl;
|
||||
this.reader = new BufferedReader(new StringReader(download(listAddress)));
|
||||
this.httpConnector = httpConnector;
|
||||
this.currentLine = reader.readLine();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Error creating iterator", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean hasNext() {
|
||||
return StringUtils.isNotBlank(currentLine);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String next() {
|
||||
try {
|
||||
if (StringUtils.isNotBlank(currentLine)) {
|
||||
return download(baseUrl + currentLine);
|
||||
} else {
|
||||
throw new RuntimeException("Iterator has reached the end");
|
||||
}
|
||||
} finally {
|
||||
try {
|
||||
this.currentLine = reader.readLine();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Error obtaining next element " + currentLine, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private String download(final String url) {
|
||||
try {
|
||||
return httpConnector.getInputSource(url);
|
||||
} catch (CollectorServiceException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {}
|
||||
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
package eu.dnetlib.data.collector.plugins.mongo;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileReader;
|
||||
import java.util.Iterator;
|
||||
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.information.collectionservice.rmi.CollectionServiceException;
|
||||
|
||||
/**
|
||||
* The Class MongoDumpIterable.
|
||||
*/
|
||||
public class MongoDumpIterable implements Iterable<String> {
|
||||
|
||||
/** The input stream. */
|
||||
private final FileReader inputStream;
|
||||
|
||||
/**
|
||||
* Instantiates a new mongo dump iterable.
|
||||
*
|
||||
* @param inputFile the input file
|
||||
* @throws CollectionServiceException the collection service exception
|
||||
*/
|
||||
public MongoDumpIterable(final File inputFile) throws CollectorServiceException {
|
||||
try {
|
||||
this.inputStream = new FileReader(inputFile);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new CollectorServiceException("Error unable to open inputStream", e);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.lang.Iterable#iterator()
|
||||
*/
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return new MongoDumpIterator(inputStream);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
package eu.dnetlib.data.collector.plugins.mongo;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import com.google.gson.JsonElement;
|
||||
import com.google.gson.JsonObject;
|
||||
import com.google.gson.JsonParser;
|
||||
|
||||
public class MongoDumpIterator implements Iterator<String> {
|
||||
|
||||
private final BufferedReader inputStream;
|
||||
private String currentLine = null;
|
||||
|
||||
public MongoDumpIterator(final FileReader inputStream) {
|
||||
this.inputStream = new BufferedReader(inputStream);
|
||||
this.currentLine = getNextLine();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return currentLine != null;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
final String returnedString = this.currentLine;
|
||||
this.currentLine = getNextLine();
|
||||
return returnedString;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
private String getNextLine() {
|
||||
try {
|
||||
String input = inputStream.readLine();
|
||||
while (input != null) {
|
||||
JsonElement jElement = new JsonParser().parse(input);
|
||||
JsonObject jobject = jElement.getAsJsonObject();
|
||||
if (jobject.has("body")) { return jobject.get("body").getAsString(); }
|
||||
input = inputStream.readLine();
|
||||
}
|
||||
return null;
|
||||
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
package eu.dnetlib.data.collector.plugins.mongo;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
public class MongoDumpPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||
if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
|
||||
final File f = new File(baseUrl);
|
||||
if (f.exists() == false) { throw new CollectorServiceException("the file at url " + baseUrl + " does not exists"); }
|
||||
|
||||
return new MongoDumpIterable(f);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
package eu.dnetlib.data.collector.plugins.oai;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.common.collect.Iterators;
|
||||
import com.google.common.collect.Lists;
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.springframework.beans.factory.annotation.Required;
|
||||
|
||||
public class OaiCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
private static final String FORMAT_PARAM = "format";
|
||||
private static final String OAI_SET_PARAM = "set";
|
||||
|
||||
private OaiIteratorFactory oaiIteratorFactory;
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||
final String mdFormat = interfaceDescriptor.getParams().get(FORMAT_PARAM);
|
||||
final String setParam = interfaceDescriptor.getParams().get(OAI_SET_PARAM);
|
||||
final List<String> sets = Lists.newArrayList();
|
||||
if (setParam != null) {
|
||||
sets.addAll(Lists.newArrayList(Splitter.on(",").omitEmptyStrings().trimResults().split(setParam)));
|
||||
}
|
||||
if (sets.isEmpty()) {
|
||||
// If no set is defined, ALL the sets must be harvested
|
||||
sets.add("");
|
||||
}
|
||||
|
||||
if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
|
||||
|
||||
if (mdFormat == null || mdFormat.isEmpty()) { throw new CollectorServiceException("Param 'mdFormat' is null or empty"); }
|
||||
|
||||
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); }
|
||||
|
||||
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + untilDate); }
|
||||
|
||||
return () -> Iterators.concat(
|
||||
sets.stream()
|
||||
.map(set -> oaiIteratorFactory.newIterator(baseUrl, mdFormat, set, fromDate, untilDate))
|
||||
.iterator());
|
||||
}
|
||||
|
||||
public OaiIteratorFactory getOaiIteratorFactory() {
|
||||
return oaiIteratorFactory;
|
||||
}
|
||||
|
||||
@Required
|
||||
public void setOaiIteratorFactory(final OaiIteratorFactory oaiIteratorFactory) {
|
||||
this.oaiIteratorFactory = oaiIteratorFactory;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,168 @@
|
|||
package eu.dnetlib.data.collector.plugins.oai;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.Iterator;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.PriorityBlockingQueue;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.HttpConnector;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
|
||||
|
||||
/**
 * Iterator over the records of an OAI-PMH endpoint. Pages are fetched lazily:
 * the first page on the first hasNext()/next() call, the following pages (via
 * the resumption token) whenever the local queue runs empty.
 */
public class OaiIterator implements Iterator<String> {

	private static final Log log = LogFactory.getLog(OaiIterator.class); // NOPMD by marko on 11/24/08 5:02 PM

	// records of the current page, consumed by next()
	private Queue<String> queue = new PriorityBlockingQueue<String>();
	private SAXReader reader = new SAXReader();

	private String baseUrl;
	private String set;
	private String mdFormat;
	private String fromDate;
	private String untilDate;
	// resumption token of the next page; null or empty when there are no more pages
	private String token;
	// true once the first page has been requested
	private boolean started;
	private HttpConnector httpConnector;

	public OaiIterator(final String baseUrl, final String mdFormat, final String set, final String fromDate, final String untilDate, final HttpConnector httpConnector) {
		this.baseUrl = baseUrl;
		this.mdFormat = mdFormat;
		this.set = set;
		this.fromDate = fromDate;
		this.untilDate = untilDate;
		this.started = false;
		this.httpConnector = httpConnector;
	}

	// lazily downloads the first page, keeping its resumption token
	private void verifyStarted() {
		if (!this.started) {
			this.started = true;
			try {
				this.token = firstPage();
			} catch (CollectorServiceException e) {
				throw new RuntimeException(e);
			}
		}
	}

	@Override
	public boolean hasNext() {
		synchronized (queue) {
			verifyStarted();
			return !queue.isEmpty();
		}
	}

	@Override
	public String next() {
		synchronized (queue) {
			verifyStarted();
			final String res = queue.poll();
			// pre-fetch following pages while the queue is empty and a resumption
			// token is available, so the next hasNext() sees a non-empty queue
			while (queue.isEmpty() && (token != null) && !token.isEmpty()) {
				try {
					token = otherPages(token);
				} catch (CollectorServiceException e) {
					throw new RuntimeException(e);
				}
			}
			return res;
		}
	}

	@Override
	public void remove() {}

	// builds and downloads the initial ListRecords request (format, set, date range)
	private String firstPage() throws CollectorServiceException {
		try {
			String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat,"UTF-8");
			if ((set != null) && !set.isEmpty()) {
				url += "&set=" + URLEncoder.encode(set,"UTF-8");
			}
			if ((fromDate != null) && fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
				url += "&from=" + URLEncoder.encode(fromDate,"UTF-8");
			}
			if ((untilDate != null) && untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
				url += "&until=" + URLEncoder.encode(untilDate,"UTF-8");
			}
			log.info("Start harvesting using url: " + url);

			return downloadPage(url);
		} catch(UnsupportedEncodingException e) {
			throw new CollectorServiceException(e);
		}
	}

	// best-effort extraction of the resumption token from a raw, unparseable page
	private String extractResumptionToken(final String xml) {

		final String s = StringUtils.substringAfter(xml, "<resumptionToken");
		if (s == null){
			return null;
		}

		final String result = StringUtils.substringBetween(s, ">", "</");
		if (result == null)
			return null;
		return result.trim();

	}

	// downloads the page addressed by a resumption token
	private String otherPages(final String resumptionToken) throws CollectorServiceException {
		try {
			return downloadPage(baseUrl + "?verb=ListRecords&resumptionToken=" + URLEncoder.encode(resumptionToken,"UTF-8"));
		} catch (UnsupportedEncodingException e) {
			throw new CollectorServiceException(e);
		}
	}

	// downloads one OAI page, enqueues its records and returns the next resumption token
	private String downloadPage(final String url) throws CollectorServiceException {

		final String xml = httpConnector.getInputSource(url);
		Document doc;
		try {
			doc = reader.read(new StringReader(xml));
		} catch (DocumentException e) {
			log.warn("Error parsing xml, I try to clean it: " + xml, e);
			final String cleaned = XmlCleaner.cleanAllEntities(xml);
			try {
				doc = reader.read(new StringReader(cleaned));
			} catch (DocumentException e1) {
				// even the cleaned page does not parse: salvage the resumption token, if any
				final String resumptionToken = extractResumptionToken(xml);
				if (resumptionToken == null)
					throw new CollectorServiceException("Error parsing cleaned document:" + cleaned, e1);
				return resumptionToken;
			}
		}

		final Node errorNode = doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
		if (errorNode != null) {
			final String code = errorNode.valueOf("@code");
			if ("noRecordsMatch".equalsIgnoreCase(code.trim())) {
				// empty result set: not a failure, just stop paging
				log.warn("noRecordsMatch for oai call: " + url);
				return null;
			} else {
				throw new CollectorServiceException(code + " - " + errorNode.getText());
			}
		}

		for (Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
			queue.add(((Node) o).asXML());
		}

		return doc.valueOf("//*[local-name()='resumptionToken']");

	}

}
|
|
@ -0,0 +1,25 @@
|
|||
package eu.dnetlib.data.collector.plugins.oai;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Required;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.HttpConnector;
|
||||
|
||||
public class OaiIteratorFactory {
|
||||
|
||||
private HttpConnector httpConnector;
|
||||
|
||||
public Iterator<String> newIterator(final String baseUrl, final String mdFormat, final String set, final String fromDate, final String untilDate) {
|
||||
return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, httpConnector);
|
||||
}
|
||||
|
||||
public HttpConnector getHttpConnector() {
|
||||
return httpConnector;
|
||||
}
|
||||
|
||||
@Required
|
||||
public void setHttpConnector(HttpConnector httpConnector) {
|
||||
this.httpConnector = httpConnector;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,268 @@
|
|||
package eu.dnetlib.data.collector.plugins.oai.engine;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * Repairs XML fragments containing HTML entities or characters that are illegal in XML,
 * so that they can be parsed by a standard XML parser.
 *
 * NOTE: the entity string literals below were previously corrupted by HTML-entity decoding
 * (e.g. {@code "&quot;"} had collapsed to a bare quote); they are restored here from the
 * per-entry comments and the cp1252 code points they documented.
 *
 * @author jochen, Andreas Czerniak
 */
public class XmlCleaner {

	/**
	 * Pattern for numeric entities.
	 */
	private static Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); //$NON-NLS-1$

	// see https://www.w3.org/TR/REC-xml/#charsets : references to control characters are never legal XML
	private static Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");

	/**
	 * Pattern that negates the allowable XML 4 byte unicode characters. Valid
	 * are: #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
	 * [#x10000-#x10FFFF]
	 */
	private static Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); //$NON-NLS-1$

	// The only 4 entity names XML supports natively (kept as-is)
	private static Set<String> goodEntities = new HashSet<String>();
	// Map entities to their unicode equivalent (or a space when there is none)
	private static Map<String, String> badEntities = new HashMap<String, String>();

	static {
		// pre-defined XML entities
		goodEntities.add("&quot;"); //$NON-NLS-1$ // quotation mark
		goodEntities.add("&amp;"); //$NON-NLS-1$ // ampersand
		goodEntities.add("&lt;"); //$NON-NLS-1$ // less-than sign
		goodEntities.add("&gt;"); //$NON-NLS-1$ // greater-than sign
		// control entities: &#127; ... &#159; are illegal HTML characters, replaced by a space
		badEntities.put("&#127;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#128;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#129;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#130;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#131;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#132;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#133;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#134;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#135;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#136;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#137;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#138;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#139;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#140;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#141;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#142;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#143;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#144;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#145;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#146;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#147;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#148;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#149;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#150;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#151;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#152;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#153;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#154;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#155;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#156;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#157;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#158;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		badEntities.put("&#159;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
		// misc entities
		badEntities.put("&euro;", "\u20AC"); //$NON-NLS-1$ //$NON-NLS-2$ // euro
		badEntities.put("&lsquo;", "\u2018"); //$NON-NLS-1$ //$NON-NLS-2$ // left single quotation mark
		badEntities.put("&rsquo;", "\u2019"); //$NON-NLS-1$ //$NON-NLS-2$ // right single quotation mark
		// Latin 1 entities
		badEntities.put("&nbsp;", "\u00A0"); //$NON-NLS-1$ //$NON-NLS-2$ // no-break space
		badEntities.put("&iexcl;", "\u00A1"); //$NON-NLS-1$ //$NON-NLS-2$ // inverted exclamation mark
		badEntities.put("&cent;", "\u00A2"); //$NON-NLS-1$ //$NON-NLS-2$ // cent sign
		badEntities.put("&pound;", "\u00A3"); //$NON-NLS-1$ //$NON-NLS-2$ // pound sign
		badEntities.put("&curren;", "\u00A4"); //$NON-NLS-1$ //$NON-NLS-2$ // currency sign
		badEntities.put("&yen;", "\u00A5"); //$NON-NLS-1$ //$NON-NLS-2$ // yen sign
		badEntities.put("&brvbar;", "\u00A6"); //$NON-NLS-1$ //$NON-NLS-2$ // broken vertical bar
		badEntities.put("&sect;", "\u00A7"); //$NON-NLS-1$ //$NON-NLS-2$ // section sign
		badEntities.put("&uml;", "\u00A8"); //$NON-NLS-1$ //$NON-NLS-2$ // diaeresis
		badEntities.put("&copy;", "\u00A9"); //$NON-NLS-1$ //$NON-NLS-2$ // copyright sign
		badEntities.put("&ordf;", "\u00AA"); //$NON-NLS-1$ //$NON-NLS-2$ // feminine ordinal indicator
		badEntities.put("&laquo;", "\u00AB"); //$NON-NLS-1$ //$NON-NLS-2$ // left-pointing double angle quotation mark
		badEntities.put("&not;", "\u00AC"); //$NON-NLS-1$ //$NON-NLS-2$ // not sign
		badEntities.put("&shy;", "\u00AD"); //$NON-NLS-1$ //$NON-NLS-2$ // soft hyphen
		badEntities.put("&reg;", "\u00AE"); //$NON-NLS-1$ //$NON-NLS-2$ // registered sign
		badEntities.put("&macr;", "\u00AF"); //$NON-NLS-1$ //$NON-NLS-2$ // macron
		badEntities.put("&deg;", "\u00B0"); //$NON-NLS-1$ //$NON-NLS-2$ // degree sign
		badEntities.put("&plusmn;", "\u00B1"); //$NON-NLS-1$ //$NON-NLS-2$ // plus-minus sign
		badEntities.put("&sup2;", "\u00B2"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript two
		badEntities.put("&sup3;", "\u00B3"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript three
		badEntities.put("&acute;", "\u00B4"); //$NON-NLS-1$ //$NON-NLS-2$ // acute accent
		badEntities.put("&micro;", "\u00B5"); //$NON-NLS-1$ //$NON-NLS-2$ // micro sign
		badEntities.put("&para;", "\u00B6"); //$NON-NLS-1$ //$NON-NLS-2$ // pilcrow sign
		badEntities.put("&middot;", "\u00B7"); //$NON-NLS-1$ //$NON-NLS-2$ // middle dot
		badEntities.put("&cedil;", "\u00B8"); //$NON-NLS-1$ //$NON-NLS-2$ // cedilla
		badEntities.put("&sup1;", "\u00B9"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript one
		badEntities.put("&ordm;", "\u00BA"); //$NON-NLS-1$ //$NON-NLS-2$ // masculine ordinal indicator
		badEntities.put("&raquo;", "\u00BB"); //$NON-NLS-1$ //$NON-NLS-2$ // right-pointing double angle quotation mark
		badEntities.put("&frac14;", "\u00BC"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one quarter
		badEntities.put("&frac12;", "\u00BD"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one half
		badEntities.put("&frac34;", "\u00BE"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction three quarters
		badEntities.put("&iquest;", "\u00BF"); //$NON-NLS-1$ //$NON-NLS-2$ // inverted question mark
		badEntities.put("&Agrave;", "\u00C0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with grave
		badEntities.put("&Aacute;", "\u00C1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with acute
		badEntities.put("&Acirc;", "\u00C2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with circumflex
		badEntities.put("&Atilde;", "\u00C3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with tilde
		badEntities.put("&Auml;", "\u00C4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with diaeresis
		badEntities.put("&Aring;", "\u00C5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with ring above
		badEntities.put("&AElig;", "\u00C6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter AE
		badEntities.put("&Ccedil;", "\u00C7"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter C with cedilla
		badEntities.put("&Egrave;", "\u00C8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with grave
		badEntities.put("&Eacute;", "\u00C9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with acute
		badEntities.put("&Ecirc;", "\u00CA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with circumflex
		badEntities.put("&Euml;", "\u00CB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with diaeresis
		badEntities.put("&Igrave;", "\u00CC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with grave
		badEntities.put("&Iacute;", "\u00CD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with acute
		badEntities.put("&Icirc;", "\u00CE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with circumflex
		badEntities.put("&Iuml;", "\u00CF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with diaeresis
		badEntities.put("&ETH;", "\u00D0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter ETH
		badEntities.put("&Ntilde;", "\u00D1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter N with tilde
		badEntities.put("&Ograve;", "\u00D2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with grave
		badEntities.put("&Oacute;", "\u00D3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with acute
		badEntities.put("&Ocirc;", "\u00D4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with circumflex
		badEntities.put("&Otilde;", "\u00D5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with tilde
		badEntities.put("&Ouml;", "\u00D6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with diaeresis
		badEntities.put("&times;", "\u00D7"); //$NON-NLS-1$ //$NON-NLS-2$ // multiplication sign
		badEntities.put("&Oslash;", "\u00D8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with stroke
		badEntities.put("&Ugrave;", "\u00D9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with grave
		badEntities.put("&Uacute;", "\u00DA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with acute
		badEntities.put("&Ucirc;", "\u00DB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with circumflex
		badEntities.put("&Uuml;", "\u00DC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with diaeresis
		badEntities.put("&Yacute;", "\u00DD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter Y with acute
		badEntities.put("&THORN;", "\u00DE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter THORN
		badEntities.put("&szlig;", "\u00DF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter sharp s
		badEntities.put("&agrave;", "\u00E0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with grave
		badEntities.put("&aacute;", "\u00E1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with acute
		badEntities.put("&acirc;", "\u00E2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with circumflex
		badEntities.put("&atilde;", "\u00E3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with tilde
		badEntities.put("&auml;", "\u00E4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with diaeresis
		badEntities.put("&aring;", "\u00E5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with ring above
		badEntities.put("&aelig;", "\u00E6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter ae
		badEntities.put("&ccedil;", "\u00E7"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter c with cedilla
		badEntities.put("&egrave;", "\u00E8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with grave
		badEntities.put("&eacute;", "\u00E9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with acute
		badEntities.put("&ecirc;", "\u00EA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with circumflex
		badEntities.put("&euml;", "\u00EB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with diaeresis
		badEntities.put("&igrave;", "\u00EC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with grave
		badEntities.put("&iacute;", "\u00ED"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with acute
		badEntities.put("&icirc;", "\u00EE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with circumflex
		badEntities.put("&iuml;", "\u00EF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with diaeresis
		badEntities.put("&eth;", "\u00F0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter eth
		badEntities.put("&ntilde;", "\u00F1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter n with tilde
		badEntities.put("&ograve;", "\u00F2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with grave
		badEntities.put("&oacute;", "\u00F3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with acute
		badEntities.put("&ocirc;", "\u00F4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with circumflex
		badEntities.put("&otilde;", "\u00F5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with tilde
		badEntities.put("&ouml;", "\u00F6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with diaeresis
		badEntities.put("&divide;", "\u00F7"); //$NON-NLS-1$ //$NON-NLS-2$ // division sign
		badEntities.put("&oslash;", "\u00F8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with stroke
		badEntities.put("&ugrave;", "\u00F9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with grave
		badEntities.put("&uacute;", "\u00FA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with acute
		badEntities.put("&ucirc;", "\u00FB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with circumflex
		badEntities.put("&uuml;", "\u00FC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with diaeresis
		badEntities.put("&yacute;", "\u00FD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with acute
		badEntities.put("&thorn;", "\u00FE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter thorn
		badEntities.put("&yuml;", "\u00FF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with diaeresis
	}

	/**
	 * For each entity in the input that is not allowed in XML, replace the
	 * entity with its unicode equivalent or remove it. For each instance of a
	 * bare {@literal &}, replace it with {@literal &amp;}<br/>
	 * XML only allows 4 entities: {@literal &amp;}, {@literal &quot;}, {@literal &lt;} and {@literal &gt;}.
	 *
	 * @param broken
	 *            the string to handle entities
	 * @return the string with entities appropriately fixed up, or null when the input is null
	 */
	public static String cleanAllEntities(final String broken) {
		if (broken == null) {
			return null;
		}

		// first remove character references to control characters, then any character
		// outside the legal XML ranges
		String working = invalidControlCharPattern.matcher(broken).replaceAll("");
		working = invalidCharacterPattern.matcher(working).replaceAll("");

		int cleanfrom = 0;

		while (true) {
			int amp = working.indexOf('&', cleanfrom);
			// If there are no more amps then we are done
			if (amp == -1) {
				break;
			}
			// Skip references of the kind &#ddd;
			if (validCharacterEntityPattern.matcher(working.substring(amp)).find()) {
				cleanfrom = working.indexOf(';', amp) + 1;
				continue;
			}
			int i = amp + 1;
			while (true) {
				// if we are at the end of the string then just escape the '&';
				if (i >= working.length()) {
					return working.substring(0, amp) + "&amp;" + working.substring(amp + 1); //$NON-NLS-1$
				}
				// if we have come to a ; then we have an entity
				// If it is something that xml can't handle then replace it.
				char c = working.charAt(i);
				if (c == ';') {
					final String entity = working.substring(amp, i + 1);
					final String replace = handleEntity(entity);
					working = working.substring(0, amp) + replace + working.substring(i + 1);
					break;
				}
				// Did we end an entity without finding a closing ;
				// Then treat it as an '&' that needs to be replaced with &amp;
				if (!Character.isLetterOrDigit(c)) {
					working = working.substring(0, amp) + "&amp;" + working.substring(amp + 1); //$NON-NLS-1$
					amp = i + 4; // account for the 4 extra characters
					break;
				}
				i++;
			}
			cleanfrom = amp + 1;
		}

		// escape double angle brackets, which some producers emit and parsers reject
		if (Pattern.compile("<<").matcher(working).find()) {
			working = working.replaceAll("<<", "&lt;&lt;");
		}

		if (Pattern.compile(">>").matcher(working).find()) {
			working = working.replaceAll(">>", "&gt;&gt;");
		}

		return working;
	}

	/**
	 * Replace entity with its unicode equivalent, if it is not a valid XML
	 * entity. Otherwise strip it out. XML only allows 4 entities: &amp;amp;,
	 * &amp;quot;, &amp;lt; and &amp;gt;.
	 *
	 * @param entity
	 *            the entity to be replaced
	 * @return the substitution for the entity, either itself, the unicode
	 *         equivalent or an empty string.
	 */
	private static String handleEntity(final String entity) {
		if (goodEntities.contains(entity)) {
			return entity;
		}

		// badEntities is typed, no cast needed; unknown entities are dropped
		final String replace = badEntities.get(entity);
		return replace != null ? replace : "";
	}
}
|
|
@ -0,0 +1,40 @@
|
|||
package eu.dnetlib.data.collector.plugins.oaisets;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Required;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
public class OaiSetsCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
private OaiSetsIteratorFactory oaiSetsIteratorFactory;
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||
|
||||
if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
|
||||
|
||||
return new Iterable<String>() {
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return oaiSetsIteratorFactory.newIterator(baseUrl);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public OaiSetsIteratorFactory getOaiSetsIteratorFactory() {
|
||||
return oaiSetsIteratorFactory;
|
||||
}
|
||||
|
||||
@Required
|
||||
public void setOaiSetsIteratorFactory(final OaiSetsIteratorFactory oaiSetsIteratorFactory) {
|
||||
this.oaiSetsIteratorFactory = oaiSetsIteratorFactory;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,133 @@
|
|||
package eu.dnetlib.data.collector.plugins.oaisets;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.Iterator;
|
||||
import java.util.Queue;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.PriorityBlockingQueue;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Element;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.HttpConnector;
|
||||
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
|
||||
public class OaiSetsIterator implements Iterator<String> {
|
||||
|
||||
private static final Log log = LogFactory.getLog(OaiSetsIterator.class);
|
||||
|
||||
private Queue<String> queue = new PriorityBlockingQueue<String>();
|
||||
private SAXReader reader = new SAXReader();
|
||||
|
||||
private String baseUrl;
|
||||
|
||||
private String token;
|
||||
private boolean started;
|
||||
private HttpConnector httpConnector;
|
||||
|
||||
private Set<String> setsAlreadySeen = Sets.newHashSet();
|
||||
|
||||
public OaiSetsIterator(final String baseUrl, final HttpConnector httpConnector) {
|
||||
this.baseUrl = baseUrl;
|
||||
this.started = false;
|
||||
this.httpConnector = httpConnector;
|
||||
}
|
||||
|
||||
private void verifyStarted() {
|
||||
if (!this.started) {
|
||||
this.started = true;
|
||||
try {
|
||||
this.token = firstPage();
|
||||
} catch (CollectorServiceException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
synchronized (queue) {
|
||||
verifyStarted();
|
||||
return !queue.isEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
synchronized (queue) {
|
||||
verifyStarted();
|
||||
final String res = queue.poll();
|
||||
while (queue.isEmpty() && (token != null) && !token.isEmpty()) {
|
||||
try {
|
||||
token = otherPages(token);
|
||||
} catch (CollectorServiceException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {}
|
||||
|
||||
private String firstPage() throws CollectorServiceException {
|
||||
final String url = baseUrl + "?verb=ListSets";
|
||||
log.info("Start harvesting using url: " + url);
|
||||
return downloadPage(url);
|
||||
}
|
||||
|
||||
private String otherPages(final String resumptionToken) throws CollectorServiceException {
|
||||
return downloadPage(baseUrl + "?verb=ListSets&resumptionToken=" + resumptionToken);
|
||||
}
|
||||
|
||||
private String downloadPage(final String url) throws CollectorServiceException {
|
||||
|
||||
final String xml = httpConnector.getInputSource(url);
|
||||
|
||||
Document doc;
|
||||
try {
|
||||
doc = reader.read(new StringReader(xml));
|
||||
} catch (DocumentException e) {
|
||||
log.warn("Error parsing xml, I try to clean it: " + xml, e);
|
||||
final String cleaned = XmlCleaner.cleanAllEntities(xml);
|
||||
try {
|
||||
doc = reader.read(new StringReader(cleaned));
|
||||
} catch (DocumentException e1) {
|
||||
throw new CollectorServiceException("Error parsing cleaned document:" + cleaned, e1);
|
||||
}
|
||||
}
|
||||
|
||||
final Node errorNode = doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
|
||||
if (errorNode != null) {
|
||||
final String code = errorNode.valueOf("@code");
|
||||
if ("noRecordsMatch".equalsIgnoreCase(code.trim())) {
|
||||
log.warn("noRecordsMatch for oai call: " + url);
|
||||
return null;
|
||||
} else throw new CollectorServiceException(code + " - " + errorNode.getText());
|
||||
}
|
||||
|
||||
boolean sawAllSets = true;
|
||||
for (Object o : doc.selectNodes("//*[local-name()='ListSets']/*[local-name()='set']")) {
|
||||
String set = ((Element) o).valueOf("./*[local-name()='setSpec']");
|
||||
if (!setsAlreadySeen.contains(set)) {
|
||||
sawAllSets = false;
|
||||
setsAlreadySeen.add(set);
|
||||
queue.add(((Node) o).asXML());
|
||||
}
|
||||
}
|
||||
if (sawAllSets) {
|
||||
log.warn("URL " + baseUrl + " keeps returning the same OAI sets. Please contact the repo admin.");
|
||||
System.out.println("URL " + baseUrl + " keeps returning the same OAI sets. Please contact the repo admin.");
|
||||
return null;
|
||||
} else return doc.valueOf("//*[local-name()='resumptionToken']");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
package eu.dnetlib.data.collector.plugins.oaisets;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Required;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.HttpConnector;
|
||||
|
||||
public class OaiSetsIteratorFactory {
|
||||
|
||||
private HttpConnector httpConnector;
|
||||
|
||||
public Iterator<String> newIterator(String baseUrl) {
|
||||
return new OaiSetsIterator(baseUrl, httpConnector);
|
||||
}
|
||||
|
||||
public HttpConnector getHttpConnector() {
|
||||
return httpConnector;
|
||||
}
|
||||
|
||||
@Required
|
||||
public void setHttpConnector(HttpConnector httpConnector) {
|
||||
this.httpConnector = httpConnector;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
package eu.dnetlib.data.collector.plugins.opentrial;
|
||||
|
||||
/**
|
||||
* Created by miriam on 07/03/2017.
|
||||
*/
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import java.net.*;
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
//import java.util.function.Consumer;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.json.*;
|
||||
|
||||
|
||||
|
||||
/**
 * Iterable over the trial records returned by the OpenTrials REST API.
 *
 * Builds a query url from the given base url (optionally restricted by a
 * registration_date range), reads the total hit count from the first page,
 * then pages through the results 100 at a time, converting each JSON item to
 * an XML string.
 */
public class OpenTrialIterator implements Iterable<String> {

	// fully assembled query url (base url + query string), page number appended per request
	private final String base_url;
	// total number of records reported by the API in 'total_count'
	private int total ;
	// buffer of records fetched but not yet returned; capacity 100 matches per_page=100.
	// NOTE(review): fillTrials() uses the blocking put(); if a page ever returned more
	// than 100 items the producer would block — TODO confirm the API page size.
	private ArrayBlockingQueue<String> trials = new ArrayBlockingQueue<String>(100);
	// number of records fetched so far, compared against 'total' in hasNext()
	private int current = 0;
	private static final Log log = LogFactory.getLog(OpenTrialIterator.class);

	/**
	 * Builds the query url and eagerly fetches the first page (setting 'total').
	 *
	 * @param base_url  base url of the OpenTrials API endpoint
	 * @param from_date lower registration_date bound; when null no date filter is applied
	 * @param to_date   upper registration_date bound; when null an open-ended range is used
	 * @throws CollectorServiceException when the first page cannot be fetched or parsed
	 */
	public OpenTrialIterator(String base_url, String from_date, String to_date)throws CollectorServiceException{
		try {
			String q = "per_page=100";
			if (!(from_date == null)) {
				if (!(to_date == null)) {
					// bounded range: q=registration_date:[from TO to] (url-encoded)
					q = "q=registration_date%3A%5B" + from_date + "%20TO%20" + to_date + "%5D&" + q;

				} else
					// open-ended range: q=registration_date:[from TO *]
					q = "q=registration_date%3A%5B" + from_date + "%20TO%20*%5D&" + q;
			}
			this.base_url = base_url+ q;
			log.info("url from which to collect " + this.base_url);
			prepare();
		}catch(Exception ex){
			throw new CollectorServiceException(ex);
		}
	}

	// Fetches page 1, records the total hit count and buffers its items.
	private void prepare()throws Exception {
		JSONObject json = new JSONObject(getPage(1));
		total = json.getInt("total_count");
		log.info("Total number of entries to collect: " + total);
		fillTrials(json);
	}


	@Override
	public Iterator<String> iterator() {
		return new Iterator<String>(){

			// next page to fetch; page 1 was already consumed by prepare()
			private int page_number = 2;


			// removal is not supported: intentionally a no-op
			@Override
			public void remove(){

			}

			@Override
			public String next() {
				try {
					// lazily fetch the next page when the buffer runs dry
					if (trials.isEmpty()) {
						JSONObject json = new JSONObject(getPage(page_number));
						fillTrials(json);
						page_number++;
					}
					return trials.poll();
				}catch(Exception ex){
					// Iterator.next() cannot declare checked exceptions
					throw new CollectorServiceRuntimeException(ex);
				}
			}

			@Override
			public boolean hasNext(){
				// NOTE(review): the message lacks the closing ')' — cosmetic, debug-level only
				log.debug("More entries to collect: (" + current + "<" + total + "=" + (current < total));
				return (current < total || !trials.isEmpty());
			}


		};

	}

	// Converts every JSON item of the given page to XML and buffers it, bumping 'current'.
	private void fillTrials(JSONObject json)throws CollectorServiceException{

		JSONArray entries = json.getJSONArray("items");
		for(Object entry: entries) {
			try {
				// put() blocks when the buffer is full (see NOTE on 'trials' above)
				trials.put(XML.toString(entry));
			}catch(Exception ex){
				throw new CollectorServiceException(ex);
			}
			current++;
		}

	}
	// Downloads one result page as a raw string, faking a browser User-Agent.
	private String getPage(int page_number)throws CollectorServiceException {

		try {
			URL url = new URL(base_url + "&page=" + page_number);
			URLConnection conn = url.openConnection();
			conn.setRequestProperty("User-Agent", "Mozilla/5.0");
			return (IOUtils.toString(conn.getInputStream()));
		}catch(Exception ex){
			throw new CollectorServiceException(ex);
		}

	}


}
|
|
@ -0,0 +1,27 @@
|
|||
package eu.dnetlib.data.collector.plugins.opentrial;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Created by miriam on 07/03/2017.
|
||||
*/
|
||||
public class OpenTrialPlugin extends AbstractCollectorPlugin{
|
||||
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
try {
|
||||
|
||||
OpenTrialIterator iterator = new OpenTrialIterator(interfaceDescriptor.getBaseUrl(),fromDate,untilDate);
|
||||
return iterator;
|
||||
} catch (Exception e) {
|
||||
throw new CollectorServiceException("OOOPS something bad happen on creating iterator ", e);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
package eu.dnetlib.data.collector.plugins.projects.grist;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
/**
|
||||
* Plugin to collect metadata record about projects and fundings via the europePMC GRIST API (e.g. WT projects).
|
||||
* <p>
|
||||
* Documentation on GRIST API: http://europepmc.org/GristAPI.
|
||||
* </p>
|
||||
* <p>
|
||||
* BaseURL: http://www.ebi.ac.uk/europepmc/GristAPI/rest/get/query=ga:"Wellcome Trust"&resultType=core
|
||||
* where resultType=core asks for the complete information (including abstracts).
|
||||
* The results returned by the API are XMLs.
|
||||
* </p>
|
||||
* <p>
|
||||
* Pagination: use parameter 'page'. When the response contains empty 'RecordList', it means we reached the end.
|
||||
* </p>
|
||||
*
|
||||
* @author alessia
|
||||
*/
|
||||
public class GristCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
//baseURL: http://www.ebi.ac.uk/europepmc/GristAPI/rest/get/query=ga:%22Wellcome%20Trust%22&resultType=core
|
||||
return new GristProjectsIterable(interfaceDescriptor.getBaseUrl());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,136 @@
|
|||
package eu.dnetlib.data.collector.plugins.projects.grist;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.PriorityBlockingQueue;
|
||||
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
|
||||
import eu.dnetlib.enabling.resultset.SizedIterable;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Element;
|
||||
import org.dom4j.io.SAXReader;
|
||||
|
||||
public class GristProjectsIterable implements SizedIterable<String> {
|
||||
|
||||
private static final Log log = LogFactory.getLog(GristProjectsIterable.class); // NOPMD by marko on 11/24/08 5:02 PM
|
||||
|
||||
private String queryURL;
|
||||
private int total;
|
||||
private SAXReader reader;
|
||||
|
||||
public GristProjectsIterable(String baseURL) throws CollectorServiceException {
|
||||
queryURL = baseURL;
|
||||
reader = new SAXReader();
|
||||
total = getTotalCount();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumberOfElements() {
|
||||
return total;
|
||||
}
|
||||
|
||||
private int getTotalCount() throws CollectorServiceException {
|
||||
try {
|
||||
URL pageUrl = new URL(queryURL);
|
||||
log.debug("Getting hit count from: " + pageUrl.toString());
|
||||
String resultPage = IOUtils.toString(pageUrl);
|
||||
Document doc = reader.read(IOUtils.toInputStream(resultPage));
|
||||
String hitCount = doc.selectSingleNode("/Response/HitCount").getText();
|
||||
return Integer.parseInt(hitCount);
|
||||
} catch (NumberFormatException e) {
|
||||
log.warn("Cannot set the total count from '/Response/HitCount'");
|
||||
} catch (DocumentException e) {
|
||||
throw new CollectorServiceException(e);
|
||||
} catch (MalformedURLException e) {
|
||||
throw new CollectorServiceException(e);
|
||||
} catch (IOException e) {
|
||||
throw new CollectorServiceException(e);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return new Iterator<String>() {
|
||||
|
||||
private Queue<String> projects = new PriorityBlockingQueue<String>();
|
||||
private boolean morePages = true;
|
||||
private int pageNumber = 0;
|
||||
private SAXReader reader = new SAXReader();
|
||||
//The following is for debug only
|
||||
private int nextCounter = 0;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
try {
|
||||
fillProjectListIfNeeded();
|
||||
} catch (CollectorServiceException e) {
|
||||
throw new CollectorServiceRuntimeException(e);
|
||||
}
|
||||
return !projects.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
nextCounter++;
|
||||
log.debug(String.format("Calling next %s times. projects queue has %s elements", nextCounter, projects.size()));
|
||||
try {
|
||||
fillProjectListIfNeeded();
|
||||
return projects.poll();
|
||||
} catch (CollectorServiceException e) {
|
||||
throw new CollectorServiceRuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
private boolean fillProjectListIfNeeded() throws CollectorServiceException {
|
||||
if (morePages && projects.isEmpty()) {
|
||||
String resultPage = getNextPage();
|
||||
Document doc = null;
|
||||
try {
|
||||
doc = reader.read(IOUtils.toInputStream(resultPage));
|
||||
List<Element> records = doc.selectNodes("//RecordList/Record");
|
||||
if (records != null && !records.isEmpty()) {
|
||||
for (Element p : records) {
|
||||
|
||||
projects.add(p.asXML());
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
log.info("No more projects to read at page nr. " + pageNumber);
|
||||
morePages = false;
|
||||
return false;
|
||||
}
|
||||
} catch (DocumentException e) {
|
||||
throw new CollectorServiceException(e);
|
||||
}
|
||||
} else return false;
|
||||
}
|
||||
|
||||
private String getNextPage() {
|
||||
pageNumber++;
|
||||
try {
|
||||
URL pageUrl = new URL(queryURL + "&page=" + pageNumber);
|
||||
log.debug("Getting page at: " + pageUrl.toString());
|
||||
return IOUtils.toString(pageUrl);
|
||||
} catch (Exception e) {
|
||||
throw new CollectorServiceRuntimeException("Error on page " + pageNumber, e);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
package eu.dnetlib.data.collector.plugins.projects.gtr2;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
/**
|
||||
* Plugin to collect metadata record about projects and fundings via the RCUK grt2 API.
|
||||
* <p>
|
||||
* Documentation : http://gtr.rcuk.ac.uk/resources/api.html.
|
||||
* </p>
|
||||
* <p>
|
||||
* BaseURL: http://gtr.rcuk.ac.uk/gtr/api
|
||||
* The results returned by the API are XMLs.
|
||||
* </p>
|
||||
* <p>
|
||||
* Pagination: TO BE DEFINED. Exceeding the number of pages available will result in a HTTP response code of 404
|
||||
* </p>
|
||||
*
|
||||
* @author alessia
|
||||
*/
|
||||
public class Gtr2CollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); }
|
||||
|
||||
return new Gtr2ProjectsIterable(interfaceDescriptor.getBaseUrl(), fromDate);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,181 @@
|
|||
package eu.dnetlib.data.collector.plugins.projects.gtr2;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.util.concurrent.*;
|
||||
|
||||
import com.ximpleware.AutoPilot;
|
||||
import com.ximpleware.VTDGen;
|
||||
import com.ximpleware.VTDNav;
|
||||
import eu.dnetlib.data.collector.plugins.HttpConnector;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.commons.lang3.*;
|
||||
|
||||
/**
 * Assembles a single self-contained XML package ("&lt;doc&gt;...&lt;/doc&gt;") for one gtr2 project.
 * <p>
 * The project fragment itself is written first; then, for each FUND / LEAD_ORG / PP_ORG / PI_PER
 * link found in the project record, the linked resource is downloaded and a reduced fragment is
 * appended by a worker task running on a fixed-size thread pool.
 * </p>
 * <p>
 * NOTE(review): the worker tasks all append to the shared {@code writer} (a StringWriter);
 * individual writes are synchronized by the underlying StringBuffer, but the ORDER in which
 * fragments are appended is nondeterministic — confirm that downstream consumers do not rely
 * on element order inside the produced &lt;doc&gt;.
 * </p>
 */
public class Gtr2Helper {

	private static final Log log = LogFactory.getLog(Gtr2Helper.class); // NOPMD by marko on 11/24/08 5:02 PM

	// Navigator/autopilot over the main project record being processed.
	private VTDNav mainVTDNav;
	private AutoPilot mainAutoPilot;
	// Accumulates the output package; shared with the worker Runnables below.
	private StringWriter writer;
	private HttpConnector connector;
	//private BlockingQueue<String> fragment = new ArrayBlockingQueue<String>(20);

	/**
	 * Builds the XML package for the project currently addressed by {@code vn}.
	 *
	 * @param vn         navigator positioned on the project record
	 * @param namespaces namespace declarations to put on the wrapping &lt;doc&gt; element
	 * @return the complete &lt;doc&gt; XML package as a string
	 * @throws Exception on any download, parse or XPath failure
	 */
	public String processProject(final VTDNav vn, final String namespaces) throws Exception {
		//log.debug("Processing project at "+projectURL);
		writer = new StringWriter();
		mainVTDNav = vn;
		mainAutoPilot = new AutoPilot(mainVTDNav);
		writer.write("<doc " + namespaces + ">");
		writeFragment(mainVTDNav);

		mainAutoPilot.selectXPath("//link[@rel='FUND']");
		ExecutorService es = Executors.newFixedThreadPool(5);

		// Note: the Thread objects below are used only as Runnables by the executor;
		// they are never started directly via Thread.start().
		while (mainAutoPilot.evalXPath() != -1) {
			Thread t = new Thread(new ProcessFunder(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href"))));
			es.execute(t);
		}

		mainAutoPilot.resetXPath();
		mainAutoPilot.selectXPath(".//link[@rel='LEAD_ORG']");
		while (mainAutoPilot.evalXPath() != -1) {
			Thread t = new Thread(new Org(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")),
					new String[] { "<ld-org>", "</ld-org>" }));
			es.execute(t);
		}
		mainAutoPilot.resetXPath();
		mainAutoPilot.selectXPath(".//link[@rel='PP_ORG']");
		while (mainAutoPilot.evalXPath() != -1) {
			Thread t = new Thread(new Org(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")),
					new String[] { "<pp-org>","</pp-org>" }));
			es.execute(t);
		}
		mainAutoPilot.resetXPath();

		mainAutoPilot.selectXPath(".//link[@rel='PI_PER']");
		while (mainAutoPilot.evalXPath() != -1) {
			Thread t = new Thread(new PiPer(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href"))));
			es.execute(t);
		}
		// Wait (up to 10 minutes) for every linked-resource fragment to be appended
		// before closing the wrapping element.
		es.shutdown();
		log.debug("Waiting threads");
		es.awaitTermination(10, TimeUnit.MINUTES);

		log.debug("Finished writing project");
		writer.write("</doc>");
		writer.close();

		return writer.toString();
	}

	/**
	 * Downloads {@code httpUrl} and parses it into a fresh VTD navigator.
	 * NOTE(review): returns null on ANY failure — callers (the Runnables below)
	 * do not null-check, so a failed download surfaces later as an exception
	 * caught and only logged inside run().
	 */
	private VTDNav setNavigator(final String httpUrl) {
		VTDGen vg_tmp = new VTDGen();
		connector = new HttpConnector();
		try {
			byte[] bytes = connector.getInputSource(httpUrl).getBytes("UTF-8");
			vg_tmp.setDoc(bytes);
			vg_tmp.parse(false);
			//vg_tmp.parseHttpUrl(httpUrl, false);
			return vg_tmp.getNav();
		}catch (Throwable e){
			return null;
		}
	}

	/** Evaluates {@code xPath} on the given navigator, returning the first match index or -1. */
	private int evalXpath(final VTDNav fragmentVTDNav, final String xPath) throws Exception {

		AutoPilot ap_tmp = new AutoPilot(fragmentVTDNav);
		ap_tmp.selectXPath(xPath);
		return ap_tmp.evalXPath();
	}

	/** Dumps the XML fragment currently addressed by {@code nav} into the shared writer. */
	private void writeFragment(final VTDNav nav) throws Exception {
		ByteArrayOutputStream b = new ByteArrayOutputStream();
		nav.dumpFragment(b);
		String ret = b.toString();
		b.reset();
		writer.write(ret);
	}

	/**
	 * Writes {@code xmlOpenTag} + value + {@code xmlCloseTag} for the first node matching
	 * {@code xPath}. The value is the attribute {@code attrName} when given, otherwise the
	 * element text (XML-escaped). Writes nothing when the XPath has no match.
	 */
	private void writeNewTagAndInfo(final VTDNav vn, final String xPath, final String xmlOpenTag, final String xmlCloseTag, final String attrName) throws Exception {

		int nav_res = evalXpath(vn, xPath);
		if (nav_res != -1) {
			String tmp = xmlOpenTag;
			if (attrName != null) tmp += (vn.toNormalizedString(vn.getAttrVal(attrName)));
			else
				tmp += (StringEscapeUtils.escapeXml11(vn.toNormalizedString(vn.getText())));
			tmp += (xmlCloseTag);
			writer.write(tmp);
		}
	}

	/** Worker: appends the full fragment of a PI_PER (principal investigator) resource. */
	private class PiPer implements Runnable {

		private VTDNav vn;

		public PiPer(String httpURL) {
			// Download happens eagerly in the constructor, on the submitting thread.
			vn = setNavigator(httpURL);
		}

		@Override
		public void run() {
			try {
				writeFragment(vn);
			} catch (Throwable e) {log.debug("Eccezione in PiPer " + e.getMessage());}

		}
	}

	/** Worker: appends name/country/id of an organisation, wrapped in the given open/close tags. */
	private class Org implements Runnable {

		// tags[0] is the opening wrapper (e.g. "<ld-org>"), tags[1] the closing one.
		private String[] tags;
		private VTDNav vn;

		public Org(final String httpURL, final String[] tags) {
			vn = setNavigator(httpURL);
			this.tags = tags;
		}

		@Override
		public void run() {
			try {
				writeNewTagAndInfo(vn, "//name", tags[0]+"<name>", "</name>", null);
				vn.toElement(VTDNav.ROOT);
				writeNewTagAndInfo(vn, "//country", "<country>", "</country>", null);
				vn.toElement(VTDNav.ROOT);
				writeNewTagAndInfo(vn, ".", "<id>", "</id>"+tags[1], "id");
			} catch (Throwable e) {
				log.debug("Eccezione in Org " + e.getMessage());
			}
		}

	}

	/** Worker: appends a FUND fragment plus the name of each linked FUNDER resource. */
	private class ProcessFunder implements Runnable {

		private VTDNav vn;

		public ProcessFunder(final String httpURL) {
			vn = setNavigator(httpURL);
		}

		@Override
		public void run() {

			try {
				AutoPilot ap = new AutoPilot(vn);
				writeFragment(vn);
				ap.selectXPath(".//link[@rel='FUNDER']");
				VTDNav tmp_vn;
				while (ap.evalXPath() != -1) {
					// Each FUNDER link triggers a further download of the funder record.
					tmp_vn = setNavigator(vn.toNormalizedString(vn.getAttrVal("href")));
					writeNewTagAndInfo(tmp_vn, "//name", "<funder> <name>", "</name></funder>", null);
				}
			} catch (Throwable e) {log.debug("Eccezione in Funder" + e.getMessage());}
		}

	}
}
|
|
@ -0,0 +1,352 @@
|
|||
package eu.dnetlib.data.collector.plugins.projects.gtr2;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.ximpleware.AutoPilot;
|
||||
import com.ximpleware.VTDGen;
|
||||
import com.ximpleware.VTDNav;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
|
||||
import eu.dnetlib.enabling.resultset.SizedIterable;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
import eu.dnetlib.data.collector.plugins.HttpConnector;
|
||||
|
||||
/**
|
||||
* Created by alessia on 28/11/16.
|
||||
*/
|
||||
/**
 * Created by alessia on 28/11/16.
 * <p>
 * Streams gtr2 project packages with a producer/consumer scheme: a background
 * {@link FillProjectList} thread walks the paginated '/projects' listing and submits one
 * {@link ParseProject} task per project to a fixed thread pool; each task puts the assembled
 * XML package on the bounded {@code projects} queue. The consumer side (the iterator) polls
 * the queue until it receives the {@link #TERMINATOR} sentinel.
 * </p>
 */
public class Gtr2ProjectsIterable implements SizedIterable<String> {

	// Sentinel value enqueued once all producer work is done; tells the iterator to stop.
	public static final String TERMINATOR = "ARNOLD";
	// Maximum seconds the consumer waits on the queue (and the producer waits for the pool).
	public static final int WAIT_END_SECONDS = 120;
	// Thread-pool size for per-project parsing tasks. (sic: name should be PAGE_SIZE,
	// kept as-is because it is a public constant.)
	public static final int PAGE_SZIE = 20;

	private static final Log log = LogFactory.getLog(Gtr2ProjectsIterable.class);

	private String queryURL;
	// Total number of projects declared by the API; -1 until known.
	private int total = -1;
	private int startFromPage = 1;
	private int endAtPage;
	private VTDGen vg;
	private VTDNav vn;
	private AutoPilot ap;
	// Namespace declarations copied from the listing root, re-declared on each package.
	private String namespaces;
	private boolean incremental = false;
	private DateTime fromDate;
	private DateTimeFormatter simpleDateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd");
	// Bounded hand-off queue between producer tasks and the consuming iterator.
	private ArrayBlockingQueue<String> projects = new ArrayBlockingQueue<String>(20);
	//private boolean finished = false;
	private final ExecutorService es = Executors.newFixedThreadPool(PAGE_SZIE);
	// One-element look-ahead buffer used by hasNext()/next().
	private String nextElement = null;
	private HttpConnector connector;

	/**
	 * Harvests ALL pages of the '/projects' listing.
	 *
	 * @param baseUrl  gtr2 API base URL
	 * @param fromDate optional incremental lower bound 'yyyy-MM-dd'; blank/null means full harvest
	 */
	public Gtr2ProjectsIterable(final String baseUrl, final String fromDate) throws CollectorServiceException {
		prepare(baseUrl, fromDate);
		fillInfo(true);
	}

	/**
	 * Harvests only the page range [startFromPage, endAtPage]; total count is NOT set (-1).
	 */
	public Gtr2ProjectsIterable(final String baseUrl, final String fromDate, final int startFromPage, final int endAtPage) throws CollectorServiceException {
		prepare(baseUrl, fromDate);
		this.setStartFromPage(startFromPage);
		this.setEndAtPage(endAtPage);
		fillInfo(false);
	}

	// Common constructor setup: target URL, parser, and the incremental-harvest cut-off date.
	private void prepare(final String baseUrl, final String fromDate) {
		connector = new HttpConnector();
		queryURL = baseUrl + "/projects";
		vg = new VTDGen();
		this.incremental = StringUtils.isNotBlank(fromDate);
		if (incremental) {
			// I expect fromDate in the format 'yyyy-MM-dd'. See class eu.dnetlib.msro.workflows.nodes.collect.FindDateRangeForIncrementalHarvestingJobNode
			this.fromDate = DateTime.parse(fromDate, simpleDateTimeFormatter);
			log.debug("fromDate string: " + fromDate + " -- parsed: " + this.fromDate.toString());
		}
	}

	@Override
	public int getNumberOfElements() {
		return total;
	}

	/**
	 * Fetches the first listing page, reads totals and namespace declarations from its root
	 * attributes, and starts the background producer thread.
	 *
	 * @param all when true, endAtPage/total are taken from the listing's totalPages/totalSize
	 */
	private void fillInfo(final boolean all) throws CollectorServiceException {
		try {
			// log.debug("Getting hit count from: " + queryURL);
			byte[] bytes = connector.getInputSource(queryURL).getBytes("UTF-8");
			vg.setDoc(bytes);
			vg.parse(false);
			//vg.parseHttpUrl(queryURL, false);
			initParser();
			String hitCount = vn.toNormalizedString(vn.getAttrVal("totalSize"));
			String totalPages = vn.toNormalizedString(vn.getAttrVal("totalPages"));
			namespaces = "xmlns:ns1=\"" + vn.toNormalizedString(vn.getAttrVal("ns1")) + "\" ";
			namespaces += "xmlns:ns2=\"" + vn.toNormalizedString(vn.getAttrVal("ns2")) + "\" ";
			namespaces += "xmlns:ns3=\"" + vn.toNormalizedString(vn.getAttrVal("ns3")) + "\" ";
			namespaces += "xmlns:ns4=\"" + vn.toNormalizedString(vn.getAttrVal("ns4")) + "\" ";
			namespaces += "xmlns:ns5=\"" + vn.toNormalizedString(vn.getAttrVal("ns5")) + "\" ";
			namespaces += "xmlns:ns6=\"" + vn.toNormalizedString(vn.getAttrVal("ns6")) + "\" ";
			if (all) {
				setEndAtPage(Integer.parseInt(totalPages));
				total = Integer.parseInt(hitCount);
			}
			// Producer starts here; results arrive asynchronously on the 'projects' queue.
			Thread ft = new Thread(new FillProjectList());
			ft.start();
			log.debug("Expected number of pages: " + (endAtPage - startFromPage + 1));
		} catch (NumberFormatException e) {
			log.error("Cannot set the total count or the number of pages");
			throw new CollectorServiceException(e);
		} catch (Throwable e) {
			throw new CollectorServiceException(e);
		}
	}

	/**
	 * Consumer side. NOTE(review): hasNext()/next() share the outer 'nextElement' buffer,
	 * so a single iterator instance is assumed; confirm callers never iterate concurrently.
	 */
	@Override
	public Iterator<String> iterator() {

		return new Iterator<String>() {
			// The following is for debug only
			private int nextCounter = 0;

			@Override
			public boolean hasNext() {
				try {
					log.debug("hasNext?");
					if (nextElement == null) {
						// Block (bounded) until the producer delivers a record or the sentinel.
						nextElement = projects.poll(WAIT_END_SECONDS, TimeUnit.SECONDS);
						log.debug("Exit poll :-)");
					}
					// A null poll result (timeout) and the TERMINATOR both end the iteration.
					return nextElement != null && !nextElement.equals(TERMINATOR);
				} catch (InterruptedException e) {
					throw new CollectorServiceRuntimeException(e);
				}
			}

			@Override
			public String next() {
				nextCounter++;
				log.debug(String.format("Calling next %s times.", nextCounter));

				if (nextElement == null) throw new NoSuchElementException();
				else {
					// Hand over the buffered element and clear the look-ahead slot.
					String res = nextElement;
					nextElement = null;
					return res;
				}
			}

			@Override
			public void remove() {
				throw new UnsupportedOperationException();
			}

		};
	}

	// Re-creates navigator and autopilot after each vg.parse().
	private void initParser() {
		vn = vg.getNav();
		ap = new AutoPilot(vn);
	}

	public String getQueryURL() {
		return queryURL;
	}

	public void setQueryURL(final String queryURL) {
		this.queryURL = queryURL;
	}

	public int getTotal() {
		return total;
	}

	public void setTotal(final int total) {
		this.total = total;
	}

	public int getEndAtPage() {
		return endAtPage;
	}

	public void setEndAtPage(final int endAtPage) {
		this.endAtPage = endAtPage;
		log.debug("Overriding endAtPage to " + endAtPage);
	}

	public VTDGen getVg() {
		return vg;
	}

	public void setVg(final VTDGen vg) {
		this.vg = vg;
	}

	public VTDNav getVn() {
		return vn;
	}

	public void setVn(final VTDNav vn) {
		this.vn = vn;
	}

	public AutoPilot getAp() {
		return ap;
	}

	public void setAp(final AutoPilot ap) {
		this.ap = ap;
	}

	public String getNamespaces() {
		return namespaces;
	}

	public void setNamespaces(final String namespaces) {
		this.namespaces = namespaces;
	}

	public int getStartFromPage() {
		return startFromPage;
	}

	public void setStartFromPage(final int startFromPage) {
		this.startFromPage = startFromPage;
		log.debug("Overriding startFromPage to " + startFromPage);
	}

	/**
	 * Producer: walks the listing pages [startFromPage, endAtPage], submitting one
	 * ParseProject task per project link, then enqueues TERMINATOR when the pool drains.
	 */
	private class FillProjectList implements Runnable {

		private boolean morePages = true;
		private int pageNumber = startFromPage;

		@Override
		public void run() {
			String resultPageUrl = "";
			try {
				do {
					resultPageUrl = getNextPageUrl();
					log.debug("Page: " + resultPageUrl);
					// clear VGen before processing the next file
					vg.clear();
					byte[] bytes = connector.getInputSource(resultPageUrl).getBytes("UTF-8");
					vg.setDoc(bytes);
					vg.parse(false);
					//vg.parseHttpUrl(resultPageUrl, false);
					initParser();
					ap.selectXPath("//project");
					int res;

					while ((res = ap.evalXPath()) != -1) {
						final String projectHref = vn.toNormalizedString(vn.getAttrVal("href"));
						Thread t = new Thread(new ParseProject(projectHref));
						t.setName("Thread for " + res);
						es.execute(t);
					}
					ap.resetXPath();

				} while (morePages);
				// All pages scheduled: wait for the workers, then signal end-of-stream.
				es.shutdown();
				es.awaitTermination(WAIT_END_SECONDS, TimeUnit.SECONDS);
				projects.put(TERMINATOR);

			} catch (Throwable e) {
				// NOTE(review): on failure the TERMINATOR is never enqueued, so the consumer
				// only stops via the poll timeout — confirm this is the intended behaviour.
				log.error("Exception processing " + resultPageUrl + "\n" + e.getMessage());
			}
		}

		// Builds the URL of the current page and flips 'morePages' when the last page is reached.
		private String getNextPageUrl() {
			String url = queryURL + "?p=" + pageNumber;
			if (pageNumber == endAtPage) {
				morePages = false;
			}
			pageNumber++;
			return url;
		}

	}

	/**
	 * Worker: downloads one project record, applies the incremental-date filter, and puts
	 * the assembled package (via Gtr2Helper) on the shared queue.
	 */
	private class ParseProject implements Runnable {

		VTDNav vn1;
		VTDGen vg1;
		private String projectRef;

		public ParseProject(final String projectHref) {
			projectRef = projectHref;
			vg1 = new VTDGen();
			try {
				byte[] bytes = connector.getInputSource(projectRef).getBytes("UTF-8");
				vg1.setDoc(bytes);
				vg1.parse(false);
				//vg1.parseHttpUrl(projectRef, false);
				vn1 = vg1.getNav();
			}catch(Throwable e){
				// NOTE(review): vn1 stays null here; run() will then fail and rethrow.
				log.error("Exception processing " + projectRef + "\n" + e.getMessage());
			}
		}

		/**
		 * Checks the date attribute {@code attr} (e.g. 'created'/'updated') of the record.
		 *
		 * @return the attribute index when present and after {@code fromDate}; -1 otherwise
		 */
		private int projectsUpdate(String attr) throws CollectorServiceException {
			try {
				int index = vn1.getAttrVal(attr);
				if (index != -1) {
					// Attribute value looks like an ISO timestamp; only the date part is compared.
					String d = vn1.toNormalizedString(index);
					DateTime recordDate = DateTime.parse(d.substring(0, d.indexOf("T")), simpleDateTimeFormatter);
					// updated or created after the last time it was collected
					if (recordDate.isAfter(fromDate)) {
						log.debug("New project to collect");
						return index;
					}
					return -1;
				}
				return index;
			} catch (Throwable e) {
				throw new CollectorServiceException(e);
			}
		}

		// Builds the full XML package for this project via Gtr2Helper.
		private String collectProject() throws CollectorServiceException {
			try {

				int p = vn1.getAttrVal("href");

				final String projectHref = vn1.toNormalizedString(p);
				log.debug("collecting project at " + projectHref);

				Gtr2Helper gtr2Helper = new Gtr2Helper();
				String projectPackage = gtr2Helper.processProject(vn1, namespaces);

				return projectPackage;
			} catch (Throwable e) {
				throw new CollectorServiceException(e);
			}
		}

		// True when the given date attribute marks the record as new/changed since fromDate.
		private boolean add(String attr) throws CollectorServiceException {
			return projectsUpdate(attr) != -1;
		}

		@Override
		public void run() {
			log.debug("Getting project info from " + projectRef);
			try {
				// Full harvest takes everything; incremental harvest requires a
				// 'created' or 'updated' date after fromDate.
				if (!incremental || (incremental && (add("created") || add("updated")))) {
					projects.put(collectProject());
					log.debug("Project enqueued " + projectRef);
				}
			} catch (Throwable e) {
				log.error("Error on ParseProject " + e.getMessage());
				throw new CollectorServiceRuntimeException(e);
			}
		}

	}

}
|
|
@ -0,0 +1,59 @@
|
|||
/**
|
||||
*
|
||||
*/
|
||||
package eu.dnetlib.data.collector.plugins.rest;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/**
|
||||
* @author js, Andreas Czerniak
|
||||
*
|
||||
*/
|
||||
public class RestCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(InterfaceDescriptor ifDescriptor, String arg1, String arg2)
|
||||
throws CollectorServiceException {
|
||||
final String baseUrl = ifDescriptor.getBaseUrl();
|
||||
final String resumptionType = ifDescriptor.getParams().get("resumptionType");
|
||||
final String resumptionParam = ifDescriptor.getParams().get("resumptionParam");
|
||||
final String resumptionXpath = ifDescriptor.getParams().get("resumptionXpath");
|
||||
final String resultTotalXpath = ifDescriptor.getParams().get("resultTotalXpath");
|
||||
final String resultFormatParam = ifDescriptor.getParams().get("resultFormatParam");
|
||||
final String resultFormatValue = ifDescriptor.getParams().get("resultFormatValue");
|
||||
final String resultSizeParam = ifDescriptor.getParams().get("resultSizeParam");
|
||||
final String resultSizeValue = (StringUtils.isBlank(ifDescriptor.getParams().get("resultSizeValue"))) ? "100" : ifDescriptor.getParams().get("resultSizeValue");
|
||||
final String queryParams = ifDescriptor.getParams().get("queryParams");
|
||||
final String entityXpath = ifDescriptor.getParams().get("entityXpath");
|
||||
|
||||
if (StringUtils.isBlank(baseUrl)) {throw new CollectorServiceException("Param 'baseUrl' is null or empty");}
|
||||
if (StringUtils.isBlank(resumptionType)) {throw new CollectorServiceException("Param 'resumptionType' is null or empty");}
|
||||
if (StringUtils.isBlank(resumptionParam)) {throw new CollectorServiceException("Param 'resumptionParam' is null or empty");}
|
||||
// if (StringUtils.isBlank(resumptionXpath)) {throw new CollectorServiceException("Param 'resumptionXpath' is null or empty");}
|
||||
// if (StringUtils.isBlank(resultTotalXpath)) {throw new CollectorServiceException("Param 'resultTotalXpath' is null or empty");}
|
||||
// resultFormatParam can be emtpy because some Rest-APIs doesn't like this argument in the query
|
||||
//if (StringUtils.isBlank(resultFormatParam)) {throw new CollectorServiceException("Param 'resultFormatParam' is null, empty or whitespace");}
|
||||
if (StringUtils.isBlank(resultFormatValue)) {throw new CollectorServiceException("Param 'resultFormatValue' is null or empty");}
|
||||
if (StringUtils.isBlank(resultSizeParam)) {throw new CollectorServiceException("Param 'resultSizeParam' is null or empty");}
|
||||
// prevent resumptionType: discover -- if (Integer.valueOf(resultSizeValue) <= 1) {throw new CollectorServiceException("Param 'resultSizeValue' is less than 2");}
|
||||
if (StringUtils.isBlank(queryParams)) {throw new CollectorServiceException("Param 'queryParams' is null or empty");}
|
||||
if (StringUtils.isBlank(entityXpath)) {throw new CollectorServiceException("Param 'entityXpath' is null or empty");}
|
||||
|
||||
return () -> new RestIterator(
|
||||
baseUrl,
|
||||
resumptionType,
|
||||
resumptionParam,
|
||||
resumptionXpath,
|
||||
resultTotalXpath,
|
||||
resultFormatParam,
|
||||
resultFormatValue,
|
||||
resultSizeParam,
|
||||
resultSizeValue,
|
||||
queryParams,
|
||||
entityXpath);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,343 @@
|
|||
/**
|
||||
* log.debug(...) equal to log.trace(...) in the application-logs
|
||||
* <p>
|
||||
* known bug: at resumptionType 'discover' if the (resultTotal % resultSizeValue) == 0 the collecting fails -> change the resultSizeValue
|
||||
*/
|
||||
package eu.dnetlib.data.collector.plugins.rest;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.net.URL;
|
||||
import java.util.Iterator;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.PriorityBlockingQueue;
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerConfigurationException;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
import javax.xml.xpath.*;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
/**
|
||||
* @author Jochen Schirrwagen, Aenne Loehden, Andreas Czerniak
|
||||
* @date 2018-09-03
|
||||
*
|
||||
*/
|
||||
public class RestIterator implements Iterator<String> {
|
||||
|
||||
// TODO: clean up the comments of replaced source code
|
||||
private static final Log log = LogFactory.getLog(RestIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
|
||||
|
||||
private static final String wrapName = "recordWrap";
|
||||
private String baseUrl;
|
||||
private String resumptionType;
|
||||
private String resumptionParam;
|
||||
private String resultFormatValue;
|
||||
private String queryParams;
|
||||
private int resultSizeValue;
|
||||
private int resumptionInt = 0; // integer resumption token (first record to harvest)
|
||||
private int resultTotal = -1;
|
||||
private String resumptionStr = Integer.toString(resumptionInt); // string resumption token (first record to harvest or token scanned from results)
|
||||
private InputStream resultStream;
|
||||
private Transformer transformer;
|
||||
private XPath xpath;
|
||||
private String query;
|
||||
private XPathExpression xprResultTotalPath;
|
||||
private XPathExpression xprResumptionPath;
|
||||
private XPathExpression xprEntity;
|
||||
private String queryFormat;
|
||||
private String querySize;
|
||||
private Queue<String> recordQueue = new PriorityBlockingQueue<String>();
|
||||
private int discoverResultSize = 0;
|
||||
private int pagination = 1;
|
||||
|
||||
public RestIterator(
|
||||
final String baseUrl,
|
||||
final String resumptionType,
|
||||
final String resumptionParam,
|
||||
final String resumptionXpath,
|
||||
final String resultTotalXpath,
|
||||
final String resultFormatParam,
|
||||
final String resultFormatValue,
|
||||
final String resultSizeParam,
|
||||
final String resultSizeValueStr,
|
||||
final String queryParams,
|
||||
final String entityXpath
|
||||
) {
|
||||
this.baseUrl = baseUrl;
|
||||
this.resumptionType = resumptionType;
|
||||
this.resumptionParam = resumptionParam;
|
||||
this.resultFormatValue = resultFormatValue;
|
||||
this.queryParams = queryParams;
|
||||
this.resultSizeValue = Integer.valueOf(resultSizeValueStr);
|
||||
|
||||
queryFormat = StringUtils.isNotBlank(resultFormatParam) ? "&" + resultFormatParam + "=" + resultFormatValue : "";
|
||||
querySize = StringUtils.isNotBlank(resultSizeParam) ? "&" + resultSizeParam + "=" + resultSizeValueStr : "";
|
||||
|
||||
try {
|
||||
initXmlTransformation(resultTotalXpath, resumptionXpath, entityXpath);
|
||||
} catch (Exception e) {
|
||||
throw new IllegalStateException("xml transformation init failed: " + e.getMessage());
|
||||
}
|
||||
initQueue();
|
||||
}
|
||||
|
||||
	/**
	 * Compiles the XPath expressions and configures the DOM-to-string transformer
	 * used to serialize each matched record node.
	 * A blank resumptionXpath is replaced by "/" so the expression always compiles.
	 *
	 * @throws TransformerConfigurationException when the transformer cannot be created
	 * @throws XPathExpressionException          when one of the XPaths does not compile
	 */
	private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
			throws TransformerConfigurationException, XPathExpressionException {
		transformer = TransformerFactory.newInstance().newTransformer();
		transformer.setOutputProperty(OutputKeys.INDENT, "yes");
		transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
		xpath = XPathFactory.newInstance().newXPath();
		xprResultTotalPath = xpath.compile(resultTotalXpath);
		xprResumptionPath = xpath.compile(StringUtils.isBlank(resumptionXpath) ? "/" : resumptionXpath);
		xprEntity = xpath.compile(entityXpath);
	}
|
||||
|
||||
private void initQueue() {
|
||||
query = baseUrl + "?" + queryParams + querySize + queryFormat;
|
||||
}
|
||||
|
||||
	/**
	 * Releases resources held between pages.
	 * NOTE(review): currently a no-op — 'resultStream' is never closed here; confirm
	 * whether the open stream should be closed when iteration ends.
	 */
	private void disconnect() {
		// TODO close inputstream
	}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see java.util.Iterator#hasNext()
|
||||
*/
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (recordQueue.isEmpty() && query.isEmpty()) {
|
||||
disconnect();
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see java.util.Iterator#next()
|
||||
*/
|
||||
@Override
|
||||
public String next() {
|
||||
synchronized (recordQueue) {
|
||||
while (recordQueue.isEmpty() && !query.isEmpty()) {
|
||||
try {
|
||||
log.info("get Query: " + query);
|
||||
query = downloadPage(query);
|
||||
log.debug("next queryURL from downloadPage(): " + query);
|
||||
} catch (CollectorServiceException e) {
|
||||
log.debug("CollectorPlugin.next()-Exception: " + e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
return recordQueue.poll();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* download page and return nextQuery
|
||||
*/
|
||||
private String downloadPage(String query) throws CollectorServiceException {
|
||||
String resultJson;
|
||||
String resultXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
||||
String nextQuery = "";
|
||||
String emptyXml = resultXml + "<" + wrapName + "></" + wrapName + ">";
|
||||
Node resultNode = null;
|
||||
NodeList nodeList = null;
|
||||
String qUrlArgument = "";
|
||||
int urlOldResumptionSize = 0;
|
||||
|
||||
try {
|
||||
URL qUrl = new URL(query);
|
||||
|
||||
resultStream = qUrl.openStream();
|
||||
if ("json".equals(resultFormatValue.toLowerCase())) {
|
||||
|
||||
resultJson = IOUtils.toString(resultStream, "UTF-8");
|
||||
resultJson = syntaxConvertJsonKeyNamens(resultJson);
|
||||
org.json.JSONObject jsonObject = new org.json.JSONObject(resultJson);
|
||||
resultXml += org.json.XML.toString(jsonObject, wrapName); // wrap xml in single root element
|
||||
log.trace("before inputStream: " + resultXml);
|
||||
resultXml = XmlCleaner.cleanAllEntities(resultXml);
|
||||
log.trace("after cleaning: " + resultXml);
|
||||
resultStream = IOUtils.toInputStream(resultXml, "UTF-8");
|
||||
}
|
||||
|
||||
if (!(emptyXml.toLowerCase()).equals(resultXml.toLowerCase())) {
|
||||
resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
|
||||
nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
|
||||
log.debug("nodeList.length: " + nodeList.getLength());
|
||||
for (int i = 0; i < nodeList.getLength(); i++) {
|
||||
StringWriter sw = new StringWriter();
|
||||
transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
|
||||
recordQueue.add(sw.toString());
|
||||
}
|
||||
} else { log.info("resultXml is equal with emptyXml"); }
|
||||
|
||||
resumptionInt += resultSizeValue;
|
||||
|
||||
switch (resumptionType.toLowerCase()) {
|
||||
case "scan": // read of resumptionToken , evaluate next results, e.g. OAI, iterate over items
|
||||
resumptionStr = xprResumptionPath.evaluate(resultNode);
|
||||
break;
|
||||
|
||||
case "count": // begin at one step for all records, iterate over items
|
||||
resumptionStr = Integer.toString(resumptionInt);
|
||||
break;
|
||||
|
||||
case "discover": // size of result items unknown, iterate over items (for openDOAR - 201808)
|
||||
if (resultSizeValue < 2) {throw new CollectorServiceException("Mode: discover, Param 'resultSizeValue' is less than 2");}
|
||||
qUrlArgument = qUrl.getQuery();
|
||||
String[] arrayQUrlArgument = qUrlArgument.split("&");
|
||||
for (String arrayUrlArgStr : arrayQUrlArgument) {
|
||||
if (arrayUrlArgStr.startsWith(resumptionParam)) {
|
||||
String[] resumptionKeyValue = arrayUrlArgStr.split("=");
|
||||
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
|
||||
log.debug("discover OldResumptionSize from Url: " + urlOldResumptionSize);
|
||||
}
|
||||
}
|
||||
|
||||
if (((emptyXml.toLowerCase()).equals(resultXml.toLowerCase()))
|
||||
|| ((nodeList != null) && (nodeList.getLength() < resultSizeValue))
|
||||
) {
|
||||
// resumptionStr = "";
|
||||
if (nodeList != null) { discoverResultSize += nodeList.getLength(); }
|
||||
resultTotal = discoverResultSize;
|
||||
} else {
|
||||
resumptionStr = Integer.toString(resumptionInt);
|
||||
resultTotal = resumptionInt + 1;
|
||||
if (nodeList != null) { discoverResultSize += nodeList.getLength(); }
|
||||
}
|
||||
log.info("discoverResultSize: " + discoverResultSize);
|
||||
break;
|
||||
|
||||
case "pagination":
|
||||
case "page": // pagination, iterate over pages
|
||||
pagination += 1;
|
||||
if (nodeList != null) {
|
||||
discoverResultSize += nodeList.getLength();
|
||||
} else {
|
||||
resultTotal = discoverResultSize;
|
||||
pagination = discoverResultSize;
|
||||
}
|
||||
resumptionInt = pagination;
|
||||
resumptionStr = Integer.toString(resumptionInt);
|
||||
break;
|
||||
|
||||
default: // otherwise: abort
|
||||
// resultTotal = resumptionInt;
|
||||
break;
|
||||
}
|
||||
|
||||
if (resultTotal == -1) {
|
||||
resultTotal = Integer.parseInt(xprResultTotalPath.evaluate(resultNode));
|
||||
if (resumptionType.toLowerCase().equals("page")) { resultTotal += 1; } // to correct the upper bound
|
||||
log.info("resultTotal was -1 is now: " + resultTotal);
|
||||
}
|
||||
log.info("resultTotal: " + resultTotal);
|
||||
log.info("resInt: " + resumptionInt);
|
||||
if (resumptionInt < resultTotal) {
|
||||
nextQuery = baseUrl + "?" + queryParams + querySize + "&" + resumptionParam + "=" + resumptionStr + queryFormat;
|
||||
} else
|
||||
nextQuery = "";
|
||||
|
||||
log.debug("nextQueryUrl: " + nextQuery);
|
||||
return nextQuery;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error(e);
|
||||
throw new IllegalStateException("collection failed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* convert in JSON-KeyName 'whitespace(s)' to '_' and '/' to '_', '(' and ')' to ''
|
||||
* check W3C XML syntax: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-starttags for valid tag names
|
||||
* and work-around for the JSON to XML converting of org.json.XML-package.
|
||||
*
|
||||
* known bugs: doesn't prevent "key name":" ["sexy name",": penari","erotic dance"],
|
||||
*
|
||||
* @param jsonInput
|
||||
* @return convertedJsonKeynameOutput
|
||||
*/
|
||||
private String syntaxConvertJsonKeyNamens(String jsonInput) {
|
||||
|
||||
log.trace("before convertJsonKeyNames: " + jsonInput);
|
||||
// pre-clean json - rid spaces of element names (misinterpreted as elements with attributes in xml)
|
||||
// replace ' 's in JSON Namens with '_'
|
||||
while (jsonInput.matches(".*\"([^\"]*)\\s+([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*)\\s+([^\"]*)\":", "\"$1_$2\":");
|
||||
}
|
||||
|
||||
// replace forward-slash (sign '/' ) in JSON Names with '_'
|
||||
while (jsonInput.matches(".*\"([^\"]*)/([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*)/([^\"]*)\":", "\"$1_$2\":");
|
||||
}
|
||||
|
||||
// replace '(' in JSON Names with ''
|
||||
while (jsonInput.matches(".*\"([^\"]*)[(]([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*)[(]([^\"]*)\":", "\"$1$2\":");
|
||||
}
|
||||
|
||||
// replace ')' in JSON Names with ''
|
||||
while (jsonInput.matches(".*\"([^\"]*)[)]([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*)[)]([^\"]*)\":", "\"$1$2\":");
|
||||
}
|
||||
|
||||
// replace startNumbers in JSON Keynames with 'n_'
|
||||
while (jsonInput.matches(".*\"([^\"][0-9])([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"][0-9])([^\"]*)\":", "\"n_$1$2\":");
|
||||
}
|
||||
|
||||
// replace ':' between number like '2018-08-28T11:05:00Z' in JSON keynames with ''
|
||||
while (jsonInput.matches(".*\"([^\"]*[0-9]):([0-9][^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*[0-9]):([0-9][^\"]*)\":", "\"$1$2\":");
|
||||
}
|
||||
|
||||
// replace ',' in JSON Keynames with '.' to prevent , in xml tagnames.
|
||||
// while (jsonInput.matches(".*\"([^\"]*),([^\"]*)\":.*")) {
|
||||
// jsonInput = jsonInput.replaceAll("\"([^\"]*),([^\"]*)\":", "\"$1.$2\":");
|
||||
// }
|
||||
|
||||
// replace '=' in JSON Keynames with '-'
|
||||
while (jsonInput.matches(".*\"([^\"]*)=([^\"]*)\":.*")) {
|
||||
jsonInput = jsonInput.replaceAll("\"([^\"]*)=([^\"]*)\":", "\"$1-$2\":");
|
||||
}
|
||||
|
||||
log.trace("after syntaxConvertJsonKeyNames: " + jsonInput);
|
||||
return jsonInput;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* https://www.w3.org/TR/REC-xml/#charencoding shows character enoding in entities
|
||||
* *
|
||||
* @param bufferStr - XML string
|
||||
* @return
|
||||
*/
|
||||
private static String cleanUnwantedJsonCharsInXmlTagnames(String bufferStr) {
|
||||
|
||||
while (bufferStr.matches(".*<([^<>].*),(.)>.*")) {
|
||||
bufferStr = bufferStr.replaceAll("<([^<>.*),(.*)>", "<$1$2>");
|
||||
}
|
||||
|
||||
// replace [#x10-#x1f] with ''
|
||||
// while (bufferStr.matches(".*[0-9a-f].*")) {
|
||||
// bufferStr = bufferStr.replaceAll("([0-9a-fA-F])", "");
|
||||
// }
|
||||
|
||||
return bufferStr;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,685 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg;
|
||||
|
||||
import org.w3c.dom.Attr;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
import java.io.StringWriter;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
public class DatasetDocument {
|
||||
// Field lists mirror the DataCite-like <dataset> record produced by toXml();
// all of them are optional (null means "omit the corresponding element").
private List<Identifier> identifiers;
private List<Creator> creators;
private List<String> titles;
private List<String> alternativeTitles;
private List<String> publishers;
private List<LocalDate> publicationDates;
private List<String> subjects;
private List<Contributor> contributors;
private List<LocalDate> createdDates;
private List<LocalDate> updatedDates;
private List<String> languages;
private List<ResourceType> resourceTypes;
private List<AlternateIdentifier> alternateIdentifier;
private List<Citation> citations;
private List<String> sizes;
private List<String> format;
private List<String> version;
private List<License> licenses;
private List<String> descriptions;
private List<String> disambiguatingDescriptions;
private List<SpatialCoverage> geoLocations;

// Plain accessors; the setters store the given list reference without copying.

public List<Identifier> getIdentifiers() {
	return identifiers;
}

public void setIdentifiers(List<Identifier> identifiers) {
	this.identifiers = identifiers;
}

public List<Creator> getCreators() {
	return creators;
}

public void setCreators(List<Creator> creators) {
	this.creators = creators;
}

public List<String> getTitles() {
	return titles;
}

public void setTitles(List<String> titles) {
	this.titles = titles;
}

public List<String> getAlternativeTitles() {
	return alternativeTitles;
}

public void setAlternativeTitles(List<String> alternativeTitles) {
	this.alternativeTitles = alternativeTitles;
}

public List<String> getPublishers() {
	return publishers;
}

public void setPublishers(List<String> publishers) {
	this.publishers = publishers;
}

public List<LocalDate> getPublicationDates() {
	return publicationDates;
}

public void setPublicationDates(List<LocalDate> publicationDates) {
	this.publicationDates = publicationDates;
}

public List<String> getSubjects() {
	return subjects;
}

public void setSubjects(List<String> subjects) {
	this.subjects = subjects;
}

public List<Contributor> getContributors() {
	return contributors;
}

public void setContributors(List<Contributor> contributors) {
	this.contributors = contributors;
}

public List<LocalDate> getCreatedDates() {
	return createdDates;
}

public void setCreatedDates(List<LocalDate> createdDates) {
	this.createdDates = createdDates;
}

public List<LocalDate> getUpdatedDates() {
	return updatedDates;
}

public void setUpdatedDates(List<LocalDate> updatedDates) {
	this.updatedDates = updatedDates;
}

public List<String> getLanguages() {
	return languages;
}

public void setLanguages(List<String> languages) {
	this.languages = languages;
}

public List<ResourceType> getResourceTypes() {
	return resourceTypes;
}

public void setResourceTypes(List<ResourceType> resourceTypes) {
	this.resourceTypes = resourceTypes;
}

public List<AlternateIdentifier> getAlternateIdentifier() {
	return alternateIdentifier;
}

public void setAlternateIdentifier(List<AlternateIdentifier> alternateIdentifier) {
	this.alternateIdentifier = alternateIdentifier;
}

public List<Citation> getCitations() {
	return citations;
}

public void setCitations(List<Citation> citations) {
	this.citations = citations;
}

public List<String> getSizes() {
	return sizes;
}

public void setSizes(List<String> sizes) {
	this.sizes = sizes;
}

public List<String> getFormat() {
	return format;
}

public void setFormat(List<String> format) {
	this.format = format;
}

public List<String> getVersion() {
	return version;
}

public void setVersion(List<String> version) {
	this.version = version;
}

public List<License> getLicenses() {
	return licenses;
}

public void setLicenses(List<License> licenses) {
	this.licenses = licenses;
}

public List<String> getDescriptions() {
	return descriptions;
}

public void setDescriptions(List<String> descriptions) {
	this.descriptions = descriptions;
}

public List<String> getDisambiguatingDescriptions() {
	return disambiguatingDescriptions;
}

public void setDisambiguatingDescriptions(List<String> disambiguatingDescriptions) {
	this.disambiguatingDescriptions = disambiguatingDescriptions;
}

public List<SpatialCoverage> getGeoLocations() {
	return geoLocations;
}

public void setGeoLocations(List<SpatialCoverage> geoLocations) {
	this.geoLocations = geoLocations;
}
|
||||
|
||||
private static String emptyXml;
|
||||
private static Object lockEmptyXml = new Object();
|
||||
public static String emptyXml() {
|
||||
if(DatasetDocument.emptyXml!=null) return DatasetDocument.emptyXml;
|
||||
|
||||
String xml = null;
|
||||
try {
|
||||
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
|
||||
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
|
||||
Document doc = docBuilder.newDocument();
|
||||
|
||||
Element root = doc.createElement("dataset");
|
||||
doc.appendChild(root);
|
||||
|
||||
TransformerFactory tf = TransformerFactory.newInstance();
|
||||
Transformer transformer = tf.newTransformer();
|
||||
StringWriter writer = new StringWriter();
|
||||
transformer.transform(new DOMSource(doc), new StreamResult(writer));
|
||||
xml = writer.getBuffer().toString();
|
||||
}catch(Exception ex){
|
||||
xml = "<dataset/>";
|
||||
}
|
||||
|
||||
synchronized (DatasetDocument.lockEmptyXml) {
|
||||
if (DatasetDocument.emptyXml == null) DatasetDocument.emptyXml = xml;
|
||||
}
|
||||
|
||||
return DatasetDocument.emptyXml;
|
||||
}
|
||||
|
||||
public String toXml() throws Exception {
|
||||
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
|
||||
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
|
||||
Document doc = docBuilder.newDocument();
|
||||
|
||||
Element root = doc.createElement("dataset");
|
||||
doc.appendChild(root);
|
||||
|
||||
if(this.identifiers!=null){
|
||||
for(Identifier item : this.identifiers){
|
||||
item.toXml(root);
|
||||
}
|
||||
}
|
||||
if(this.creators!=null){
|
||||
Element creators = doc.createElement("creators");
|
||||
root.appendChild(creators);
|
||||
for(Creator item : this.creators){
|
||||
item.toXml(creators);
|
||||
}
|
||||
}
|
||||
if(this.titles!=null || this.alternativeTitles!=null){
|
||||
Element titles = doc.createElement("titles");
|
||||
root.appendChild(titles);
|
||||
if(this.titles!=null) {
|
||||
for (String item : this.titles) {
|
||||
Element title = doc.createElement("title");
|
||||
titles.appendChild(title);
|
||||
title.appendChild(doc.createTextNode(item));
|
||||
}
|
||||
}
|
||||
if(this.alternativeTitles!=null) {
|
||||
for (String item : this.alternativeTitles) {
|
||||
Element title = doc.createElement("title");
|
||||
titles.appendChild(title);
|
||||
title.setAttribute("titleType", "AlternativeTitle");
|
||||
title.appendChild(doc.createTextNode(item));
|
||||
}
|
||||
}
|
||||
}
|
||||
if(this.publishers!=null){
|
||||
for(String item : this.publishers){
|
||||
Element publisher = doc.createElement("publisher");
|
||||
root.appendChild(publisher);
|
||||
publisher.appendChild(doc.createTextNode(item));
|
||||
}
|
||||
}
|
||||
if(this.publicationDates!=null){
|
||||
for(LocalDate item : this.publicationDates){
|
||||
Element publicationYear = doc.createElement("publicationYear");
|
||||
root.appendChild(publicationYear);
|
||||
publicationYear.appendChild(doc.createTextNode(Integer.toString(item.getYear())));
|
||||
}
|
||||
}
|
||||
if(this.subjects!=null){
|
||||
Element subjects = doc.createElement("subjects");
|
||||
root.appendChild(subjects);
|
||||
for(String item : this.subjects){
|
||||
Element subject = doc.createElement("subject");
|
||||
subjects.appendChild(subject);
|
||||
subject.appendChild(doc.createTextNode(item));
|
||||
}
|
||||
}
|
||||
if(this.contributors!=null){
|
||||
for(Contributor item : this.contributors){
|
||||
item.toXml(root);
|
||||
}
|
||||
}
|
||||
if(this.createdDates!=null || this.updatedDates!=null){
|
||||
Element dates = doc.createElement("dates");
|
||||
root.appendChild(dates);
|
||||
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("YYYY-MM-DD");
|
||||
|
||||
if(createdDates!=null) {
|
||||
for (LocalDate item : this.createdDates) {
|
||||
Element date = doc.createElement("date");
|
||||
root.appendChild(date);
|
||||
date.setAttribute("dateType", "Created");
|
||||
date.appendChild(doc.createTextNode(item.format(formatter)));
|
||||
}
|
||||
}
|
||||
if(updatedDates!=null) {
|
||||
for (LocalDate item : this.updatedDates) {
|
||||
Element date = doc.createElement("date");
|
||||
root.appendChild(date);
|
||||
date.setAttribute("dateType", "Updated");
|
||||
date.appendChild(doc.createTextNode(item.format(formatter)));
|
||||
}
|
||||
}
|
||||
}
|
||||
if(this.languages!=null){
|
||||
for(String item : this.languages){
|
||||
Element language = doc.createElement("language");
|
||||
root.appendChild(language);
|
||||
language.appendChild(doc.createTextNode(item));
|
||||
}
|
||||
}
|
||||
if(this.resourceTypes!=null){
|
||||
for(ResourceType item : this.resourceTypes){
|
||||
item.toXml(root);
|
||||
}
|
||||
}
|
||||
if(this.alternateIdentifier!=null){
|
||||
Element alternateIdentifiers = doc.createElement("alternateIdentifiers");
|
||||
root.appendChild(alternateIdentifiers);
|
||||
for(AlternateIdentifier item : this.alternateIdentifier){
|
||||
item.toXml(alternateIdentifiers);
|
||||
}
|
||||
}
|
||||
if(this.citations!=null){
|
||||
for(Citation item : this.citations){
|
||||
item.toXml(root);
|
||||
}
|
||||
}
|
||||
if(this.sizes!=null){
|
||||
Element sizes = doc.createElement("sizes");
|
||||
root.appendChild(sizes);
|
||||
for(String item : this.sizes){
|
||||
Element size = doc.createElement("size");
|
||||
sizes.appendChild(size);
|
||||
size.appendChild(doc.createTextNode(item));
|
||||
}
|
||||
}
|
||||
if(this.format!=null){
|
||||
Element formats = doc.createElement("formats");
|
||||
root.appendChild(formats);
|
||||
for(String item : this.format){
|
||||
Element format = doc.createElement("format");
|
||||
formats.appendChild(format);
|
||||
format.appendChild(doc.createTextNode(item));
|
||||
}
|
||||
}
|
||||
if(this.version!=null){
|
||||
for(String item : this.version){
|
||||
Element version = doc.createElement("version");
|
||||
root.appendChild(version);
|
||||
version.appendChild(doc.createTextNode(item));
|
||||
}
|
||||
}
|
||||
if(this.licenses!=null){
|
||||
Element rightsList = doc.createElement("rightsList");
|
||||
root.appendChild(rightsList);
|
||||
for(License item : this.licenses){
|
||||
item.toXml(rightsList);
|
||||
}
|
||||
}
|
||||
if(this.descriptions!=null || this.disambiguatingDescriptions!=null){
|
||||
Element descriptions = doc.createElement("descriptions");
|
||||
root.appendChild(descriptions);
|
||||
if(this.descriptions!=null) {
|
||||
for (String item : this.descriptions) {
|
||||
Element description = doc.createElement("description");
|
||||
descriptions.appendChild(description);
|
||||
description.setAttribute("descriptionType", "Abstract");
|
||||
description.appendChild(doc.createTextNode(item));
|
||||
}
|
||||
}
|
||||
if(this.disambiguatingDescriptions!=null) {
|
||||
for (String item : this.disambiguatingDescriptions) {
|
||||
Element description = doc.createElement("description");
|
||||
descriptions.appendChild(description);
|
||||
description.setAttribute("descriptionType", "Other");
|
||||
description.appendChild(doc.createTextNode(item));
|
||||
}
|
||||
}
|
||||
}
|
||||
if(this.geoLocations!=null){
|
||||
Element geoLocations = doc.createElement("geoLocations");
|
||||
root.appendChild(geoLocations);
|
||||
for(SpatialCoverage item : this.geoLocations){
|
||||
item.toXml(geoLocations);
|
||||
}
|
||||
}
|
||||
|
||||
TransformerFactory tf = TransformerFactory.newInstance();
|
||||
Transformer transformer = tf.newTransformer();
|
||||
StringWriter writer = new StringWriter();
|
||||
transformer.transform(new DOMSource(doc), new StreamResult(writer));
|
||||
String xml = writer.getBuffer().toString();
|
||||
return xml;
|
||||
}
|
||||
|
||||
public static class SpatialCoverage{
|
||||
public static class Point{
|
||||
public String latitude;
|
||||
public String longitude;
|
||||
|
||||
public Point() {}
|
||||
|
||||
public Point(String latitude, String longitude){
|
||||
this.latitude = latitude;
|
||||
this.longitude = longitude;
|
||||
}
|
||||
}
|
||||
public String name;
|
||||
public List<Point> points;
|
||||
public List<String> boxes;
|
||||
|
||||
public SpatialCoverage() {}
|
||||
|
||||
public SpatialCoverage(String name, List<Point> points, List<String> boxes ) {
|
||||
this.name = name;
|
||||
this.points = points;
|
||||
this.boxes = boxes;
|
||||
}
|
||||
|
||||
public void toXml(Element parent){
|
||||
Element node = parent.getOwnerDocument().createElement("geoLocation");
|
||||
parent.appendChild(node);
|
||||
|
||||
if(this.points!=null) {
|
||||
for(Point point : this.points) {
|
||||
if(point.latitude == null || point.longitude == null) continue;
|
||||
Element geoLocationPoint = parent.getOwnerDocument().createElement("geoLocationPoint");
|
||||
geoLocationPoint.appendChild(parent.getOwnerDocument().createTextNode(String.format("%s %s", point.latitude, point.longitude)));
|
||||
node.appendChild(geoLocationPoint);
|
||||
}
|
||||
}
|
||||
if(this.boxes!=null) {
|
||||
for(String box : this.boxes) {
|
||||
if(box == null) continue;
|
||||
Element geoLocationBox = parent.getOwnerDocument().createElement("geoLocationBox");
|
||||
geoLocationBox.appendChild(parent.getOwnerDocument().createTextNode(box));
|
||||
node.appendChild(geoLocationBox);
|
||||
}
|
||||
}
|
||||
if(this.name!=null) {
|
||||
Element geoLocationPlace = parent.getOwnerDocument().createElement("geoLocationPlace");
|
||||
geoLocationPlace.appendChild(parent.getOwnerDocument().createTextNode(this.name));
|
||||
node.appendChild(geoLocationPlace);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class License{
|
||||
public String name;
|
||||
public String url;
|
||||
|
||||
public License() {}
|
||||
|
||||
public License(String name, String url) {
|
||||
this.name = name;
|
||||
this.url = url;
|
||||
}
|
||||
|
||||
public void toXml(Element parent){
|
||||
Element node = parent.getOwnerDocument().createElement("rights");
|
||||
parent.appendChild(node);
|
||||
|
||||
if(this.url!=null) {
|
||||
node.setAttribute("rightsURI", this.url);
|
||||
}
|
||||
if(this.name!=null) {
|
||||
node.appendChild(parent.getOwnerDocument().createTextNode(this.name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class Citation{
|
||||
public enum CitationIdentifierType{
|
||||
ARK, arXiv, bibcode, DOI, EAN13, EISSN, Handle, ISBN, ISSN, ISTC, LISSN, LSID, PMID,
|
||||
PURL, UPC, URL, URN
|
||||
}
|
||||
|
||||
public CitationIdentifierType type;
|
||||
public String value;
|
||||
|
||||
public Citation() {}
|
||||
|
||||
public Citation(String value, CitationIdentifierType type) {
|
||||
this.value = value;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public void toXml(Element parent){
|
||||
Element node = parent.getOwnerDocument().createElement("relatedIdentifier");
|
||||
parent.appendChild(node);
|
||||
|
||||
node.setAttribute("relatedIdentifierType", this.type.toString());
|
||||
node.setAttribute("relationType", "Cites");
|
||||
node.appendChild(parent.getOwnerDocument().createTextNode(this.value));
|
||||
}
|
||||
}
|
||||
|
||||
public static class Contributor{
|
||||
public enum ContributorType{
|
||||
ContactPerson, DataCollector, DataCurator, DataManager, Distributor, Editor, Funder, HostingInstitution,
|
||||
Producer, ProjectLeader, ProjectManager, ProjectMember, RegistrationAgency, RegistrationAuthority,
|
||||
RelatedPerson, Researcher, ResearchGroup, RightsHolder, Sponsor, Supervisor, WorkPackageLeader, Other
|
||||
}
|
||||
|
||||
public String name;
|
||||
public List<String> affiliations;
|
||||
public ContributorType type;
|
||||
|
||||
public Contributor() {
|
||||
}
|
||||
|
||||
public Contributor(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public Contributor(String name, List<String> affiliations) {
|
||||
this.name = name;
|
||||
this.affiliations = affiliations;
|
||||
}
|
||||
|
||||
public Contributor(String name, List<String> affiliations, ContributorType type) {
|
||||
this.name = name;
|
||||
this.affiliations = affiliations;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public void toXml(Element parent){
|
||||
Element node = parent.getOwnerDocument().createElement("contributor");
|
||||
parent.appendChild(node);
|
||||
|
||||
node.setAttribute("contributorType", this.type.toString());
|
||||
|
||||
if(this.name!=null) {
|
||||
Element contributorName = parent.getOwnerDocument().createElement("contributorName");
|
||||
node.appendChild(contributorName);
|
||||
contributorName.appendChild(parent.getOwnerDocument().createTextNode(this.name));
|
||||
}
|
||||
if(this.affiliations!=null) {
|
||||
for(String item : this.affiliations) {
|
||||
Element affiliation = parent.getOwnerDocument().createElement("affiliation");
|
||||
node.appendChild(affiliation);
|
||||
affiliation.appendChild(parent.getOwnerDocument().createTextNode(item));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class AlternateIdentifier{
|
||||
public String identifier;
|
||||
public String type;
|
||||
|
||||
public AlternateIdentifier() {}
|
||||
|
||||
public AlternateIdentifier(String identifier, String type) {
|
||||
this.identifier = identifier;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public void toXml(Element parent){
|
||||
Element node = parent.getOwnerDocument().createElement("alternateIdentifier");
|
||||
parent.appendChild(node);
|
||||
|
||||
if(this.type!=null) {
|
||||
node.setAttribute("alternateIdentifierType", this.type);
|
||||
}
|
||||
if(this.identifier!=null) {
|
||||
node.appendChild(parent.getOwnerDocument().createTextNode(this.identifier));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class ResourceType{
|
||||
public enum ResourceTypeGeneralType {
|
||||
Audiovisual, Collection, Dataset, Event, Image, InteractiveResource, Model, PhysicalObject, Service,
|
||||
Software, Sound, Text, Workflow, Other
|
||||
}
|
||||
|
||||
public ResourceTypeGeneralType type;
|
||||
|
||||
public ResourceType() {}
|
||||
|
||||
public ResourceType(ResourceTypeGeneralType type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public void toXml(Element parent){
|
||||
Element node = parent.getOwnerDocument().createElement("resourceType");
|
||||
parent.appendChild(node);
|
||||
|
||||
if(this.type!=null) {
|
||||
node.setAttribute("resourceTypeGeneral", this.type.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class Creator {
|
||||
public String name;
|
||||
public List<String> affiliations;
|
||||
|
||||
public Creator() {
|
||||
}
|
||||
|
||||
public Creator(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public Creator(String name, List<String> affiliations) {
|
||||
this.name = name;
|
||||
this.affiliations = affiliations;
|
||||
}
|
||||
|
||||
public void toXml(Element parent){
|
||||
Element node = parent.getOwnerDocument().createElement("creator");
|
||||
parent.appendChild(node);
|
||||
|
||||
if(this.name!=null) {
|
||||
Element creatorName = parent.getOwnerDocument().createElement("creatorName");
|
||||
node.appendChild(creatorName);
|
||||
creatorName.appendChild(parent.getOwnerDocument().createTextNode(this.name));
|
||||
}
|
||||
if(this.affiliations!=null) {
|
||||
for(String item : this.affiliations) {
|
||||
Element affiliation = parent.getOwnerDocument().createElement("affiliation");
|
||||
node.appendChild(affiliation);
|
||||
affiliation.appendChild(parent.getOwnerDocument().createTextNode(item));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class Identifier {
|
||||
public enum IdentifierType {
|
||||
ARK, DOI, Handle, PURL, URN, URL
|
||||
}
|
||||
|
||||
public String value;
|
||||
public IdentifierType type;
|
||||
|
||||
public Identifier() {
|
||||
}
|
||||
|
||||
public Identifier(IdentifierType type, String value) {
|
||||
this.type = type;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public void toXml(Element parent){
|
||||
Element node = parent.getOwnerDocument().createElement("identifier");
|
||||
parent.appendChild(node);
|
||||
|
||||
node.setAttribute("identifierType", this.type.toString());
|
||||
if(this.value!=null) {
|
||||
node.appendChild(parent.getOwnerDocument().createTextNode(this.value));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,514 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import java.net.URL;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.*;
|
||||
|
||||
public class DatasetMappingIterator implements Iterator<String> {
|
||||
// FIX: copy-paste bug — the logger was created for EndpointAccessIterator, so
// this class's messages were attributed to the wrong category.
private static final Log log = LogFactory.getLog(DatasetMappingIterator.class);
|
||||
|
||||
// Configuration for mapping harvested schema.org records onto DatasetDocument
// fields. Nested option holders expose public fields; all values are optional.
public static class Options {
	// How to classify raw identifier strings: each mapping* list holds the
	// source keys mapped to that identifier type; fallbackType/fallbackURL
	// control what happens when no mapping matches.
	public static class IdentifierOptions{
		public List<String> mappingARK;
		public List<String> mappingDOI;
		public List<String> mappingHandle;
		public List<String> mappingPURL;
		public List<String> mappingURN;
		public List<String> mappingURL;
		public DatasetDocument.Identifier.IdentifierType fallbackType;
		public Boolean fallbackURL;
	}

	// Contributor type to use when the source record does not specify one.
	public static class ContributorOptions{
		public DatasetDocument.Contributor.ContributorType fallbackType;
	}

	// Date-parsing pattern for the publication date field.
	public static class PublicationDateOptions{
		public String format;
	}

	// Date-parsing pattern for the created date field.
	public static class CreatedDateOptions{
		public String format;
	}

	// Date-parsing pattern for the updated date field.
	public static class UpdatedDateOptions{
		public String format;
	}

	private IdentifierOptions identifierOptions;
	private PublicationDateOptions publicationDateOptions;
	private ContributorOptions contributorOptions;
	private CreatedDateOptions createdDateOptions;
	private UpdatedDateOptions updatedDateOptions;

	public UpdatedDateOptions getUpdatedDateOptions() {
		return updatedDateOptions;
	}

	public void setUpdatedDateOptions(UpdatedDateOptions updatedDateOptions) {
		this.updatedDateOptions = updatedDateOptions;
	}

	public CreatedDateOptions getCreatedDateOptions() {
		return createdDateOptions;
	}

	public void setCreatedDateOptions(CreatedDateOptions createdDateOptions) {
		this.createdDateOptions = createdDateOptions;
	}

	public ContributorOptions getContributorOptions() {
		return contributorOptions;
	}

	public void setContributorOptions(ContributorOptions contributorOptions) {
		this.contributorOptions = contributorOptions;
	}

	public PublicationDateOptions getPublicationDateOptions() {
		return publicationDateOptions;
	}

	public void setPublicationDateOptions(PublicationDateOptions publicationDateOptions) {
		this.publicationDateOptions = publicationDateOptions;
	}

	public IdentifierOptions getIdentifierOptions() {
		return identifierOptions;
	}

	public void setIdentifierOptions(IdentifierOptions identifierOptions) {
		this.identifierOptions = identifierOptions;
	}
}
|
||||
|
||||
private Options options;
|
||||
private EndpointAccessIterator endpointAccessIterator;
|
||||
|
||||
public DatasetMappingIterator(Options options, EndpointAccessIterator endpointAccessIterator) {
|
||||
this.options = options;
|
||||
this.endpointAccessIterator = endpointAccessIterator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return this.endpointAccessIterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
JSONObject document = this.endpointAccessIterator.next();
|
||||
String xml = null;
|
||||
if (document == null) {
|
||||
log.debug("no document provided to process. returning empty");
|
||||
xml = DatasetDocument.emptyXml();
|
||||
}
|
||||
else {
|
||||
log.debug("building document");
|
||||
xml = this.buildDataset(document);
|
||||
if (!Utils.validateXml(xml)) {
|
||||
log.debug("xml not valid. setting to empty");
|
||||
xml = null;
|
||||
}
|
||||
if (xml == null) {
|
||||
log.debug("could not build xml. returning empty");
|
||||
xml = DatasetDocument.emptyXml();
|
||||
}
|
||||
}
|
||||
|
||||
//if all else fails
|
||||
if(xml == null){
|
||||
log.debug("could not build xml. returning empty");
|
||||
xml = "<dataset/>";
|
||||
}
|
||||
|
||||
log.debug("xml document for dataset is: "+xml);
|
||||
|
||||
return xml;
|
||||
}
|
||||
|
||||
private String buildDataset(JSONObject document){
|
||||
String xml = null;
|
||||
try{
|
||||
DatasetDocument dataset = new DatasetDocument();
|
||||
|
||||
dataset.setIdentifiers(this.extractIdentifier(document));
|
||||
dataset.setCreators(this.extractCreator(document));
|
||||
dataset.setTitles(this.extractTitles(document));
|
||||
dataset.setAlternativeTitles(this.extractAlternateTitles(document));
|
||||
dataset.setPublishers(this.extractPublisher(document));
|
||||
dataset.setPublicationDates(this.extractPublicationDate(document));
|
||||
dataset.setSubjects(this.extractSubjects(document));
|
||||
dataset.setContributors(this.extractContributors(document));
|
||||
dataset.setCreatedDates(this.extractCreatedDate(document));
|
||||
dataset.setUpdatedDates(this.extractUpdatedDate(document));
|
||||
dataset.setLanguages(this.extractLanguages(document));
|
||||
dataset.setResourceTypes(this.extractResourceTypes(document));
|
||||
dataset.setAlternateIdentifier(this.extractAlternateIdentifiers(document));
|
||||
dataset.setCitations(this.extractCitations(document));
|
||||
dataset.setSizes(this.extractSize(document));
|
||||
dataset.setFormat(this.extractEncodingFormat(document));
|
||||
dataset.setVersion(this.extractVersion(document));
|
||||
dataset.setLicenses(this.extractLicense(document));
|
||||
dataset.setDescriptions(this.extractDescription(document));
|
||||
dataset.setDisambiguatingDescriptions(this.extractDisambiguatingDescription(document));
|
||||
dataset.setGeoLocations(this.extractSpatialCoverage(document));
|
||||
|
||||
log.debug("document contains native identifier: : "+(dataset.getIdentifiers()!=null && dataset.getIdentifiers().size() > 0));
|
||||
|
||||
if((dataset.getIdentifiers() == null || dataset.getIdentifiers().size() == 0) &&
|
||||
this.options.getIdentifierOptions().fallbackURL){
|
||||
log.debug("falling back to url identifier");
|
||||
dataset.setIdentifiers(this.extractIdentifierFallbackURL(document));
|
||||
log.debug("document contains overridden identifier: : "+(dataset.getIdentifiers()!=null && dataset.getIdentifiers().size() > 0));
|
||||
}
|
||||
|
||||
xml = dataset.toXml();
|
||||
}
|
||||
catch(Exception ex){
|
||||
log.error("problem constructing dataset xml. returning empty", ex);
|
||||
xml = null;
|
||||
}
|
||||
return xml;
|
||||
}
|
||||
|
||||
private List<DatasetDocument.Identifier> extractIdentifierFallbackURL(JSONObject document){
|
||||
List<String> urls = JSONLDUtils.extractString(document, "url");
|
||||
|
||||
ArrayList<DatasetDocument.Identifier> curated = new ArrayList<>();
|
||||
for(String item : urls){
|
||||
if(item == null || item.trim().length() == 0) continue;
|
||||
curated.add(new DatasetDocument.Identifier(DatasetDocument.Identifier.IdentifierType.URL, item.trim()));
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<DatasetDocument.SpatialCoverage> extractSpatialCoverage(JSONObject document){
|
||||
List<JSONLDUtils.PlaceInfo> spatials = JSONLDUtils.extractPlaces(document, "spatialCoverage");
|
||||
|
||||
ArrayList<DatasetDocument.SpatialCoverage> curated = new ArrayList<>();
|
||||
for(JSONLDUtils.PlaceInfo item : spatials){
|
||||
if((item.name == null || item.name.trim().length() == 0) &&
|
||||
(item.geoCoordinates == null || item.geoCoordinates.size() == 0) &&
|
||||
(item.geoShapes == null || item.geoShapes.size() == 0)) continue;
|
||||
|
||||
List<DatasetDocument.SpatialCoverage.Point> points = new ArrayList<>();
|
||||
List<String> boxes = new ArrayList<>();
|
||||
if(item.geoCoordinates!=null) {
|
||||
for (JSONLDUtils.GeoCoordinatesInfo iter : item.geoCoordinates){
|
||||
points.add(new DatasetDocument.SpatialCoverage.Point(iter.latitude, iter.longitude));
|
||||
}
|
||||
}
|
||||
if(item.geoShapes!=null) {
|
||||
for (JSONLDUtils.GeoShapeInfo iter : item.geoShapes){
|
||||
boxes.add(iter.box);
|
||||
}
|
||||
}
|
||||
curated.add(new DatasetDocument.SpatialCoverage(item.name, points, boxes));
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<String> extractDescription(JSONObject document){
|
||||
List<String> descriptions = JSONLDUtils.extractString(document, "description");
|
||||
|
||||
ArrayList<String> curated = new ArrayList<>();
|
||||
for(String item : descriptions){
|
||||
if(item == null || item.trim().length() == 0) continue;
|
||||
curated.add(item);
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<String> extractDisambiguatingDescription(JSONObject document){
|
||||
List<String> descriptions = JSONLDUtils.extractString(document, "disambiguatingDescription");
|
||||
|
||||
ArrayList<String> curated = new ArrayList<>();
|
||||
for(String item : descriptions){
|
||||
if(item == null || item.trim().length() == 0) continue;
|
||||
curated.add(item);
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<DatasetDocument.License> extractLicense(JSONObject document){
|
||||
List<JSONLDUtils.LicenseInfo> licenses = JSONLDUtils.extractLicenses(document, "license");
|
||||
|
||||
ArrayList<DatasetDocument.License> curated = new ArrayList<>();
|
||||
for(JSONLDUtils.LicenseInfo item : licenses){
|
||||
if(item.url == null || item.url.trim().length() == 0) continue;
|
||||
curated.add(new DatasetDocument.License(item.name, item.url));
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<String> extractVersion(JSONObject document){
|
||||
List<String> versions = JSONLDUtils.extractString(document, "version");
|
||||
|
||||
ArrayList<String> curated = new ArrayList<>();
|
||||
for(String item : versions){
|
||||
if(item == null || item.trim().length() == 0) continue;
|
||||
curated.add(item);
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<String> extractSize(JSONObject document) {
|
||||
List<String> sizes = JSONLDUtils.extractSize(document, "distribution");
|
||||
|
||||
HashSet<String> curated = new HashSet<>();
|
||||
for (String item : sizes) {
|
||||
if (item == null || item.trim().length() == 0) continue;
|
||||
curated.add(item);
|
||||
}
|
||||
return new ArrayList<>(curated);
|
||||
}
|
||||
|
||||
private List<String> extractEncodingFormat(JSONObject document){
|
||||
List<String> formats = JSONLDUtils.extractEncodingFormat(document, "distribution");
|
||||
|
||||
HashSet<String> curated = new HashSet<>();
|
||||
for(String item : formats){
|
||||
if(item == null || item.trim().length() == 0) continue;
|
||||
curated.add(item);
|
||||
}
|
||||
return new ArrayList<>(curated);
|
||||
}
|
||||
|
||||
//TODO: Handle different citation types. Currently only urls
|
||||
private List<DatasetDocument.Citation> extractCitations(JSONObject document){
|
||||
List<JSONLDUtils.CitationInfo> citations = JSONLDUtils.extractCitations(document, "citation");
|
||||
|
||||
ArrayList<DatasetDocument.Citation> curated = new ArrayList<>();
|
||||
for(JSONLDUtils.CitationInfo item : citations){
|
||||
if(item.url == null || item.url.trim().length() == 0) continue;
|
||||
try{
|
||||
new URL(item.url);
|
||||
}catch (Exception ex){
|
||||
continue;
|
||||
}
|
||||
curated.add(new DatasetDocument.Citation(item.url, DatasetDocument.Citation.CitationIdentifierType.URL));
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<DatasetDocument.AlternateIdentifier> extractAlternateIdentifiers(JSONObject document){
|
||||
List<String> issns = JSONLDUtils.extractString(document, "issn");
|
||||
List<String> urls = JSONLDUtils.extractString(document, "url");
|
||||
|
||||
ArrayList<DatasetDocument.AlternateIdentifier> curated = new ArrayList<>();
|
||||
for(String item : issns){
|
||||
if(item == null || item.trim().length() == 0) continue;
|
||||
curated.add(new DatasetDocument.AlternateIdentifier(item.trim(), "ISSN"));
|
||||
}
|
||||
for(String item : urls){
|
||||
if(item == null || item.trim().length() == 0) continue;
|
||||
curated.add(new DatasetDocument.AlternateIdentifier(item.trim(), "URL"));
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<DatasetDocument.ResourceType> extractResourceTypes(JSONObject document){
|
||||
List<DatasetDocument.ResourceType> resourceTypes = new ArrayList<>();
|
||||
resourceTypes.add(new DatasetDocument.ResourceType(DatasetDocument.ResourceType.ResourceTypeGeneralType.Dataset));
|
||||
return resourceTypes;
|
||||
}
|
||||
|
||||
private List<String> extractLanguages(JSONObject document){
|
||||
List<String> languages = JSONLDUtils.extractLanguage(document, "inLanguage");
|
||||
|
||||
ArrayList<String> curated = new ArrayList<>();
|
||||
for(String item : languages){
|
||||
if(item == null || item.trim().length() == 0) continue;
|
||||
curated.add(item);
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<LocalDate> extractUpdatedDate(JSONObject document){
|
||||
List<LocalDate> updatedDates = new ArrayList<>();
|
||||
if(this.options.getUpdatedDateOptions() == null || this.options.getUpdatedDateOptions().format == null || this.options.getUpdatedDateOptions().format.length() == 0) return updatedDates;
|
||||
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern(this.options.getPublicationDateOptions().format);
|
||||
|
||||
List<String> dates = JSONLDUtils.extractString(document, "dateModified");
|
||||
for(String updatedDate : dates){
|
||||
if(updatedDate == null || updatedDate.trim().length() == 0) continue;
|
||||
try {
|
||||
LocalDate localDate = LocalDate.parse(updatedDate, formatter);
|
||||
updatedDates.add(localDate);
|
||||
} catch (Exception e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return updatedDates;
|
||||
}
|
||||
|
||||
private List<LocalDate> extractCreatedDate(JSONObject document){
|
||||
List<LocalDate> createdDates = new ArrayList<>();
|
||||
if(this.options.getCreatedDateOptions() == null || this.options.getCreatedDateOptions().format == null || this.options.getCreatedDateOptions().format.length() == 0) return createdDates;
|
||||
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern(this.options.getCreatedDateOptions().format);
|
||||
|
||||
List<String> dates = JSONLDUtils.extractString(document, "dateCreated");
|
||||
for(String createdDate : dates){
|
||||
if(createdDate == null || createdDate.trim().length() == 0) continue;
|
||||
try {
|
||||
LocalDate localDate = LocalDate.parse(createdDate, formatter);
|
||||
createdDates.add(localDate);
|
||||
} catch (Exception e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return createdDates;
|
||||
}
|
||||
|
||||
private List<DatasetDocument.Contributor> extractContributors(JSONObject document){
|
||||
List<JSONLDUtils.PrincipalInfo> editors = JSONLDUtils.extractPrincipal(document, "editor");
|
||||
List<JSONLDUtils.PrincipalInfo> funders = JSONLDUtils.extractPrincipal(document, "funder");
|
||||
List<JSONLDUtils.PrincipalInfo> producers = JSONLDUtils.extractPrincipal(document, "producer");
|
||||
List<JSONLDUtils.PrincipalInfo> sponsors = JSONLDUtils.extractPrincipal(document, "sponsor");
|
||||
List<JSONLDUtils.PrincipalInfo> constributors = JSONLDUtils.extractPrincipal(document, "contributor");
|
||||
|
||||
ArrayList<DatasetDocument.Contributor> curated = new ArrayList<>();
|
||||
for(JSONLDUtils.PrincipalInfo item : editors){
|
||||
if(item.name() == null || item.name().trim().length() == 0) continue;
|
||||
curated.add(new DatasetDocument.Contributor(item.name(), item.affiliationNames(), DatasetDocument.Contributor.ContributorType.Editor));
|
||||
}
|
||||
for(JSONLDUtils.PrincipalInfo item : funders){
|
||||
if(item.name() == null || item.name().trim().length() == 0) continue;
|
||||
curated.add(new DatasetDocument.Contributor(item.name(), item.affiliationNames(), DatasetDocument.Contributor.ContributorType.Funder));
|
||||
}
|
||||
for(JSONLDUtils.PrincipalInfo item : producers){
|
||||
if(item.name() == null || item.name().trim().length() == 0) continue;
|
||||
curated.add(new DatasetDocument.Contributor(item.name(), item.affiliationNames(), DatasetDocument.Contributor.ContributorType.Producer));
|
||||
}
|
||||
for(JSONLDUtils.PrincipalInfo item : sponsors){
|
||||
if(item.name() == null || item.name().trim().length() == 0) continue;
|
||||
curated.add(new DatasetDocument.Contributor(item.name(), item.affiliationNames(), DatasetDocument.Contributor.ContributorType.Sponsor));
|
||||
}
|
||||
for(JSONLDUtils.PrincipalInfo item : constributors){
|
||||
if(item.name() == null || item.name().trim().length() == 0) continue;
|
||||
DatasetDocument.Contributor.ContributorType type = DatasetDocument.Contributor.ContributorType.Other;
|
||||
if(this.options.getContributorOptions()!=null && this.options.getContributorOptions().fallbackType != null) type = this.options.getContributorOptions().fallbackType;
|
||||
curated.add(new DatasetDocument.Contributor(item.name(), item.affiliationNames(), type));
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<String> extractSubjects(JSONObject document){
|
||||
List<String> subjects = JSONLDUtils.extractString(document, "keywords");
|
||||
|
||||
ArrayList<String> curated = new ArrayList<>();
|
||||
for(String item : subjects){
|
||||
if(item == null || item.trim().length() == 0) continue;
|
||||
curated.add(item);
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<LocalDate> extractPublicationDate(JSONObject document){
|
||||
List<LocalDate> publicationDates = new ArrayList<>();
|
||||
if(this.options.getPublicationDateOptions() == null || this.options.getPublicationDateOptions().format == null || this.options.getPublicationDateOptions().format.length() == 0) return publicationDates;
|
||||
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern(this.options.getPublicationDateOptions().format);
|
||||
|
||||
List<String> dates = JSONLDUtils.extractString(document, "datePublished");
|
||||
for(String publicationDate : dates){
|
||||
if(publicationDate == null || publicationDate.trim().length() == 0) continue;
|
||||
try {
|
||||
LocalDate localDate = LocalDate.parse(publicationDate, formatter);
|
||||
publicationDates.add(localDate);
|
||||
} catch (Exception e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return publicationDates;
|
||||
}
|
||||
|
||||
private List<String> extractPublisher(JSONObject document){
|
||||
List<JSONLDUtils.PrincipalInfo> publishers = JSONLDUtils.extractPrincipal(document, "publisher");
|
||||
|
||||
ArrayList<String> curated = new ArrayList<>();
|
||||
for(JSONLDUtils.PrincipalInfo item : publishers){
|
||||
if(item.name() == null || item.name().trim().length() == 0) continue;
|
||||
curated.add(item.name());
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<String> extractTitles(JSONObject document){
|
||||
List<String> names = JSONLDUtils.extractString(document, "name");
|
||||
List<String> headlines = JSONLDUtils.extractString(document, "headline");
|
||||
|
||||
HashSet<String> titles = new HashSet<>();
|
||||
titles.addAll(names);
|
||||
titles.addAll(headlines);
|
||||
return new ArrayList<>(titles);
|
||||
}
|
||||
|
||||
private List<String> extractAlternateTitles(JSONObject document){
|
||||
List<String> names = JSONLDUtils.extractString(document, "alternateName");
|
||||
List<String> headlines = JSONLDUtils.extractString(document, "alternativeHeadline");
|
||||
|
||||
HashSet<String> titles = new HashSet<>();
|
||||
titles.addAll(names);
|
||||
titles.addAll(headlines);
|
||||
return new ArrayList<>(titles);
|
||||
}
|
||||
|
||||
private List<DatasetDocument.Identifier> extractIdentifier(JSONObject document){
|
||||
List<DatasetDocument.Identifier> curated = new ArrayList<>();
|
||||
|
||||
List<JSONLDUtils.IdentifierInfo> identifiers = JSONLDUtils.extractIdentifier(document, "identifier");
|
||||
|
||||
for(JSONLDUtils.IdentifierInfo item : identifiers){
|
||||
if(item.value == null || item.value.trim().length() == 0) continue;
|
||||
if(item.type == null || item.type.trim().length() == 0) {
|
||||
if (this.options.getIdentifierOptions().fallbackType == null) continue;
|
||||
curated.add(new DatasetDocument.Identifier(this.options.getIdentifierOptions().fallbackType, item.value.trim()));
|
||||
}
|
||||
else {
|
||||
DatasetDocument.Identifier.IdentifierType type = null;
|
||||
if(this.options.getIdentifierOptions().mappingARK != null && this.options.getIdentifierOptions().mappingARK.contains(item.type.trim())) type = DatasetDocument.Identifier.IdentifierType.ARK;
|
||||
else if(this.options.getIdentifierOptions().mappingDOI != null && this.options.getIdentifierOptions().mappingDOI.contains(item.type.trim())) type = DatasetDocument.Identifier.IdentifierType.DOI;
|
||||
else if(this.options.getIdentifierOptions().mappingHandle != null && this.options.getIdentifierOptions().mappingHandle.contains(item.type.trim())) type = DatasetDocument.Identifier.IdentifierType.Handle;
|
||||
else if(this.options.getIdentifierOptions().mappingPURL != null && this.options.getIdentifierOptions().mappingPURL.contains(item.type.trim())) type = DatasetDocument.Identifier.IdentifierType.PURL;
|
||||
else if(this.options.getIdentifierOptions().mappingURL != null && this.options.getIdentifierOptions().mappingURL.contains(item.type.trim())) type = DatasetDocument.Identifier.IdentifierType.URL;
|
||||
else if(this.options.getIdentifierOptions().mappingURN != null && this.options.getIdentifierOptions().mappingURN.contains(item.type.trim())) type = DatasetDocument.Identifier.IdentifierType.URN;
|
||||
|
||||
if(type == null) continue;
|
||||
curated.add(new DatasetDocument.Identifier(type, item.value.trim()));
|
||||
}
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
private List<DatasetDocument.Creator> extractCreator(JSONObject document){
|
||||
List<JSONLDUtils.PrincipalInfo> creators = JSONLDUtils.extractPrincipal(document, "creator");
|
||||
List<JSONLDUtils.PrincipalInfo> authors = JSONLDUtils.extractPrincipal(document, "author");
|
||||
|
||||
HashSet<String> foundNames = new HashSet<>();
|
||||
List<DatasetDocument.Creator> curated = new ArrayList<>();
|
||||
for(JSONLDUtils.PrincipalInfo item : creators){
|
||||
if(item.name() == null || item.name().trim().length() == 0) continue;
|
||||
if(foundNames.contains(item.name())) continue;
|
||||
foundNames.add(item.name());
|
||||
curated.add(new DatasetDocument.Creator(item.name(), item.affiliationNames()));
|
||||
}
|
||||
for(JSONLDUtils.PrincipalInfo item : authors){
|
||||
if(item.name() == null || item.name().trim().length() == 0) continue;
|
||||
if(foundNames.contains(item.name())) continue;
|
||||
foundNames.add(item.name());
|
||||
|
||||
curated.add(new DatasetDocument.Creator(item.name(), item.affiliationNames()));
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Iterator;
|
||||
|
||||
public class EndpointAccessIterator implements Iterator<JSONObject> {
|
||||
private static final Log log = LogFactory.getLog(EndpointAccessIterator.class);
|
||||
|
||||
public static class Options {
|
||||
|
||||
private Charset charset;
|
||||
|
||||
public Options(){}
|
||||
|
||||
public Options(Charset charset) {
|
||||
this.charset = charset;
|
||||
}
|
||||
|
||||
public Charset getCharset() {
|
||||
return charset;
|
||||
}
|
||||
|
||||
public void setCharset(Charset charset) {
|
||||
this.charset = charset;
|
||||
}
|
||||
}
|
||||
|
||||
private Options options;
|
||||
private Iterator<String> repositoryIterator;
|
||||
|
||||
public EndpointAccessIterator(Options options, Iterator<String> repositoryIterator) {
|
||||
this.options = options;
|
||||
this.repositoryIterator = repositoryIterator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return this.repositoryIterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject next() {
|
||||
String endpoint = this.repositoryIterator.next();
|
||||
if(endpoint == null) return null;
|
||||
|
||||
log.debug(String.format("processing: %s", endpoint));
|
||||
|
||||
JSONObject dataset = this.extractDatasetRecord(endpoint);
|
||||
|
||||
return dataset;
|
||||
}
|
||||
|
||||
private JSONObject extractDatasetRecord(String endpoint) {
|
||||
JSONObject datasetDocument = null;
|
||||
try {
|
||||
URL urlEndpoint = new URL(endpoint);
|
||||
log.debug("downloading endpoint "+urlEndpoint);
|
||||
String payload = Utils.RemoteAccessWithRetry(3, 5000, urlEndpoint, this.options.getCharset());
|
||||
|
||||
log.trace("downloaded payload id: "+payload);
|
||||
Document doc = Jsoup.parse(payload);
|
||||
Elements scriptTags = doc.getElementsByTag("script");
|
||||
for (Element scriptTag : scriptTags) {
|
||||
if (!scriptTag.hasAttr("type")) continue;
|
||||
String scriptType = scriptTag.attr("type");
|
||||
if (!scriptType.equalsIgnoreCase("application/ld+json")) continue;
|
||||
|
||||
String data = scriptTag.data();
|
||||
JSONObject schemaItem = new JSONObject(data);
|
||||
String context = schemaItem.optString("@context");
|
||||
String type = schemaItem.optString("@type");
|
||||
|
||||
if (context == null || type == null) continue;
|
||||
|
||||
Boolean isSchemaOrgContext = context.toLowerCase().startsWith("http://schema.org") || context.toLowerCase().startsWith("https://schema.org");
|
||||
Boolean isDataset = type.equalsIgnoreCase("dataset");
|
||||
|
||||
if (!isSchemaOrgContext || !isDataset) continue;
|
||||
|
||||
log.debug(String.format("discovered dataset document: %s", schemaItem.toString()));
|
||||
|
||||
datasetDocument = schemaItem;
|
||||
break;
|
||||
}
|
||||
}catch(Exception ex){
|
||||
log.error("problem extracting dataset document. returning empty", ex);
|
||||
datasetDocument = null;
|
||||
}
|
||||
if(datasetDocument == null){
|
||||
log.debug("did not find any dataset document in endpoint");
|
||||
}
|
||||
else{
|
||||
log.debug("found dataset document in endpoint :"+datasetDocument.toString());
|
||||
}
|
||||
return datasetDocument;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,515 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class JSONLDUtils {
|
||||
|
||||
	// Common view over Person and Organization entries: a display name plus
	// (for persons) the names of affiliated organizations.
	public interface PrincipalInfo{
		// Display name of the principal; callers must handle null/blank values.
		String name();
		// Affiliation names, or null when the principal type carries none.
		List<String> affiliationNames();

	}
|
||||
|
||||
	// A schema.org Organization: only its display name is captured.
	public static class OrganizationInfo implements PrincipalInfo{
		public String name;

		public String name(){return this.name;}

		// Organizations have no affiliations of their own.
		public List<String> affiliationNames(){
			return null;
		}

		public OrganizationInfo(){}

		public OrganizationInfo(String name){
			this.name = name;
		}
	}
|
||||
|
||||
public static class PersonInfo implements PrincipalInfo{
|
||||
public String name;
|
||||
public List<OrganizationInfo> affiliations;
|
||||
|
||||
public String name(){return this.name;}
|
||||
|
||||
public List<String> affiliationNames(){
|
||||
if(this.affiliations == null) return null;
|
||||
List<String> curated = new ArrayList<>();
|
||||
for(OrganizationInfo item : this.affiliations){
|
||||
if(item == null || item.name == null || item.name.trim().length() == 0) continue;;
|
||||
curated.add(item.name.trim());
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
public PersonInfo(){}
|
||||
|
||||
public PersonInfo(String name){
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public PersonInfo(String name, List<OrganizationInfo> affiliations){
|
||||
this.name = name;
|
||||
this.affiliations = affiliations;
|
||||
}
|
||||
}
|
||||
|
||||
	// License attached to a dataset: a human-readable name plus the license URL.
	public static class LicenseInfo{
		public String name;
		public String url;

		public LicenseInfo(){}

		public LicenseInfo(String url){
			this.url = url;
		}

		public LicenseInfo(String url, String name){
			this.name = name;
			this.url = url;
		}
	}
|
||||
|
||||
	// Citation reference; currently only the citation URL is captured.
	public static class CitationInfo{
		public String url;

		public CitationInfo(){}

		public CitationInfo(String url){
			this.url = url;
		}
	}
|
||||
|
||||
	// Raw identifier as found in the JSON-LD: a value plus an optional type label.
	public static class IdentifierInfo{
		public String value;
		// Raw type label; may be null/blank when the source provides none.
		public String type;

		public IdentifierInfo(){}

		public IdentifierInfo(String value){
			this.value = value;
		}

		public IdentifierInfo(String value, String type){
			this.value = value;
			this.type = type;
		}
	}
|
||||
|
||||
	// Point coordinates of a schema.org GeoCoordinates node, kept as raw strings.
	public static class GeoCoordinatesInfo{
		public String latitude;
		public String longitude;

		public GeoCoordinatesInfo(){}

		public GeoCoordinatesInfo(String latitude, String longitude){
			this.latitude = latitude;
			this.longitude = longitude;
		}
	}
|
||||
|
||||
	// Bounding box of a schema.org GeoShape node, kept as the raw "box" string.
	public static class GeoShapeInfo{
		public String box;

		public GeoShapeInfo(){}

		public GeoShapeInfo(String box){
			this.box = box;
		}
	}
|
||||
|
||||
	// A schema.org Place: an optional name plus any geo coordinates/shapes found on it.
	public static class PlaceInfo{
		public String name;
		public List<GeoCoordinatesInfo> geoCoordinates;
		public List<GeoShapeInfo> geoShapes;

		public PlaceInfo(){}

		public PlaceInfo(String name, List<GeoCoordinatesInfo> geoCoordinates, List<GeoShapeInfo> geoShapes){
			this.name = name;
			this.geoCoordinates = geoCoordinates;
			this.geoShapes = geoShapes;
		}
	}
|
||||
|
||||
private static PlaceInfo extractPlaceSingle(JSONObject document){
|
||||
if(document == null || !"Place".equals(document.optString("@type"))) return null;
|
||||
String name = document.optString("name");
|
||||
List<GeoCoordinatesInfo> geoCoordinates = JSONLDUtils.extractGeoCoordinates(document, "geo");
|
||||
List<GeoShapeInfo> geoShapes = JSONLDUtils.extractGeoShapes(document, "geo");
|
||||
if((name==null || name.trim().length() == 0) &&
|
||||
(geoCoordinates == null || geoCoordinates.size() == 0) &&
|
||||
(geoShapes == null || geoShapes.size() == 0)) return null;
|
||||
return new PlaceInfo(name, geoCoordinates, geoShapes);
|
||||
}
|
||||
|
||||
public static List<PlaceInfo> extractPlaces(JSONObject document, String key) {
|
||||
List<PlaceInfo> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
PlaceInfo nfo = JSONLDUtils.extractPlaceSingle(array.optJSONObject(i));
|
||||
if(nfo!=null) items.add(nfo);
|
||||
}
|
||||
}else if (obj!=null) {
|
||||
PlaceInfo nfo = JSONLDUtils.extractPlaceSingle(obj);
|
||||
if(nfo!=null) items.add(nfo);
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
private static GeoCoordinatesInfo extractGeoCoordinatesSingle(JSONObject document){
|
||||
if(document == null || !"GeoCoordinates".equals(document.optString("@type"))) return null;
|
||||
String latitude = document.optString("latitude");
|
||||
String longitude = document.optString("longitude");
|
||||
if(latitude==null || latitude.trim().length()==0 || longitude==null || longitude.trim().length()==0) return null;
|
||||
return new GeoCoordinatesInfo(latitude, longitude);
|
||||
}
|
||||
|
||||
private static List<GeoCoordinatesInfo> extractGeoCoordinates(JSONObject document, String key) {
|
||||
List<GeoCoordinatesInfo> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
GeoCoordinatesInfo nfo = JSONLDUtils.extractGeoCoordinatesSingle(array.optJSONObject(i));
|
||||
if(nfo!=null) items.add(nfo);
|
||||
}
|
||||
}else if (obj!=null) {
|
||||
GeoCoordinatesInfo nfo = JSONLDUtils.extractGeoCoordinatesSingle(obj);
|
||||
if(nfo!=null) items.add(nfo);
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
private static GeoShapeInfo extractGeoShapeSingle(JSONObject document){
|
||||
if(document == null || !"GeoShape".equals(document.optString("@type"))) return null;
|
||||
String box = document.optString("box");
|
||||
if(box==null || box.trim().length()==0 ) return null;
|
||||
return new GeoShapeInfo(box);
|
||||
}
|
||||
|
||||
private static List<GeoShapeInfo> extractGeoShapes(JSONObject document, String key) {
|
||||
List<GeoShapeInfo> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
GeoShapeInfo nfo = JSONLDUtils.extractGeoShapeSingle(array.optJSONObject(i));
|
||||
if(nfo!=null) items.add(nfo);
|
||||
}
|
||||
}else if (obj!=null) {
|
||||
GeoShapeInfo nfo = JSONLDUtils.extractGeoShapeSingle(obj);
|
||||
if(nfo!=null) items.add(nfo);
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
private static OrganizationInfo extractOrganizationSingle(JSONObject document){
|
||||
if(document == null || !"Organization".equals(document.optString("@type"))) return null;
|
||||
String name = document.optString("name");
|
||||
if(name==null || name.trim().length()==0) return null;
|
||||
return new OrganizationInfo(name);
|
||||
}
|
||||
|
||||
private static List<OrganizationInfo> extractOrganization(JSONObject document, String key) {
|
||||
List<OrganizationInfo> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
OrganizationInfo nfo = JSONLDUtils.extractOrganizationSingle(array.optJSONObject(i));
|
||||
if(nfo!=null) items.add(nfo);
|
||||
}
|
||||
}else if (obj!=null) {
|
||||
OrganizationInfo nfo = JSONLDUtils.extractOrganizationSingle(obj);
|
||||
if(nfo!=null) items.add(nfo);
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
private static PersonInfo extractPersonSingle(JSONObject document) {
|
||||
if(document == null || !"Person".equals(document.optString("@type"))) return null;
|
||||
String name = document.optString("name");
|
||||
String givenName = document.optString("givenName");
|
||||
String familyName = document.optString("familyName");
|
||||
if ((name == null || name.trim().length() == 0) && (givenName!=null || familyName !=null)) {
|
||||
if(givenName !=null && familyName!=null) name = String.join(" ", familyName, givenName).trim();
|
||||
else if (givenName == null) name = familyName;
|
||||
else if (familyName == null) name = givenName;
|
||||
}
|
||||
if(name==null || name.trim().length()==0) return null;
|
||||
List<OrganizationInfo> affiliations = JSONLDUtils.extractOrganization(document, "affiliation");
|
||||
return new PersonInfo(name, affiliations);
|
||||
}
|
||||
|
||||
private static List<PersonInfo> extractPerson(JSONObject document, String key) {
|
||||
List<PersonInfo> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
PersonInfo nfo = JSONLDUtils.extractPersonSingle(array.optJSONObject(i));
|
||||
if(nfo!=null) items.add(nfo);
|
||||
}
|
||||
}else if (obj!=null) {
|
||||
PersonInfo nfo = JSONLDUtils.extractPersonSingle(obj);
|
||||
if(nfo!=null) items.add(nfo);
|
||||
} else {
|
||||
String value = document.optString(key);
|
||||
if (value != null) items.add(new PersonInfo(value));
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
public static PrincipalInfo extractPrincipalSingle(JSONObject document) {
|
||||
PrincipalInfo principal = JSONLDUtils.extractPersonSingle(document);
|
||||
if(principal == null) principal = JSONLDUtils.extractOrganizationSingle(document);
|
||||
return principal;
|
||||
}
|
||||
|
||||
public static List<PrincipalInfo> extractPrincipal(JSONObject document, String key) {
|
||||
List<PrincipalInfo> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
PrincipalInfo nfo = JSONLDUtils.extractPrincipalSingle(array.optJSONObject(i));
|
||||
if(nfo!=null) items.add(nfo);
|
||||
}
|
||||
}else if (obj!=null) {
|
||||
PrincipalInfo nfo = JSONLDUtils.extractPrincipalSingle(obj);
|
||||
if(nfo!=null) items.add(nfo);
|
||||
} else {
|
||||
String value = document.optString(key);
|
||||
if (value != null) items.add(new PersonInfo(value));
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
public static List<String> extractString(JSONObject document, String key){
|
||||
List<String> items = new ArrayList<>();
|
||||
|
||||
if (!document.has(key)) return items;
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
JSONObject item = array.optJSONObject(i);
|
||||
if(item != null) continue;
|
||||
String value = array.optString(i);
|
||||
if(value == null) continue;
|
||||
items.add(value);
|
||||
}
|
||||
} else if (obj == null) {
|
||||
String value = document.optString(key);
|
||||
if(value != null) items.add(value);
|
||||
}
|
||||
|
||||
return items;
|
||||
|
||||
}
|
||||
|
||||
public static List<String> extractSize(JSONObject document, String key){
|
||||
List<String> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
JSONObject item = array.optJSONObject(i);
|
||||
if (item == null || !"DataDownload".equals((item.optString("@type")))) continue;
|
||||
String size = item.optString("contentSize");
|
||||
if (size != null) items.add(size);
|
||||
}
|
||||
} else if (obj != null) {
|
||||
String size = obj.optString("contentSize");
|
||||
if ("DataDownload".equals((obj.optString("@type"))) && size != null) {
|
||||
items.add(size);
|
||||
}
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
public static List<String> extractEncodingFormat(JSONObject document, String key){
|
||||
List<String> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
JSONObject item = array.optJSONObject(i);
|
||||
if (item == null || !"DataDownload".equals((item.optString("@type")))) continue;
|
||||
String encodingFormat = item.optString("encodingFormat");
|
||||
if (encodingFormat != null) items.add(encodingFormat);
|
||||
String fileFormat = item.optString("fileFormat");
|
||||
if (fileFormat != null) items.add(fileFormat);
|
||||
}
|
||||
} else if (obj != null) {
|
||||
if ("DataDownload".equals((obj.optString("@type")))) {
|
||||
String encodingFormat = obj.optString("encodingFormat");
|
||||
if (encodingFormat != null) items.add(encodingFormat);
|
||||
String fileFormat = obj.optString("fileFormat");
|
||||
if (fileFormat != null) items.add(fileFormat);
|
||||
}
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
public static List<String> extractLanguage(JSONObject document, String key){
|
||||
List<String> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
JSONObject item = array.optJSONObject(i);
|
||||
if (item == null) {
|
||||
String value = array.optString(i);
|
||||
if (value != null) items.add(value);
|
||||
} else {
|
||||
if (!"Language".equals((item.optString("@type")))) continue;
|
||||
String name = item.optString("name");
|
||||
if (name != null) items.add(name);
|
||||
String alternateName = item.optString("alternateName");
|
||||
if (alternateName != null) items.add(alternateName);
|
||||
}
|
||||
}
|
||||
} else if (obj != null) {
|
||||
if ("Language".equals((obj.optString("@type")))){
|
||||
String name = obj.optString("name");
|
||||
if (name != null) items.add(name);
|
||||
String alternateName = obj.optString("alternateName");
|
||||
if (alternateName != null) items.add(alternateName);
|
||||
}
|
||||
} else {
|
||||
String value = document.optString(key);
|
||||
if (value != null) items.add(value);
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
public static List<LicenseInfo> extractLicenses(JSONObject document, String key){
|
||||
List<LicenseInfo> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
JSONObject item = array.optJSONObject(i);
|
||||
if (item == null) {
|
||||
String value = array.optString(i);
|
||||
if(value != null) items.add(new LicenseInfo(value));
|
||||
} else {
|
||||
if (!"CreativeWork".equals((item.optString("@type")))) continue;
|
||||
String url = item.optString("url");
|
||||
String name = item.optString("name");
|
||||
if (url != null || name != null) items.add(new LicenseInfo(url, name));
|
||||
}
|
||||
}
|
||||
} else if (obj != null) {
|
||||
if("CreativeWork".equals((obj.optString("@type")))) {
|
||||
String url = obj.optString("url");
|
||||
String name = obj.optString("name");
|
||||
if (url != null || name != null) items.add(new LicenseInfo(url, name));
|
||||
}
|
||||
} else {
|
||||
String value = document.optString(key);
|
||||
if (value != null) items.add(new LicenseInfo(value));
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
public static List<CitationInfo> extractCitations(JSONObject document, String key){
|
||||
List<CitationInfo> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
JSONObject item = array.optJSONObject(i);
|
||||
if (item == null) {
|
||||
String value = array.optString(i);
|
||||
if(value != null) items.add(new CitationInfo(value));
|
||||
} else {
|
||||
if (!"CreativeWork".equals((item.optString("@type")))) continue;
|
||||
String url = item.optString("url");
|
||||
if (url != null) items.add(new CitationInfo(url));
|
||||
}
|
||||
}
|
||||
} else if (obj != null) {
|
||||
if("CreativeWork".equals((obj.optString("@type")))) {
|
||||
String url = obj.optString("url");
|
||||
if (url != null) items.add(new CitationInfo(url));
|
||||
}
|
||||
} else {
|
||||
String value = document.optString(key);
|
||||
if (value != null) items.add(new CitationInfo(value));
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
private static IdentifierInfo extractIdentifierSingle(JSONObject document){
|
||||
if(document == null || !"PropertyValue".equals(document.optString("@type"))) return null;
|
||||
String name = document.optString("name");
|
||||
String value = document.optString("value");
|
||||
if(value==null || value.trim().length()==0) return null;
|
||||
return new IdentifierInfo(value, name);
|
||||
}
|
||||
|
||||
public static List<IdentifierInfo> extractIdentifier(JSONObject document, String key) {
|
||||
List<IdentifierInfo> items = new ArrayList<>();
|
||||
|
||||
JSONArray array = document.optJSONArray(key);
|
||||
JSONObject obj = document.optJSONObject(key);
|
||||
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length(); i += 1) {
|
||||
IdentifierInfo nfo = null;
|
||||
if (array.optJSONObject(i) == null) {
|
||||
String value = array.optString(i);
|
||||
if (value != null) nfo = new IdentifierInfo(value);
|
||||
}
|
||||
if (nfo == null) nfo = JSONLDUtils.extractIdentifierSingle(array.optJSONObject(i));
|
||||
if (nfo != null) items.add(nfo);
|
||||
}
|
||||
}else if (obj!=null) {
|
||||
IdentifierInfo nfo = JSONLDUtils.extractIdentifierSingle(obj);
|
||||
if (nfo != null) items.add(nfo);
|
||||
} else {
|
||||
String value = document.optString(key);
|
||||
if (value != null) items.add(new IdentifierInfo(value));
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
 * A source of repository endpoint URLs to be harvested.
 */
public interface RepositoryIterable extends Iterable<String> {
    /**
     * Sentinel element that producers enqueue to signal that no further
     * endpoints will be emitted; consumers stop iterating when they see it.
     */
    public static String TerminationHint = "df667391-676d-4c0f-9c40-426b1001607a";
}
|
|
@ -0,0 +1,92 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class RepositoryQueueIterator implements Iterator<String> {
|
||||
private static final Log log = LogFactory.getLog(RepositoryQueueIterator.class);
|
||||
|
||||
public static class Options {
|
||||
private Boolean blockPolling;
|
||||
private long pollTimeout;
|
||||
private TimeUnit pollTimeoutUnit;
|
||||
|
||||
public Boolean getBlockPolling() {
|
||||
return blockPolling;
|
||||
}
|
||||
|
||||
public void setBlockPolling(Boolean blockPolling) {
|
||||
this.blockPolling = blockPolling;
|
||||
}
|
||||
|
||||
public long getPollTimeout() {
|
||||
return pollTimeout;
|
||||
}
|
||||
|
||||
public void setPollTimeout(long pollTimeout) {
|
||||
this.pollTimeout = pollTimeout;
|
||||
}
|
||||
|
||||
public TimeUnit getPollTimeoutUnit() {
|
||||
return pollTimeoutUnit;
|
||||
}
|
||||
|
||||
public void setPollTimeoutUnit(TimeUnit pollTimeoutUnit) {
|
||||
this.pollTimeoutUnit = pollTimeoutUnit;
|
||||
}
|
||||
}
|
||||
|
||||
private ArrayBlockingQueue<String> queue;
|
||||
private Options options;
|
||||
private boolean hasTerminated;
|
||||
|
||||
public RepositoryQueueIterator(Options options, ArrayBlockingQueue<String> queue) {
|
||||
this.options = options;
|
||||
this.queue = queue;
|
||||
this.hasTerminated = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if(this.hasTerminated) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
String next = this.poll();
|
||||
log.debug("next endpoint to process: " + next);
|
||||
if (next != null && next.equalsIgnoreCase(RepositoryIterable.TerminationHint)) {
|
||||
log.debug("no more endpoints to process");
|
||||
this.hasTerminated = true;
|
||||
next = null;
|
||||
}
|
||||
|
||||
return next;
|
||||
}
|
||||
|
||||
private String poll(){
|
||||
String item = null;
|
||||
log.debug("retrieving endpoint from queue");
|
||||
log.debug("queue size: " + queue.size());
|
||||
if(this.options.getBlockPolling()) {
|
||||
try {
|
||||
item = this.queue.poll(this.options.getPollTimeout(), this.options.getPollTimeoutUnit());
|
||||
} catch (InterruptedException ex) {
|
||||
log.warn(String.format("could not poll elements from queue for more than %s %s. throwing", this.options.getPollTimeout(), this.options.getPollTimeoutUnit()));
|
||||
throw new NoSuchElementException(ex.getMessage());
|
||||
}
|
||||
}
|
||||
else {
|
||||
item = this.queue.poll();
|
||||
}
|
||||
log.debug("retrieved endpoint from queue");
|
||||
log.debug("queue size: " + queue.size());
|
||||
return item;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
|
||||
public class SchemaOrgIterable implements Iterable<String> {
|
||||
private static final Log log = LogFactory.getLog(SchemaOrgIterable.class);
|
||||
|
||||
public static class Options {
|
||||
private EndpointAccessIterator.Options endpointAccessOptions;
|
||||
private DatasetMappingIterator.Options datasetMappingOptions;
|
||||
|
||||
public EndpointAccessIterator.Options getEndpointAccessOptions() {
|
||||
return endpointAccessOptions;
|
||||
}
|
||||
|
||||
public void setEndpointAccessOptions(EndpointAccessIterator.Options endpointAccessOptions) {
|
||||
this.endpointAccessOptions = endpointAccessOptions;
|
||||
}
|
||||
|
||||
public DatasetMappingIterator.Options getDatasetMappingOptions() {
|
||||
return datasetMappingOptions;
|
||||
}
|
||||
|
||||
public void setDatasetMappingOptions(DatasetMappingIterator.Options datasetMappingOptions) {
|
||||
this.datasetMappingOptions = datasetMappingOptions;
|
||||
}
|
||||
}
|
||||
|
||||
private Options options;
|
||||
private RepositoryIterable repository;
|
||||
|
||||
public SchemaOrgIterable(Options options, RepositoryIterable repository){
|
||||
this.options = options;
|
||||
this.repository = repository;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
Iterator<String> repositoryIterator = this.repository.iterator();
|
||||
EndpointAccessIterator endpointAccessIterator = new EndpointAccessIterator(options.getEndpointAccessOptions(), repositoryIterator);
|
||||
DatasetMappingIterator datasetMappingIterator = new DatasetMappingIterator(options.getDatasetMappingOptions(), endpointAccessIterator);
|
||||
|
||||
return datasetMappingIterator;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg;
|
||||
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.log4j.ConsoleAppender;
|
||||
import org.apache.log4j.Level;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.PatternLayout;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class SchemaOrgMainKaggle {
|
||||
|
||||
private static final Log log = LogFactory.getLog(SchemaOrgMainKaggle.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
ConsoleAppender console = new ConsoleAppender();
|
||||
console.setLayout(new PatternLayout("%d [%p|%c|%C{1}] %m%n"));
|
||||
console.setThreshold(Level.DEBUG);
|
||||
console.activateOptions();
|
||||
Logger.getLogger("eu.dnetlib.data.collector.plugins").addAppender(console);
|
||||
|
||||
HashMap<String,String> params = new HashMap<>();
|
||||
params.put("consumerBlockPolling", Boolean.toString(true));
|
||||
params.put("consumerBlockPollingTimeout", "2");
|
||||
params.put("consumerBlockPollingTimeoutUnit", TimeUnit.MINUTES.toString());
|
||||
params.put("endpointCharset", StandardCharsets.UTF_8.name());
|
||||
params.put("updatedDateFormat", "YYYY-MM-DD");
|
||||
params.put("createdDateFormat", "YYYY-MM-DD");
|
||||
params.put("publicationDateFormat", "YYYY-MM-DD");
|
||||
params.put("contributorFallbackType", DatasetDocument.Contributor.ContributorType.Other.toString());
|
||||
params.put("identifierFallbackType", DatasetDocument.Identifier.IdentifierType.Handle.toString());
|
||||
params.put("identifierFallbackURL", Boolean.toString(true));
|
||||
params.put("identifierMappingARK", "ark, ARK");
|
||||
params.put("identifierMappingDOI", "doi, DOI");
|
||||
params.put("identifierMappingHandle", "Handle, HANDLE");
|
||||
params.put("identifierMappingPURL", "purl, PURL");
|
||||
params.put("identifierMappingURN", "urn, URN");
|
||||
params.put("identifierMappingURL", "url, URL");
|
||||
|
||||
params.put("repositoryAccessType", "httpapi-kaggle");
|
||||
|
||||
params.put("httpapi-kaggle_queueSize", "100");
|
||||
params.put("httpapi-kaggle_APICharset", StandardCharsets.UTF_8.name());
|
||||
params.put("httpapi-kaggle_queryUrl", "https://www.kaggle.com/datasets_v2.json?sortBy=updated&group=public&page={PAGE}&pageSize=20&size=sizeAll&filetype=fileTypeAll&license=licenseAll");
|
||||
params.put("httpapi-kaggle_queryPagePlaceholder", "{PAGE}");
|
||||
params.put("httpapi-kaggle_responsePropertyTotalDataset", "totalDatasetListItems");
|
||||
params.put("httpapi-kaggle_responsePropertyDatasetList", "datasetListItems");
|
||||
params.put("httpapi-kaggle_responsePropertyDatasetUrl", "datasetUrl");
|
||||
params.put("httpapi-kaggle_responseBaseDatasetUrl", "https://www.kaggle.com");
|
||||
params.put("httpapi-kaggle_producerBlockPollingTimeout", "2");
|
||||
params.put("httpapi-kaggle_producerBlockPollingTimeoutUnit", TimeUnit.MINUTES.toString());
|
||||
|
||||
InterfaceDescriptor descriptor = new InterfaceDescriptor();
|
||||
descriptor.setId("schema.org - kaggle");
|
||||
descriptor.setBaseUrl("https://www.kaggle.com");
|
||||
|
||||
descriptor.setParams(params);
|
||||
|
||||
SchemaOrgPlugin schemaOrgPlugin = new SchemaOrgPlugin();
|
||||
|
||||
Iterable<String> iterable = schemaOrgPlugin.collect(descriptor, null, null);
|
||||
|
||||
String outDir = params.get("repositoryAccessType");
|
||||
|
||||
log.info("saving content in " + outDir);
|
||||
|
||||
File directory = new File(outDir);
|
||||
if (directory.exists()) {
|
||||
log.info(directory.getAbsolutePath() + " exists, cleaning up");
|
||||
FileUtils.deleteDirectory(directory);
|
||||
}
|
||||
FileUtils.forceMkdir(directory);
|
||||
Utils.writeFiles(iterable, outDir);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex.SitemapFileIterator;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.log4j.ConsoleAppender;
|
||||
import org.apache.log4j.Level;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.PatternLayout;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class SchemaOrgMainReactome {
|
||||
|
||||
private static final Log log = LogFactory.getLog(SchemaOrgMainReactome.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
ConsoleAppender console = new ConsoleAppender();
|
||||
console.setLayout(new PatternLayout("%d [%p|%c|%C{1}] %m%n"));
|
||||
console.setThreshold(Level.DEBUG);
|
||||
console.activateOptions();
|
||||
Logger.getLogger("eu.dnetlib.data.collector.plugins").addAppender(console);
|
||||
|
||||
HashMap<String,String> params = new HashMap<>();
|
||||
params.put("consumerBlockPolling", Boolean.toString(true));
|
||||
params.put("consumerBlockPollingTimeout", "2");
|
||||
params.put("consumerBlockPollingTimeoutUnit", TimeUnit.MINUTES.toString());
|
||||
params.put("endpointCharset", StandardCharsets.UTF_8.name());
|
||||
params.put("updatedDateFormat", "YYYY-MM-DD");
|
||||
params.put("createdDateFormat", "YYYY-MM-DD");
|
||||
params.put("publicationDateFormat", "YYYY-MM-DD");
|
||||
params.put("contributorFallbackType", DatasetDocument.Contributor.ContributorType.Other.toString());
|
||||
params.put("identifierFallbackType", DatasetDocument.Identifier.IdentifierType.Handle.toString());
|
||||
params.put("identifierFallbackURL", Boolean.toString(true));
|
||||
params.put("identifierMappingARK", "ark, ARK");
|
||||
params.put("identifierMappingDOI", "doi, DOI");
|
||||
params.put("identifierMappingHandle", "Handle, HANDLE");
|
||||
params.put("identifierMappingPURL", "purl, PURL");
|
||||
params.put("identifierMappingURN", "urn, URN");
|
||||
params.put("identifierMappingURL", "url, URL");
|
||||
|
||||
params.put("repositoryAccessType", "sitemapindex");
|
||||
params.put("sitemap_queueSize", "100");
|
||||
params.put("sitemap_IndexCharset", StandardCharsets.UTF_8.name());
|
||||
params.put("sitemap_FileCharset", StandardCharsets.UTF_8.name());
|
||||
params.put("sitemap_FileSchema", SitemapFileIterator.Options.SitemapSchemaType.Text.toString());
|
||||
params.put("sitemap_FileType", SitemapFileIterator.Options.SitemapFileType.GZ.toString());
|
||||
params.put("sitemap_producerBlockPollingTimeout", "2");
|
||||
params.put("sitemap_producerBlockPollingTimeoutUnit", TimeUnit.MINUTES.toString());
|
||||
|
||||
InterfaceDescriptor descriptor = new InterfaceDescriptor();
|
||||
descriptor.setId("schema.org - reactome");
|
||||
descriptor.setBaseUrl("https://reactome.org/sitemapindex.xml");
|
||||
|
||||
descriptor.setParams(params);
|
||||
|
||||
SchemaOrgPlugin schemaOrgPlugin = new SchemaOrgPlugin();
|
||||
|
||||
Iterable<String> iterable = schemaOrgPlugin.collect(descriptor, null, null);
|
||||
|
||||
String outDir = params.get("repositoryAccessType");
|
||||
|
||||
log.info("saving content in " + outDir);
|
||||
|
||||
File directory = new File(outDir);
|
||||
if (directory.exists()) {
|
||||
log.info(directory.getAbsolutePath() + " exists, cleaning up");
|
||||
FileUtils.deleteDirectory(directory);
|
||||
}
|
||||
FileUtils.forceMkdir(directory);
|
||||
Utils.writeFiles(iterable, outDir);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,153 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg;
|
||||
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.httpapi.kaggle.KaggleRepositoryIterable;
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex.SitemapFileIterator;
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex.SitemapIndexIterator;
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex.SitemapIndexRepositoryIterable;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class SchemaOrgPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
private static final Log log = LogFactory.getLog(SchemaOrgPlugin.class);
|
||||
|
||||
public String hello(){
|
||||
return "hello";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) throws CollectorServiceException {
|
||||
try {
|
||||
RepositoryIterable repository = null;
|
||||
String repositoryAccessType = Utils.getAsString(interfaceDescriptor.getParams(), "repositoryAccessType", null);
|
||||
switch(repositoryAccessType) {
|
||||
case "sitemapindex": {
|
||||
SitemapIndexRepositoryIterable.Options repositoryOptions = this.compileSitemapIndexRepositoryOptions(interfaceDescriptor);
|
||||
SitemapIndexRepositoryIterable repositoryIterable = new SitemapIndexRepositoryIterable(repositoryOptions);
|
||||
repositoryIterable.bootstrap();
|
||||
repository = repositoryIterable;
|
||||
break;
|
||||
}
|
||||
case "httpapi-kaggle": {
|
||||
KaggleRepositoryIterable.Options repositoryOptions = this.compileKaggleRepositoryOptions(interfaceDescriptor);
|
||||
KaggleRepositoryIterable repositoryIterable = new KaggleRepositoryIterable(repositoryOptions);
|
||||
repositoryIterable.bootstrap();
|
||||
repository = repositoryIterable;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw new CollectorServiceException(String.format("unrecognized repository access type ", repositoryAccessType));
|
||||
}
|
||||
SchemaOrgIterable.Options schemaOrgOptions = this.compileSchemaOrgOptions(interfaceDescriptor);
|
||||
SchemaOrgIterable iterable = new SchemaOrgIterable(schemaOrgOptions, repository);
|
||||
return iterable;
|
||||
} catch (Exception e) {
|
||||
throw new CollectorServiceException("Could not create iterator", e);
|
||||
}
|
||||
}
|
||||
|
||||
private KaggleRepositoryIterable.Options compileKaggleRepositoryOptions(InterfaceDescriptor interfaceDescriptor) throws MalformedURLException {
|
||||
KaggleRepositoryIterable.Options kaggleRepositoryOptions = new KaggleRepositoryIterable.Options();
|
||||
kaggleRepositoryOptions.setQueueSize(Utils.getAsInt(interfaceDescriptor.getParams(), "httpapi-kaggle_queueSize", 100));
|
||||
kaggleRepositoryOptions.setPutTimeout(Utils.getAsLong(interfaceDescriptor.getParams(), "httpapi-kaggle_producerBlockPollingTimeout", 20));
|
||||
kaggleRepositoryOptions.setPutTimeoutUnit(Utils.getAsEnum(interfaceDescriptor.getParams(), "httpapi-kaggle_producerBlockPollingTimeoutUnit", TimeUnit.MINUTES, TimeUnit.class));
|
||||
kaggleRepositoryOptions.setCharset(Utils.getAsCharset(interfaceDescriptor.getParams(), "httpapi-kaggle_APICharset", StandardCharsets.UTF_8));
|
||||
kaggleRepositoryOptions.setQueryUrl(Utils.getAsString(interfaceDescriptor.getParams(), "httpapi-kaggle_queryUrl", null));
|
||||
kaggleRepositoryOptions.setQueryPagePlaceholder(Utils.getAsString(interfaceDescriptor.getParams(), "httpapi-kaggle_queryPagePlaceholder", "{PAGE}"));
|
||||
kaggleRepositoryOptions.setResponsePropertyTotalDataset(Utils.getAsString(interfaceDescriptor.getParams(), "httpapi-kaggle_responsePropertyTotalDataset", "totalDatasetListItems"));
|
||||
kaggleRepositoryOptions.setResponsePropertyDatasetList(Utils.getAsString(interfaceDescriptor.getParams(), "httpapi-kaggle_responsePropertyDatasetList", "datasetListItems"));
|
||||
kaggleRepositoryOptions.setResponsePropertyDatasetUrl(Utils.getAsString(interfaceDescriptor.getParams(), "httpapi-kaggle_responsePropertyDatasetUrl", "datasetUrl"));
|
||||
kaggleRepositoryOptions.setResponseBaseDatasetUrl(Utils.getAsString(interfaceDescriptor.getParams(), "httpapi-kaggle_responseBaseDatasetUrl", interfaceDescriptor.getBaseUrl()));
|
||||
kaggleRepositoryOptions.setRepositoryQueueIteratorOptions(this.compileRepositoryQueueOptions(interfaceDescriptor));
|
||||
return kaggleRepositoryOptions;
|
||||
|
||||
}
|
||||
|
||||
private SitemapIndexIterator.Options compileSitemapIndexOptions(InterfaceDescriptor interfaceDescriptor) throws MalformedURLException {
|
||||
SitemapIndexIterator.Options sitemapIndexIteratorOptions = new SitemapIndexIterator.Options();
|
||||
sitemapIndexIteratorOptions.setCharset(Utils.getAsCharset(interfaceDescriptor.getParams(), "sitemap_IndexCharset", StandardCharsets.UTF_8));
|
||||
sitemapIndexIteratorOptions.setIndexUrl(new URL(interfaceDescriptor.getBaseUrl()));
|
||||
return sitemapIndexIteratorOptions;
|
||||
|
||||
}
|
||||
|
||||
private SitemapFileIterator.Options compileSitemapFileOptions(InterfaceDescriptor interfaceDescriptor) throws MalformedURLException {
|
||||
SitemapFileIterator.Options sitemapFileIteratorOptions = new SitemapFileIterator.Options();
|
||||
sitemapFileIteratorOptions.setCharset(Utils.getAsCharset(interfaceDescriptor.getParams(), "sitemap_FileCharset", StandardCharsets.UTF_8));
|
||||
sitemapFileIteratorOptions.setSchemaType(Utils.getAsEnum(interfaceDescriptor.getParams(), "sitemap_FileSchema", SitemapFileIterator.Options.SitemapSchemaType.Xml, SitemapFileIterator.Options.SitemapSchemaType.class));
|
||||
sitemapFileIteratorOptions.setFileType(Utils.getAsEnum(interfaceDescriptor.getParams(), "sitemap_FileType", SitemapFileIterator.Options.SitemapFileType.Text, SitemapFileIterator.Options.SitemapFileType.class));
|
||||
return sitemapFileIteratorOptions;
|
||||
}
|
||||
|
||||
private RepositoryQueueIterator.Options compileRepositoryQueueOptions(InterfaceDescriptor interfaceDescriptor) throws MalformedURLException {
|
||||
RepositoryQueueIterator.Options repositoryQueueIteratorOptions = new RepositoryQueueIterator.Options();
|
||||
repositoryQueueIteratorOptions.setBlockPolling(Utils.getAsBoolean(interfaceDescriptor.getParams(), "consumerBlockPolling", true));
|
||||
repositoryQueueIteratorOptions.setPollTimeout(Utils.getAsLong(interfaceDescriptor.getParams(), "consumerBlockPollingTimeout", 2));
|
||||
repositoryQueueIteratorOptions.setPollTimeoutUnit(Utils.getAsEnum(interfaceDescriptor.getParams(), "consumerBlockPollingTimeoutUnit", TimeUnit.MINUTES, TimeUnit.class));
|
||||
return repositoryQueueIteratorOptions;
|
||||
}
|
||||
|
||||
private SitemapIndexRepositoryIterable.Options compileSitemapIndexRepositoryOptions(InterfaceDescriptor interfaceDescriptor) throws MalformedURLException {
|
||||
SitemapIndexRepositoryIterable.Options sitemapIndexRepositoryIterableOptions = new SitemapIndexRepositoryIterable.Options();
|
||||
sitemapIndexRepositoryIterableOptions.setQueueSize(Utils.getAsInt(interfaceDescriptor.getParams(), "sitemap_queueSize", 100));
|
||||
sitemapIndexRepositoryIterableOptions.setPutTimeout(Utils.getAsLong(interfaceDescriptor.getParams(), "sitemap_producerBlockPollingTimeout", 20));
|
||||
sitemapIndexRepositoryIterableOptions.setPutTimeoutUnit(Utils.getAsEnum(interfaceDescriptor.getParams(), "sitemap_producerBlockPollingTimeoutUnit", TimeUnit.MINUTES, TimeUnit.class));
|
||||
sitemapIndexRepositoryIterableOptions.setRepositoryQueueIteratorOptions(this.compileRepositoryQueueOptions(interfaceDescriptor));
|
||||
sitemapIndexRepositoryIterableOptions.setSitemapFileIteratorOptions(this.compileSitemapFileOptions(interfaceDescriptor));
|
||||
sitemapIndexRepositoryIterableOptions.setSitemapIndexIteratorOptions(this.compileSitemapIndexOptions(interfaceDescriptor));
|
||||
return sitemapIndexRepositoryIterableOptions;
|
||||
}
|
||||
|
||||
private EndpointAccessIterator.Options compileEndpointAccessOptions(InterfaceDescriptor interfaceDescriptor) throws MalformedURLException {
|
||||
EndpointAccessIterator.Options endpointAccessIteratorOptions = new EndpointAccessIterator.Options();
|
||||
endpointAccessIteratorOptions.setCharset(Utils.getAsCharset(interfaceDescriptor.getParams(), "endpointCharset", StandardCharsets.UTF_8));
|
||||
return endpointAccessIteratorOptions;
|
||||
}
|
||||
|
||||
private DatasetMappingIterator.Options compileDatasetMappingOptions(InterfaceDescriptor interfaceDescriptor) throws MalformedURLException {
|
||||
DatasetMappingIterator.Options datasetMappingIteratorOptions = new DatasetMappingIterator.Options();
|
||||
|
||||
DatasetMappingIterator.Options.UpdatedDateOptions datasetMappingIteratorUpdatedDateOptions = new DatasetMappingIterator.Options.UpdatedDateOptions();
|
||||
datasetMappingIteratorUpdatedDateOptions.format =Utils.getAsString(interfaceDescriptor.getParams(), "updatedDateFormat", "YYYY-MM-DD");
|
||||
datasetMappingIteratorOptions.setUpdatedDateOptions(datasetMappingIteratorUpdatedDateOptions);
|
||||
|
||||
DatasetMappingIterator.Options.CreatedDateOptions datasetMappingIteratorCreatedDateOptions = new DatasetMappingIterator.Options.CreatedDateOptions();
|
||||
datasetMappingIteratorCreatedDateOptions.format =Utils.getAsString(interfaceDescriptor.getParams(), "createdDateFormat", "YYYY-MM-DD");
|
||||
datasetMappingIteratorOptions.setCreatedDateOptions(datasetMappingIteratorCreatedDateOptions);
|
||||
|
||||
DatasetMappingIterator.Options.PublicationDateOptions datasetMappingIteratorPublicationDateOptions = new DatasetMappingIterator.Options.PublicationDateOptions();
|
||||
datasetMappingIteratorPublicationDateOptions.format =Utils.getAsString(interfaceDescriptor.getParams(), "publicationDateFormat", "YYYY-MM-DD");
|
||||
datasetMappingIteratorOptions.setPublicationDateOptions(datasetMappingIteratorPublicationDateOptions);
|
||||
|
||||
DatasetMappingIterator.Options.ContributorOptions datasetMappingIteratorContributorOptions = new DatasetMappingIterator.Options.ContributorOptions();
|
||||
datasetMappingIteratorContributorOptions.fallbackType =Utils.getAsEnum(interfaceDescriptor.getParams(), "contributorFallbackType",DatasetDocument.Contributor.ContributorType.Other, DatasetDocument.Contributor.ContributorType.class);
|
||||
datasetMappingIteratorOptions.setContributorOptions(datasetMappingIteratorContributorOptions);
|
||||
|
||||
DatasetMappingIterator.Options.IdentifierOptions datasetMappingIteratorIdentifierOptions = new DatasetMappingIterator.Options.IdentifierOptions();
|
||||
datasetMappingIteratorIdentifierOptions.fallbackType = Utils.getAsEnum(interfaceDescriptor.getParams(), "identifierFallbackType", null, DatasetDocument.Identifier.IdentifierType.class);
|
||||
datasetMappingIteratorIdentifierOptions.fallbackURL = Utils.getAsBoolean(interfaceDescriptor.getParams(), "identifierFallbackURL", true);
|
||||
datasetMappingIteratorIdentifierOptions.mappingARK = Utils.getAsStringCsv(interfaceDescriptor.getParams(), "identifierMappingARK", null);
|
||||
datasetMappingIteratorIdentifierOptions.mappingDOI = Utils.getAsStringCsv(interfaceDescriptor.getParams(), "identifierMappingDOI", null);
|
||||
datasetMappingIteratorIdentifierOptions.mappingHandle = Utils.getAsStringCsv(interfaceDescriptor.getParams(), "identifierMappingHandle", null);
|
||||
datasetMappingIteratorIdentifierOptions.mappingPURL = Utils.getAsStringCsv(interfaceDescriptor.getParams(), "identifierMappingPURL", null);
|
||||
datasetMappingIteratorIdentifierOptions.mappingURL = Utils.getAsStringCsv(interfaceDescriptor.getParams(), "identifierMappingURL", null);
|
||||
datasetMappingIteratorIdentifierOptions.mappingURN = Utils.getAsStringCsv(interfaceDescriptor.getParams(), "identifierMappingURN", null);
|
||||
datasetMappingIteratorOptions.setIdentifierOptions(datasetMappingIteratorIdentifierOptions);
|
||||
return datasetMappingIteratorOptions;
|
||||
}
|
||||
|
||||
private SchemaOrgIterable.Options compileSchemaOrgOptions(InterfaceDescriptor interfaceDescriptor) throws MalformedURLException {
|
||||
SchemaOrgIterable.Options schemaOrgIterableOptions = new SchemaOrgIterable.Options();
|
||||
schemaOrgIterableOptions.setDatasetMappingOptions(this.compileDatasetMappingOptions(interfaceDescriptor));
|
||||
schemaOrgIterableOptions.setEndpointAccessOptions(this.compileEndpointAccessOptions(interfaceDescriptor));
|
||||
return schemaOrgIterableOptions;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,208 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpression;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
import java.io.*;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
public class Utils {
|
||||
private static final Log log = LogFactory.getLog(Utils.class);
|
||||
|
||||
public static List<String> collectAsStrings(String xml, String xpath) throws Exception{
|
||||
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
||||
DocumentBuilder builder = factory.newDocumentBuilder();
|
||||
Document doc = builder.parse(new InputSource(new StringReader(xml)));
|
||||
return Utils.collectAsStrings(doc, xpath);
|
||||
}
|
||||
|
||||
public static List<String> collectAsStrings(File file, String xpath) throws Exception{
|
||||
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
||||
DocumentBuilder builder = factory.newDocumentBuilder();
|
||||
Document doc = builder.parse(file);
|
||||
return Utils.collectAsStrings(doc, xpath);
|
||||
}
|
||||
|
||||
public static List<String> collectAsStrings(Document doc, String xpath) throws Exception{
|
||||
XPathFactory xPathfactory = XPathFactory.newInstance();
|
||||
XPath path = xPathfactory.newXPath();
|
||||
XPathExpression expr = path.compile(xpath);
|
||||
NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
|
||||
|
||||
List<String> values = new ArrayList<>();
|
||||
|
||||
for (int i = 0; i < nodes.getLength(); i++)
|
||||
values.add(nodes.item(i).getNodeValue());
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
public static void decompressGZipTo(File input, File output) throws Exception {
|
||||
try (GZIPInputStream in = new GZIPInputStream(new FileInputStream(input))){
|
||||
try (FileOutputStream out = new FileOutputStream(output)){
|
||||
byte[] buffer = new byte[1024];
|
||||
int len;
|
||||
while((len = in.read(buffer)) != -1){
|
||||
out.write(buffer, 0, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static String getAsString(HashMap<String,String> map, String key, String defaultValue)
|
||||
{
|
||||
String value = map.get(key);
|
||||
if(value == null) return defaultValue;
|
||||
return value;
|
||||
}
|
||||
|
||||
public static List<String> getAsStringCsv(HashMap<String,String> map, String key, List<String> defaultValue)
|
||||
{
|
||||
String value = map.get(key);
|
||||
if(value == null) return defaultValue;
|
||||
String[] splits = value.split(",");
|
||||
List<String> curated = new ArrayList<>();
|
||||
for(String item : splits){
|
||||
if(item == null || item.trim().length() == 0) continue;
|
||||
curated.add(item.trim());
|
||||
}
|
||||
return curated;
|
||||
}
|
||||
|
||||
public static int getAsInt(HashMap<String,String> map, String key, int defaultValue)
|
||||
{
|
||||
String value = map.get(key);
|
||||
if(value == null) return defaultValue;
|
||||
try {
|
||||
return Integer.parseInt(value);
|
||||
} catch (NumberFormatException e) {
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
|
||||
public static long getAsLong(HashMap<String,String> map, String key, long defaultValue)
|
||||
{
|
||||
String value = map.get(key);
|
||||
if(value == null) return defaultValue;
|
||||
try {
|
||||
return Long.parseLong(value);
|
||||
} catch (NumberFormatException e) {
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
|
||||
public static <E extends Enum<E>> E getAsEnum(HashMap<String,String> map, String key, E defaultValue, Class<E> clazz) {
|
||||
//EnumSet<E> values = EnumSet.allOf(defaultValue.getClass());
|
||||
EnumSet<E> values = EnumSet.allOf(clazz);
|
||||
String value = map.get(key);
|
||||
if (value == null) return defaultValue;
|
||||
for(E val : values){
|
||||
if(!val.name().equalsIgnoreCase(value)) continue;
|
||||
return val;
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
public static Boolean getAsBoolean(HashMap<String,String> map, String key, Boolean defaultValue) {
|
||||
String value = map.get(key);
|
||||
if (value == null) return defaultValue;
|
||||
return Boolean.parseBoolean(value);
|
||||
}
|
||||
|
||||
public static Charset getAsCharset(HashMap<String,String> map, String key, Charset defaultValue)
|
||||
{
|
||||
String value = map.get(key);
|
||||
if(value == null) return defaultValue;
|
||||
try {
|
||||
return Charset.forName(value);
|
||||
} catch (UnsupportedCharsetException e) {
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static String RemoteAccessWithRetry(int retryCount, long waitBetweenRetriesMillis, URL endpoint, Charset charset) throws IOException {
|
||||
int retry =0;
|
||||
while(retry < retryCount) {
|
||||
try {
|
||||
return IOUtils.toString(endpoint, charset);
|
||||
} catch (Exception ex) {
|
||||
retry += 1;
|
||||
if (retry < retryCount) {
|
||||
log.debug("problem accessing url " + endpoint + ". will retry after " + waitBetweenRetriesMillis + " milliseconds");
|
||||
try {
|
||||
Thread.sleep(waitBetweenRetriesMillis);
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
else{
|
||||
log.debug("problem accessing url " + endpoint + ". throwing");
|
||||
throw ex;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static Boolean validateXml(String xml){
|
||||
try {
|
||||
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
||||
DocumentBuilder builder = factory.newDocumentBuilder();
|
||||
InputSource is = new InputSource(new StringReader(xml));
|
||||
builder.parse(is);
|
||||
return true;
|
||||
}catch(Exception ex){
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static void writeFiles(final Iterable<String> iterable, final String outDir) throws DocumentException, IOException {
|
||||
|
||||
int skipped = 0;
|
||||
int count = 0;
|
||||
|
||||
for(String item : iterable) {
|
||||
|
||||
final org.dom4j.Document doc = new SAXReader().read(new StringReader(item));
|
||||
|
||||
if (StringUtils.isNotBlank(doc.valueOf("/*[local-name() = 'dataset']/*[local-name() = 'identifier']/text()"))) {
|
||||
log.info(item);
|
||||
String fileName = outDir + "/" + count++;
|
||||
|
||||
try(BufferedWriter w = new BufferedWriter(new FileWriter(fileName))) {
|
||||
w.write(item);
|
||||
}
|
||||
log.info("wrote " + fileName);
|
||||
} else {
|
||||
skipped++;
|
||||
}
|
||||
if (skipped % 100 == 0) {
|
||||
log.info("skipped so far " + skipped);
|
||||
}
|
||||
if (count % 100 == 0) {
|
||||
log.info("stored so far " + count);
|
||||
}
|
||||
}
|
||||
log.info(String.format("Done! skipped %s, stored %s", skipped, count));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg.httpapi;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.RepositoryIterable;
|
||||
|
||||
/**
 * Marker interface for repository iterables backed by a paged HTTP API
 * (as opposed to sitemap-index based sources). Adds no members beyond
 * {@link RepositoryIterable}.
 */
public interface HttpApiRepositoryIterable extends RepositoryIterable {
}
|
|
@ -0,0 +1,208 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg.httpapi.kaggle;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.RepositoryIterable;
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.RepositoryQueueIterator;
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.httpapi.HttpApiRepositoryIterable;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class KaggleRepositoryIterable implements HttpApiRepositoryIterable {
|
||||
private static final Log log = LogFactory.getLog(KaggleRepositoryIterable.class);
|
||||
|
||||
public static class Options {
|
||||
private String queryUrl;
|
||||
private String queryPagePlaceholder;
|
||||
private Charset charset;
|
||||
private String responsePropertyTotalDataset;
|
||||
private String responsePropertyDatasetList;
|
||||
private String responsePropertyDatasetUrl;
|
||||
private String responseBaseDatasetUrl;
|
||||
private long putTimeout;
|
||||
private TimeUnit putTimeoutUnit;
|
||||
|
||||
private RepositoryQueueIterator.Options repositoryQueueIteratorOptions;
|
||||
|
||||
private int queueSize;
|
||||
|
||||
public long getPutTimeout() {
|
||||
return putTimeout;
|
||||
}
|
||||
|
||||
public void setPutTimeout(long putTimeout) {
|
||||
this.putTimeout = putTimeout;
|
||||
}
|
||||
|
||||
public TimeUnit getPutTimeoutUnit() {
|
||||
return putTimeoutUnit;
|
||||
}
|
||||
|
||||
public void setPutTimeoutUnit(TimeUnit putTimeoutUnit) {
|
||||
this.putTimeoutUnit = putTimeoutUnit;
|
||||
}
|
||||
|
||||
public int getQueueSize() {
|
||||
return queueSize;
|
||||
}
|
||||
|
||||
public void setQueueSize(int queueSize) {
|
||||
this.queueSize = queueSize;
|
||||
}
|
||||
|
||||
public String getResponseBaseDatasetUrl() {
|
||||
return responseBaseDatasetUrl;
|
||||
}
|
||||
|
||||
public void setResponseBaseDatasetUrl(String responseBaseDatasetUrl) {
|
||||
this.responseBaseDatasetUrl = responseBaseDatasetUrl;
|
||||
}
|
||||
|
||||
public RepositoryQueueIterator.Options getRepositoryQueueIteratorOptions() {
|
||||
return repositoryQueueIteratorOptions;
|
||||
}
|
||||
|
||||
public void setRepositoryQueueIteratorOptions(RepositoryQueueIterator.Options repositoryQueueIteratorOptions) {
|
||||
this.repositoryQueueIteratorOptions = repositoryQueueIteratorOptions;
|
||||
}
|
||||
|
||||
public String getResponsePropertyDatasetUrl() {
|
||||
return responsePropertyDatasetUrl;
|
||||
}
|
||||
|
||||
public void setResponsePropertyDatasetUrl(String responsePropertyDatasetUrl) {
|
||||
this.responsePropertyDatasetUrl = responsePropertyDatasetUrl;
|
||||
}
|
||||
|
||||
public String getResponsePropertyDatasetList() {
|
||||
return responsePropertyDatasetList;
|
||||
}
|
||||
|
||||
public void setResponsePropertyDatasetList(String responsePropertyDatasetList) {
|
||||
this.responsePropertyDatasetList = responsePropertyDatasetList;
|
||||
}
|
||||
|
||||
public String getResponsePropertyTotalDataset() {
|
||||
return responsePropertyTotalDataset;
|
||||
}
|
||||
|
||||
public void setResponsePropertyTotalDataset(String responsePropertyTotalDataset) {
|
||||
this.responsePropertyTotalDataset = responsePropertyTotalDataset;
|
||||
}
|
||||
|
||||
public Charset getCharset() {
|
||||
return charset;
|
||||
}
|
||||
|
||||
public void setCharset(Charset charset) {
|
||||
this.charset = charset;
|
||||
}
|
||||
|
||||
public String getQueryPagePlaceholder() {
|
||||
return queryPagePlaceholder;
|
||||
}
|
||||
|
||||
public void setQueryPagePlaceholder(String queryPagePlaceholder) {
|
||||
this.queryPagePlaceholder = queryPagePlaceholder;
|
||||
}
|
||||
|
||||
public String getQueryUrl() {
|
||||
return queryUrl;
|
||||
}
|
||||
|
||||
public void setQueryUrl(String queryUrl) {
|
||||
this.queryUrl = queryUrl;
|
||||
}
|
||||
}
|
||||
|
||||
private Options options;
|
||||
private ArrayBlockingQueue<String> queue;
|
||||
|
||||
public KaggleRepositoryIterable(Options options) {
|
||||
this.options = options;
|
||||
// this.currentPage = 1;
|
||||
// this.terminated = false;
|
||||
}
|
||||
|
||||
public void bootstrap() {
|
||||
this.queue = new ArrayBlockingQueue<>(this.options.getQueueSize());
|
||||
|
||||
Thread ft = new Thread(new Harvester() );
|
||||
ft.start();
|
||||
// ExecutorService executor = Executors.newSingleThreadExecutor();
|
||||
// executor.execute(new Harvester());
|
||||
// executor.shutdown();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return new RepositoryQueueIterator(this.options.getRepositoryQueueIteratorOptions(), this.queue);
|
||||
}
|
||||
|
||||
private class Harvester implements Runnable{
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
this.execute();
|
||||
}
|
||||
private void execute() {
|
||||
try {
|
||||
int currentPage = 1;
|
||||
int totalDatasets = 0;
|
||||
int readDatasets = 0;
|
||||
while (true) {
|
||||
String query = options.getQueryUrl().replace(options.getQueryPagePlaceholder(), Integer.toString(currentPage));
|
||||
String response = IOUtils.toString(new URL(query), options.getCharset());
|
||||
currentPage += 1;
|
||||
|
||||
JSONObject pageObject = new JSONObject(response);
|
||||
totalDatasets = pageObject.optInt(options.getResponsePropertyTotalDataset());
|
||||
JSONArray datasets = pageObject.optJSONArray(options.getResponsePropertyDatasetList());
|
||||
|
||||
if (datasets == null || datasets.length() == 0) break;
|
||||
|
||||
readDatasets += datasets.length();
|
||||
|
||||
for (int i = 0; i < datasets.length(); i += 1) {
|
||||
JSONObject item = datasets.optJSONObject(i);
|
||||
String urlFragment = item.optString(options.getResponsePropertyDatasetUrl());
|
||||
if (urlFragment == null || urlFragment.trim().length() == 0) continue;
|
||||
String endpoint = String.format("%s%s", options.getResponseBaseDatasetUrl(), urlFragment);
|
||||
|
||||
log.debug("adding endpoint in queue");
|
||||
log.debug("queue size: " + queue.size());
|
||||
|
||||
try {
|
||||
queue.offer(endpoint, options.getPutTimeout(), options.getPutTimeoutUnit());
|
||||
} catch (InterruptedException ex) {
|
||||
log.warn(String.format("could not put elements from queue for more than %s %s. breaking", options.getPutTimeout(), options.getPutTimeoutUnit()));
|
||||
break;
|
||||
}
|
||||
log.debug("endpoint added in queue");
|
||||
log.debug("queue size: " + queue.size());
|
||||
}
|
||||
|
||||
if (readDatasets >= totalDatasets) break;
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
log.error("problem execution harvesting", ex);
|
||||
} finally {
|
||||
try {
|
||||
queue.offer(RepositoryIterable.TerminationHint, options.getPutTimeout(), options.getPutTimeoutUnit());
|
||||
} catch (Exception ex) {
|
||||
log.fatal("could not add termination hint. the process will not terminate gracefully", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,172 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.Utils;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Iterates over the endpoint locations listed in a single sitemap file.
 * {@link #bootstrap()} downloads the file (optionally GZIP-compressed),
 * extracts the locations (plain-text lines or XML urlset), and must be
 * called before {@link #hasNext()}/{@link #next()}.
 *
 * NOTE(review): next() returns null when exhausted instead of throwing
 * NoSuchElementException; callers in this plugin rely on the null.
 */
public class SitemapFileIterator implements Iterator<String> {
    private static final Log log = LogFactory.getLog(SitemapFileIterator.class);

    /** Configuration: file URL, charset, payload schema and compression type. */
    public static class Options {

        // Compression of the downloaded sitemap file.
        public enum SitemapFileType{
            Text,
            GZ
        }

        // Layout of the (decompressed) payload: one-location-per-line or XML urlset.
        public enum SitemapSchemaType{
            Text,
            Xml
        }

        public Options(){}

        public Options(URL fileUrl, Charset charset, SitemapSchemaType schemaType, SitemapFileType fileType) {
            this.fileUrl = fileUrl;
            this.charset = charset;
            this.schemaType = schemaType;
            this.fileType = fileType;
        }

        private SitemapFileType fileType;
        private SitemapSchemaType schemaType;
        private URL fileUrl;
        private Charset charset;

        public Charset getCharset() {
            return charset;
        }

        public void setCharset(Charset charset) {
            this.charset = charset;
        }

        public URL getFileUrl() {
            return fileUrl;
        }

        public void setFileUrl(URL fileUrl) {
            this.fileUrl = fileUrl;
        }

        public SitemapFileType getFileType() {
            return fileType;
        }

        public void setFileType(SitemapFileType fileType) {
            this.fileType = fileType;
        }

        public SitemapSchemaType getSchemaType() {
            return schemaType;
        }

        public void setSchemaType(SitemapSchemaType schemaType) {
            this.schemaType = schemaType;
        }

        // Manual field-copy clone; the class does not implement Cloneable, so
        // callers use this to re-target the same settings at another file URL.
        @Override
        public Object clone(){
            Options clone = new Options();
            clone.setCharset(this.getCharset());
            clone.setFileType(this.getFileType());
            clone.setFileUrl(this.getFileUrl());
            clone.setSchemaType(this.getSchemaType());
            return clone;
        }
    }

    private Options options;
    // Raw download target (temp file, deleted after bootstrap).
    private File downloadedFile;
    // Decompressed payload; aliases downloadedFile for the Text file type.
    private File contentFile;
    // Locations extracted by bootstrap(); consumed by next().
    private Queue<String> locations;

    public SitemapFileIterator(Options options){
        this.options = options;
    }

    /**
     * Downloads and processes the sitemap file, filling {@link #locations}.
     * On any failure the iterator is left empty (error logged, not rethrown).
     * Temp files are removed in the finally block once the locations are in
     * memory; for the Text file type contentFile == downloadedFile, so the
     * second delete is a harmless no-op.
     */
    public void bootstrap() {
        LinkedList<String> endpoints = null;
        try {
            log.debug(String.format("bootstrapping sitemapindex file access for sitemapindex %s", this.options.getFileUrl()));
            this.downloadedFile = File.createTempFile(UUID.randomUUID().toString(), ".tmp");
            this.downloadedFile.deleteOnExit();
            FileUtils.copyURLToFile(this.options.getFileUrl(), this.downloadedFile);
            log.debug(String.format("downloaded file: %s has size %d", this.downloadedFile.toString(), this.downloadedFile.length()));

            switch (this.options.getFileType()) {
                case Text: {
                    this.contentFile = this.downloadedFile;
                    break;
                }
                case GZ: {
                    this.contentFile = File.createTempFile(UUID.randomUUID().toString(), ".tmp");
                    this.contentFile.deleteOnExit();
                    Utils.decompressGZipTo(this.downloadedFile, this.contentFile);
                    log.debug(String.format("extracted gz file: %s has size %d", this.contentFile.toString(), this.contentFile.length()));
                    break;
                }
                default:
                    // Caught by the catch block below, yielding an empty iterator.
                    throw new CollectorServiceException("unrecognized file type " + this.options.getFileType());
            }

            List<String> content = this.collectContentLocations();

            log.debug(String.format("extracted %d sitemapindex endpoints", content.size()));
            endpoints = new LinkedList<>(content);
        }catch(Exception ex){
            log.error(String.format("error processing sitemapindex %s. returning 0 endpoints",this.options.getFileUrl()), ex);
            endpoints = new LinkedList<>();
        }finally {
            if (this.contentFile != null) {
                this.contentFile.delete();
            }
            if (this.downloadedFile != null) {
                this.downloadedFile.delete();
            }
        }
        this.locations = endpoints;
    }

    // Dispatches on the payload schema. The default branch reports
    // getFileType() although it switches on getSchemaType() — message only.
    private List<String> collectContentLocations() throws Exception{
        switch(this.options.getSchemaType()) {
            case Text:{
                return this.collectTextContentLocations();
            }
            case Xml:{
                return this.collectXmlContentLocations();
            }
            default: throw new CollectorServiceException("unrecognized file type "+this.options.getFileType());
        }
    }

    // One location per line, decoded with the configured charset.
    private List<String> collectTextContentLocations() throws Exception {
        log.debug(String.format("reading endpoint locations from text sitemapindex"));
        try (FileInputStream in = new FileInputStream(this.contentFile)) {
            return IOUtils.readLines(in, this.options.getCharset());
        }
    }

    // XML urlset schema: /urlset/url/loc text nodes.
    private List<String> collectXmlContentLocations() throws Exception {
        log.debug(String.format("reading endpoint locations from xml sitemapindex"));
        return Utils.collectAsStrings(this.contentFile,"/urlset/url/loc/text()");
    }

    @Override
    public boolean hasNext() {
        return !this.locations.isEmpty();
    }

    @Override
    public String next() {
        // Queue.poll: returns null when empty (see class note).
        return this.locations.poll();
    }
}
|
|
@ -0,0 +1,74 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.Utils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.*;
|
||||
|
||||
public class SitemapIndexIterator implements Iterator<String> {
|
||||
private static final Log log = LogFactory.getLog(SitemapIndexIterator.class);
|
||||
|
||||
public static class Options {
|
||||
private URL indexUrl;
|
||||
private Charset charset;
|
||||
|
||||
public Options(){}
|
||||
|
||||
public Options(URL indexUrl, Charset charset){
|
||||
this.indexUrl = indexUrl;
|
||||
this.charset = charset;
|
||||
}
|
||||
|
||||
public URL getIndexUrl() {
|
||||
return indexUrl;
|
||||
}
|
||||
|
||||
public void setIndexUrl(URL indexUrl) {
|
||||
this.indexUrl = indexUrl;
|
||||
}
|
||||
|
||||
public Charset getCharset() {
|
||||
return charset;
|
||||
}
|
||||
|
||||
public void setCharset(Charset charset) {
|
||||
this.charset = charset;
|
||||
}
|
||||
}
|
||||
|
||||
private Options options;
|
||||
private Queue<String> sitemapFiles;
|
||||
|
||||
public SitemapIndexIterator(Options options) {
|
||||
this.options = options;
|
||||
}
|
||||
|
||||
public void bootstrap() {
|
||||
List<String> files = null;
|
||||
try {
|
||||
log.debug("bootstrapping sitemapindex index access");
|
||||
String sitemapIndexPayload = Utils.RemoteAccessWithRetry(3, 5000, this.options.getIndexUrl(), this.options.getCharset());
|
||||
log.debug(String.format("sitemapindex payload is: %s", sitemapIndexPayload));
|
||||
files = Utils.collectAsStrings(sitemapIndexPayload, "/sitemapindex/sitemap/loc/text()");
|
||||
log.debug(String.format("extracted %d sitemapindex files", files.size()));
|
||||
}catch(Exception ex){
|
||||
log.error("problem bootstrapping sitemapindex index access. returning 0 files", ex);
|
||||
files = new ArrayList<>();
|
||||
}
|
||||
this.sitemapFiles = new PriorityQueue<String>(files);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return !this.sitemapFiles.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
return this.sitemapFiles.poll();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,147 @@
|
|||
package eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex;
|
||||
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.RepositoryIterable;
|
||||
import eu.dnetlib.data.collector.plugins.schemaorg.RepositoryQueueIterator;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class SitemapIndexRepositoryIterable implements RepositoryIterable {
|
||||
private static final Log log = LogFactory.getLog(SitemapIndexRepositoryIterable.class);
|
||||
|
||||
public static class Options {
|
||||
private SitemapIndexIterator.Options sitemapIndexIteratorOptions;
|
||||
private SitemapFileIterator.Options sitemapFileIteratorOptions;
|
||||
private RepositoryQueueIterator.Options repositoryQueueIteratorOptions;
|
||||
private long putTimeout;
|
||||
private TimeUnit putTimeoutUnit;
|
||||
|
||||
private int queueSize;
|
||||
|
||||
public long getPutTimeout() {
|
||||
return putTimeout;
|
||||
}
|
||||
|
||||
public void setPutTimeout(long putTimeout) {
|
||||
this.putTimeout = putTimeout;
|
||||
}
|
||||
|
||||
public TimeUnit getPutTimeoutUnit() {
|
||||
return putTimeoutUnit;
|
||||
}
|
||||
|
||||
public void setPutTimeoutUnit(TimeUnit putTimeoutUnit) {
|
||||
this.putTimeoutUnit = putTimeoutUnit;
|
||||
}
|
||||
|
||||
public int getQueueSize() {
|
||||
return queueSize;
|
||||
}
|
||||
|
||||
public void setQueueSize(int queueSize) {
|
||||
this.queueSize = queueSize;
|
||||
}
|
||||
|
||||
public RepositoryQueueIterator.Options getRepositoryQueueIteratorOptions() {
|
||||
return repositoryQueueIteratorOptions;
|
||||
}
|
||||
|
||||
public void setRepositoryQueueIteratorOptions(RepositoryQueueIterator.Options repositoryQueueIteratorOptions) {
|
||||
this.repositoryQueueIteratorOptions = repositoryQueueIteratorOptions;
|
||||
}
|
||||
|
||||
public SitemapIndexIterator.Options getSitemapIndexIteratorOptions() {
|
||||
return sitemapIndexIteratorOptions;
|
||||
}
|
||||
|
||||
public void setSitemapIndexIteratorOptions(SitemapIndexIterator.Options sitemapIndexIteratorOptions) {
|
||||
this.sitemapIndexIteratorOptions = sitemapIndexIteratorOptions;
|
||||
}
|
||||
|
||||
public SitemapFileIterator.Options getSitemapFileIteratorOptions() {
|
||||
return sitemapFileIteratorOptions;
|
||||
}
|
||||
|
||||
public void setSitemapFileIteratorOptions(SitemapFileIterator.Options sitemapFileIteratorOptions) {
|
||||
this.sitemapFileIteratorOptions = sitemapFileIteratorOptions;
|
||||
}
|
||||
}
|
||||
|
||||
private Options options;
|
||||
private ArrayBlockingQueue<String> queue;
|
||||
|
||||
// Stores the configuration; call bootstrap() before iterating.
public SitemapIndexRepositoryIterable(Options options) {
    this.options = options;
}
|
||||
|
||||
public void bootstrap() {
|
||||
this.queue = new ArrayBlockingQueue<>(this.options.getQueueSize());
|
||||
|
||||
Thread ft = new Thread(new Harvester() );
|
||||
ft.start();
|
||||
// ExecutorService executor = Executors.newSingleThreadExecutor();
|
||||
// executor.execute(new Harvester());
|
||||
// executor.shutdown();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return new RepositoryQueueIterator(this.options.getRepositoryQueueIteratorOptions(), this.queue);
|
||||
}
|
||||
|
||||
private class Harvester implements Runnable{
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
this.execute();
|
||||
}
|
||||
|
||||
private void execute(){
|
||||
try {
|
||||
SitemapIndexIterator sitemapIndexIterator = new SitemapIndexIterator(options.getSitemapIndexIteratorOptions());
|
||||
sitemapIndexIterator.bootstrap();
|
||||
|
||||
while (sitemapIndexIterator.hasNext()) {
|
||||
String sitemapFile = sitemapIndexIterator.next();
|
||||
if(sitemapFile == null) continue;
|
||||
|
||||
SitemapFileIterator.Options sitemapFileIteratorOptions = (SitemapFileIterator.Options)options.getSitemapFileIteratorOptions().clone();
|
||||
sitemapFileIteratorOptions.setFileUrl(new URL(sitemapFile));
|
||||
SitemapFileIterator sitemapFileIterator = new SitemapFileIterator(sitemapFileIteratorOptions);
|
||||
sitemapFileIterator.bootstrap();
|
||||
|
||||
while(sitemapFileIterator.hasNext()){
|
||||
String endpoint = sitemapFileIterator.next();
|
||||
if(endpoint == null) continue;;
|
||||
|
||||
log.debug("adding endpoint in queue");
|
||||
log.debug("queue size: " + queue.size());
|
||||
try {
|
||||
queue.offer(endpoint, options.getPutTimeout(), options.getPutTimeoutUnit());
|
||||
} catch (InterruptedException ex) {
|
||||
log.warn(String.format("could not put elements from queue for more than %s %s. breaking", options.getPutTimeout(), options.getPutTimeoutUnit()));
|
||||
break;
|
||||
}
|
||||
log.debug("endpoint added in queue");
|
||||
log.debug("queue size: " + queue.size());
|
||||
}
|
||||
}
|
||||
}catch(Exception ex){
|
||||
log.error("problem execution harvesting", ex);
|
||||
}
|
||||
finally {
|
||||
try {
|
||||
queue.offer(RepositoryIterable.TerminationHint, options.getPutTimeout(), options.getPutTimeoutUnit());
|
||||
} catch (Exception ex) {
|
||||
log.fatal("could not add termination hint. the process will not terminate gracefully", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
package eu.dnetlib.data.collector.plugins.sftp;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.common.collect.Sets;
|
||||
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||||
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
|
||||
|
||||
/**
|
||||
* Created by andrea on 11/01/16.
|
||||
*/
|
||||
public class SftpCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
private SftpIteratorFactory sftpIteratorFactory;
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String toDate)
|
||||
throws CollectorServiceException {
|
||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||
final String username = interfaceDescriptor.getParams().get("username");
|
||||
final String password = interfaceDescriptor.getParams().get("password");
|
||||
final String recursive = interfaceDescriptor.getParams().get("recursive");
|
||||
final String extensions = interfaceDescriptor.getParams().get("extensions");
|
||||
|
||||
if ((baseUrl == null) || baseUrl.isEmpty()) {
|
||||
throw new CollectorServiceException("Param 'baseurl' is null or empty");
|
||||
}
|
||||
if ((username == null) || username.isEmpty()) {
|
||||
throw new CollectorServiceException("Param 'username' is null or empty");
|
||||
}
|
||||
if ((password == null) || password.isEmpty()) {
|
||||
throw new CollectorServiceException("Param 'password' is null or empty");
|
||||
}
|
||||
if ((recursive == null) || recursive.isEmpty()) {
|
||||
throw new CollectorServiceException("Param 'recursive' is null or empty");
|
||||
}
|
||||
if ((extensions == null) || extensions.isEmpty()) {
|
||||
throw new CollectorServiceException("Param 'extensions' is null or empty");
|
||||
}
|
||||
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); }
|
||||
|
||||
// final int fromDateIntSeconds =
|
||||
|
||||
return new Iterable<String>() {
|
||||
|
||||
boolean isRecursive = "true".equals(recursive);
|
||||
|
||||
Set<String> extensionsSet = parseSet(extensions);
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return getSftpIteratorFactory().newIterator(baseUrl, username, password, isRecursive, extensionsSet, fromDate);
|
||||
}
|
||||
|
||||
private Set<String> parseSet(final String extensions) {
|
||||
return Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(extensions));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public SftpIteratorFactory getSftpIteratorFactory() {
|
||||
return sftpIteratorFactory;
|
||||
}
|
||||
|
||||
public void setSftpIteratorFactory(SftpIteratorFactory sftpIteratorFactory) {
|
||||
this.sftpIteratorFactory = sftpIteratorFactory;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,206 @@
|
|||
package eu.dnetlib.data.collector.plugins.sftp;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.*;
|
||||
|
||||
import com.jcraft.jsch.*;
|
||||
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
|
||||
import org.apache.commons.io.output.ByteArrayOutputStream;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
|
||||
/**
|
||||
* Created by andrea on 11/01/16.
|
||||
*/
|
||||
public class SftpIterator implements Iterator<String> {
|
||||
private static final Log log = LogFactory.getLog(SftpIterator.class);
|
||||
|
||||
private static final int MAX_RETRIES = 5;
|
||||
private static final int DEFAULT_TIMEOUT = 30000;
|
||||
private static final long BACKOFF_MILLIS = 10000;
|
||||
|
||||
private String baseUrl;
|
||||
private String sftpURIScheme;
|
||||
private String sftpServerAddress;
|
||||
private String remoteSftpBasePath;
|
||||
private String username;
|
||||
private String password;
|
||||
private boolean isRecursive;
|
||||
private Set<String> extensionsSet;
|
||||
private boolean incremental;
|
||||
|
||||
private Session sftpSession;
|
||||
private ChannelSftp sftpChannel;
|
||||
|
||||
private Queue<String> queue;
|
||||
|
||||
private DateTime fromDate = null;
|
||||
private DateTimeFormatter simpleDateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd");
|
||||
|
||||
public SftpIterator(String baseUrl, String username, String password, boolean isRecursive, Set<String> extensionsSet, String fromDate) {
|
||||
this.baseUrl = baseUrl;
|
||||
this.username = username;
|
||||
this.password = password;
|
||||
this.isRecursive = isRecursive;
|
||||
this.extensionsSet = extensionsSet;
|
||||
this.incremental = StringUtils.isNotBlank(fromDate);
|
||||
if (incremental) {
|
||||
//I expect fromDate in the format 'yyyy-MM-dd'. See class eu.dnetlib.msro.workflows.nodes.collect.FindDateRangeForIncrementalHarvestingJobNode .
|
||||
this.fromDate = DateTime.parse(fromDate, simpleDateTimeFormatter);
|
||||
log.debug("fromDate string: " + fromDate + " -- parsed: " + this.fromDate.toString());
|
||||
}
|
||||
try {
|
||||
URI sftpServer = new URI(baseUrl);
|
||||
this.sftpURIScheme = sftpServer.getScheme();
|
||||
this.sftpServerAddress = sftpServer.getHost();
|
||||
this.remoteSftpBasePath = sftpServer.getPath();
|
||||
} catch (URISyntaxException e) {
|
||||
throw new CollectorServiceRuntimeException("Bad syntax in the URL " + baseUrl);
|
||||
}
|
||||
|
||||
connectToSftpServer();
|
||||
initializeQueue();
|
||||
}
|
||||
|
||||
private void connectToSftpServer() {
|
||||
JSch jsch = new JSch();
|
||||
|
||||
try {
|
||||
JSch.setConfig("StrictHostKeyChecking", "no");
|
||||
sftpSession = jsch.getSession(username, sftpServerAddress);
|
||||
sftpSession.setPassword(password);
|
||||
sftpSession.connect();
|
||||
|
||||
Channel channel = sftpSession.openChannel(sftpURIScheme);
|
||||
channel.connect();
|
||||
sftpChannel = (ChannelSftp) channel;
|
||||
String pwd = sftpChannel.pwd();
|
||||
log.debug("PWD from server: " + pwd);
|
||||
String fullPath = pwd + remoteSftpBasePath;
|
||||
sftpChannel.cd(fullPath);
|
||||
log.debug("PWD from server 2 after 'cd " + fullPath + "' : " + sftpChannel.pwd());
|
||||
log.info("Connected to SFTP server " + sftpServerAddress);
|
||||
} catch (JSchException e) {
|
||||
throw new CollectorServiceRuntimeException("Unable to connect to remote SFTP server.", e);
|
||||
} catch (SftpException e) {
|
||||
throw new CollectorServiceRuntimeException("Unable to access the base remote path on the SFTP server.", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void disconnectFromSftpServer() {
|
||||
sftpChannel.exit();
|
||||
sftpSession.disconnect();
|
||||
}
|
||||
|
||||
private void initializeQueue() {
|
||||
queue = new LinkedList<String>();
|
||||
log.info(String.format("SFTP collector plugin collecting from %s with recursion = %s, incremental = %s with fromDate=%s", remoteSftpBasePath,
|
||||
isRecursive,
|
||||
incremental, fromDate));
|
||||
listDirectoryRecursive(".", "");
|
||||
}
|
||||
|
||||
private void listDirectoryRecursive(final String parentDir, final String currentDir) {
|
||||
String dirToList = parentDir;
|
||||
if (StringUtils.isNotBlank(currentDir)) {
|
||||
dirToList += "/" + currentDir;
|
||||
}
|
||||
log.debug("PARENT DIR: " + parentDir);
|
||||
log.debug("DIR TO LIST: " + dirToList);
|
||||
try {
|
||||
Vector<ChannelSftp.LsEntry> ls = sftpChannel.ls(dirToList);
|
||||
for (ChannelSftp.LsEntry entry : ls) {
|
||||
String currentFileName = entry.getFilename();
|
||||
if (currentFileName.equals(".") || currentFileName.equals("..")) {
|
||||
// skip parent directory and directory itself
|
||||
continue;
|
||||
}
|
||||
|
||||
SftpATTRS attrs = entry.getAttrs();
|
||||
if (attrs.isDir()) {
|
||||
if (isRecursive) {
|
||||
listDirectoryRecursive(dirToList, currentFileName);
|
||||
}
|
||||
} else {
|
||||
// test the file for extensions compliance and, just in case, add it to the list.
|
||||
for (String ext : extensionsSet) {
|
||||
if (currentFileName.endsWith(ext)) {
|
||||
//test if the file has been changed after the last collection date:
|
||||
if (incremental) {
|
||||
int mTime = attrs.getMTime();
|
||||
//int times are values reduced by the milliseconds, hence we multiply per 1000L
|
||||
DateTime dt = new DateTime(mTime * 1000L);
|
||||
if (dt.isAfter(fromDate)) {
|
||||
queue.add(currentFileName);
|
||||
log.debug(currentFileName + " has changed and must be re-collected");
|
||||
} else {
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug(currentFileName + " has not changed since last collection");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
//if it is not incremental, just add it to the queue
|
||||
queue.add(currentFileName);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (SftpException e) {
|
||||
throw new CollectorServiceRuntimeException("Cannot list the sftp remote directory", e);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (queue.isEmpty()) {
|
||||
disconnectFromSftpServer();
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
String nextRemotePath = queue.remove();
|
||||
int nRepeat = 0;
|
||||
String fullPathFile = nextRemotePath;
|
||||
while (nRepeat < MAX_RETRIES) {
|
||||
try {
|
||||
OutputStream baos = new ByteArrayOutputStream();
|
||||
sftpChannel.get(nextRemotePath, baos);
|
||||
if (log.isDebugEnabled()) {
|
||||
fullPathFile = sftpChannel.pwd() + "/" + nextRemotePath;
|
||||
log.debug(String.format("Collected file from SFTP: %s%s", sftpServerAddress, fullPathFile));
|
||||
}
|
||||
return baos.toString();
|
||||
} catch (SftpException e) {
|
||||
nRepeat++;
|
||||
log.warn(String.format("An error occurred [%s] for %s%s, retrying.. [retried %s time(s)]", e.getMessage(), sftpServerAddress, fullPathFile,
|
||||
nRepeat));
|
||||
// disconnectFromSftpServer();
|
||||
try {
|
||||
Thread.sleep(BACKOFF_MILLIS);
|
||||
} catch (InterruptedException e1) {
|
||||
log.error(e1);
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new CollectorServiceRuntimeException(
|
||||
String.format("Impossible to retrieve FTP file %s after %s retries. Aborting FTP collection.", fullPathFile, nRepeat));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
package eu.dnetlib.data.collector.plugins.sftp;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Created by andrea on 11/01/16.
|
||||
*/
|
||||
public class SftpIteratorFactory {
|
||||
|
||||
public Iterator<String> newIterator(final String baseUrl,
|
||||
final String username,
|
||||
final String password,
|
||||
final boolean isRecursive,
|
||||
final Set<String> extensionsSet, final String fromDate) {
|
||||
return new SftpIterator(baseUrl, username, password, isRecursive, extensionsSet, fromDate);
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue