diff --git a/dhp-workflows/dhp-bmuse/pom.xml b/dhp-workflows/dhp-bmuse/pom.xml new file mode 100644 index 000000000..b34a0934b --- /dev/null +++ b/dhp-workflows/dhp-bmuse/pom.xml @@ -0,0 +1,42 @@ + + + 4.0.0 + + eu.dnetlib.dhp + dhp-workflows + 1.2.4-SNAPSHOT + + dhp-bmuse + + + + org.apache.spark + spark-core_2.11 + + + org.apache.spark + spark-sql_2.11 + + + eu.dnetlib.dhp + dhp-common + ${project.version} + + + hwu.elixir + bmuse-core + 0.5.3 + + + com.google.guava + guava + 22.0 + + + com.squareup.okhttp3 + okhttp + 3.11.0 + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/bioschema/ScrapingJob.java b/dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/bioschema/ScrapingJob.java new file mode 100644 index 000000000..28ad1ee0a --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/bioschema/ScrapingJob.java @@ -0,0 +1,112 @@ + +package eu.dnetlib.dhp.bmuse.bioschema; + +import java.nio.charset.Charset; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Date; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Stream; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.spark.util.LongAccumulator; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bmuse.utils.BMUSEScraper; +import eu.dnetlib.dhp.bmuse.utils.UrlParser; +import eu.dnetlib.dhp.utils.DHPUtils; + +public class ScrapingJob { + + static Logger logger = LoggerFactory.getLogger(ScrapingJob.class); + 
private static SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z"); + + public static void main(String[] args) throws Exception { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + ScrapingJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/bmuse/bioschema/generate_dataset.json"))); + parser.parseArgument(args); + + final String nameNode = parser.get("nameNode"); + final String workingPath = parser.get("workingPath"); + final String rdfOutput = parser.get("rdfOutput"); + final String sitemapUrl = parser.get("sitemapUrl"); + final String sitemapURLKey = parser.get("sitemapURLKey"); + final String dynamic = parser.get("dynamic"); + final String maxScrapedPages = parser.get("maxScrapedPages"); + Boolean dynamicValue = true; + if (Objects.nonNull(dynamic)) { + dynamicValue = Boolean.parseBoolean(dynamic); + } + final boolean scrapingType = dynamicValue.booleanValue(); + + AtomicLong scraped = new AtomicLong(0l); + AtomicLong errors = new AtomicLong(0l); + + logger + .info( + "*************************** STARTING SCRAPE: " + + formatter.format(new Date(System.currentTimeMillis()))); + logger.info("Default charset: " + Charset.defaultCharset()); + + BMUSEScraper scraper = new BMUSEScraper(); + String url = sitemapUrl.toLowerCase(); + Elements urls = UrlParser.getSitemapList(url, sitemapURLKey); + long total = urls.size(); + + Path output = new Path( + nameNode + .concat(workingPath) + .concat(rdfOutput)); + Configuration conf = DHPUtils.getHadoopConfiguration(nameNode); + try (SequenceFile.Writer writer = SequenceFile + .createWriter( + conf, + SequenceFile.Writer.file(output), + SequenceFile.Writer.keyClass(Text.class), + SequenceFile.Writer.valueClass(Text.class), + SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()))) { + Stream urlStream = null; + if (Objects.nonNull(maxScrapedPages)) { + urlStream = urls.stream().limit(Long.parseLong(maxScrapedPages)); + } 
else { + urlStream = urls.stream(); + } + urlStream.forEach(u -> { + try { + final Text key = new Text(u.text()); + final Text value = new Text(scraper.scrapeUrl(u.text(), scrapingType)); + writer.append(key, value); + scraped.getAndIncrement(); + } catch (Exception e) { + logger.error(u.text(), e); + errors.getAndIncrement(); + } + }); + } + + logger + .info( + "*************************** ENDING SCRAPE: " + formatter.format(new Date(System.currentTimeMillis()))); + logger + .info( + "Total pages to scrape: " + total + " Scraped: " + scraped.get() + + " Errors: " + errors.get()); + } +} diff --git a/dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/utils/BMUSEScraper.java b/dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/utils/BMUSEScraper.java new file mode 100644 index 000000000..1e58503fa --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/utils/BMUSEScraper.java @@ -0,0 +1,85 @@ + +package eu.dnetlib.dhp.bmuse.utils; + +import java.io.StringWriter; + +import org.apache.any23.source.DocumentSource; +import org.apache.any23.source.StringDocumentSource; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.rio.RDFFormat; +import org.eclipse.rdf4j.rio.Rio; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import hwu.elixir.scrape.exceptions.*; +import hwu.elixir.scrape.scraper.ScraperFilteredCore; + +public class BMUSEScraper extends ScraperFilteredCore { + + private static final Logger logger = LoggerFactory.getLogger(BMUSEScraper.class.getName()); + + public String scrapeUrl(String url, Boolean dynamic) + throws MissingMarkupException, FourZeroFourException { + url = fixURL(url); + + String html = ""; + // The dynamic boolean determines if the scraper should start using selenium or JSOUP to scrape the information + // (dynamic and static respectively) + + if (dynamic) { + html = 
wrapHTMLExtraction(url); + } else { + html = wrapHTMLExtractionStatic(url); + } + + if (html == null || html.contentEquals("")) + return new String("empty html"); + if (logger.isTraceEnabled()) { + logger.trace("Read following html =============================================================="); + logger.trace(html); + } + + try { + html = injectId(html, url); + if (logger.isTraceEnabled()) { + logger + .trace( + "Same HTML after injecting ID =============================================================="); + logger.trace(html); + } + } catch (MissingHTMLException | JsonLDInspectionException e) { + logger.error(e.toString()); + return e.getMessage(); + } + + DocumentSource source = new StringDocumentSource(html, url); + IRI sourceIRI = SimpleValueFactory.getInstance().createIRI(source.getDocumentIRI()); + + String n3 = getTriplesInNTriples(source); + if (n3 == null) + throw new MissingMarkupException(url); + + Model updatedModel = null; + try { + updatedModel = processTriples(n3, sourceIRI, 0l); + } catch (NTriplesParsingException e1) { + logger + .error( + "Failed to process triples into model; the NTriples generated from the URL (" + url + + ") could not be parsed into a model."); + return e1.getMessage(); + } + if (updatedModel == null) + return new String("rdf model null"); + + try (StringWriter jsonLDWriter = new StringWriter()) { + Rio.write(updatedModel, jsonLDWriter, RDFFormat.NQUADS); + return jsonLDWriter.toString(); + } catch (Exception e) { + logger.error("Problem writing jsonld for " + url, e); + return e.getMessage(); + } + } +} diff --git a/dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/utils/UrlParser.java b/dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/utils/UrlParser.java new file mode 100644 index 000000000..c1c626b69 --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/utils/UrlParser.java @@ -0,0 +1,65 @@ + +package eu.dnetlib.dhp.bmuse.utils; + +import java.io.IOException; + +import 
org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import hwu.elixir.utils.Helpers; + +public class UrlParser { + + private static final Logger logger = LoggerFactory.getLogger(UrlParser.class.getName()); + + public static Elements getSitemapList(String url, String sitemapURLKey) throws IOException { + + Document doc = new Document(url); + Document urlSitemapListsNested; + Elements elements = new Elements(); + Elements sitemaps = new Elements(); + boolean sitemapindex = false; + boolean urlset = false; + + try { + int urlLength = url.length(); + logger.info("parse sitemap list"); + String sitemapExt = url.substring(urlLength - 3, urlLength); + if (sitemapExt.equalsIgnoreCase(".gz")) { // this checks only the extension at the ending + logger.info("compressed sitemap"); + byte[] bytes = Jsoup.connect(url).ignoreContentType(true).execute().bodyAsBytes(); + doc = Helpers.gzipFileDecompression(bytes); + } else { + doc = Jsoup.connect(url).maxBodySize(0).get(); + } + + } catch (IOException e) { + logger.error("Jsoup parsing exception: " + e.getMessage()); + } + + try { + + elements = doc.select(sitemapURLKey); + + // check the html if it is a sitemapindex or a urlset + sitemapindex = doc.outerHtml().contains("sitemapindex"); + urlset = doc.outerHtml().contains("urlset"); + } catch (NullPointerException e) { + logger.error(e.getMessage()); + } + + if (sitemapindex) { + // if sitemapindex get the loc of all the sitemaps + // added warning for sitemap index files + logger + .warn( + "please note this is a sitemapindex file which is not currently supported, please use the content (url) of the urlset instead"); + sitemaps = doc.select(sitemapURLKey); + } + + return elements; + } +} diff --git a/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/generate_dataset.json 
b/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/generate_dataset.json new file mode 100644 index 000000000..1ac0b50de --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/generate_dataset.json @@ -0,0 +1,44 @@ +[ + { + "paramName": "n", + "paramLongName": "nameNode", + "paramDescription": "the Name Node URI", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingPath", + "paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName": "r", + "paramLongName": "rdfOutput", + "paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName": "u", + "paramLongName": "sitemapUrl", + "paramDescription": "the sitemap url", + "paramRequired": true + }, + { + "paramName": "k", + "paramLongName": "sitemapURLKey", + "paramDescription": "the sitemap file contains a list of xml entries, each one has a tag identified with sitemapURLKey with the url as value", + "paramRequired": true + }, + { + "paramName": "d", + "paramLongName": "dynamic", + "paramDescription": "the dynamic boolean determines if the scraper should start using selenium or JSOUP to scrape the information (dynamic and static respectively)", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "maxScrapedPages", + "paramDescription": "max number of pages that will be scraped, default: no limit", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/config-default.xml b/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/config-default.xml new file mode 100644 index 000000000..7b13aab55 --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/config-default.xml @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + jobTracker + yarn + + + nameNode + 
hdfs://hadoop-rm1.garr-pa1.d4science.org:8020 + + + hive_metastore_uris + thrift://hadoop-edge3.garr-pa1.d4science.org:9083 + + + spark2YarnHistoryServerAddress + http://hadoop-rm2.garr-pa1.d4science.org:19888 + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/workflow.xml b/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/workflow.xml new file mode 100644 index 000000000..705396653 --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/workflow.xml @@ -0,0 +1,98 @@ + + + + workingPath + /data/bioschema/disprot/ + the working path + + + rdfOutput + nquads.seq + rdf output of scraping step + + + sitemapUrl + https://disprot.org/sitemap2.xml.gz + + + sitemapURLKey + loc + + + dynamic + true + the dynamic boolean determines if the scraper should start using selenium or JSOUP to scrape the information (dynamic and static respectively) + + + maxScrapedPages + 100 + max number of pages that will be scraped, default: no limit + + + oozie.launcher.mapreduce.map.java.opts + -Xmx4g + + + spark2RdfConversionMaxExecutors + 50 + + + sparkDriverMemory + 7G + memory for driver process + + + sparkExecutorMemory + 2G + memory for individual executor + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + + + 
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + ${nameNode} + eu.dnetlib.dhp.bmuse.bioschema.ScrapingJob + --nameNode${nameNode} + --workingPath${workingPath} + --rdfOutput${rdfOutput} + --sitemapUrl${sitemapUrl} + --sitemapURLKey${sitemapURLKey} + --dynamic${dynamic} + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-bmuse/src/main/resources/localconfig.properties b/dhp-workflows/dhp-bmuse/src/main/resources/localconfig.properties new file mode 100644 index 000000000..26f94f2df --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/resources/localconfig.properties @@ -0,0 +1,4 @@ +maxLimitScrape=200000 +schemaContext=https\://schema.org/docs/jsonldcontext.jsonld +dynamic=true +chromiumDriverLocation=/bin/chromedriver \ No newline at end of file diff --git a/dhp-workflows/dhp-bmuse/src/main/resources/log4j.properties b/dhp-workflows/dhp-bmuse/src/main/resources/log4j.properties new file mode 100644 index 000000000..63cba917e --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/resources/log4j.properties @@ -0,0 +1,9 @@ +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=INFO, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. 
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n diff --git a/dhp-workflows/dhp-bmuse/src/test/java/eu/dnetlib/dhp/bmuse/bioschema/SitemapTest.java b/dhp-workflows/dhp-bmuse/src/test/java/eu/dnetlib/dhp/bmuse/bioschema/SitemapTest.java new file mode 100644 index 000000000..2b87d069a --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/test/java/eu/dnetlib/dhp/bmuse/bioschema/SitemapTest.java @@ -0,0 +1,24 @@ + +package eu.dnetlib.dhp.bmuse.bioschema; + +import org.jsoup.select.Elements; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.bmuse.utils.UrlParser; + +public class SitemapTest { + + static Logger logger = LoggerFactory.getLogger(SitemapTest.class); + + @Test + @Disabled + void sitemapGzTest() throws Exception { + Elements urls = UrlParser.getSitemapList("https://disprot.org/sitemap2.xml.gz", "loc"); + urls.forEach(url -> { + logger.info(url.text()); + }); + } +} diff --git a/dhp-workflows/dhp-rdfconverter/pom.xml b/dhp-workflows/dhp-rdfconverter/pom.xml new file mode 100644 index 000000000..77d48a1e9 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/pom.xml @@ -0,0 +1,42 @@ + + + 4.0.0 + + eu.dnetlib.dhp + dhp-workflows + 1.2.4-SNAPSHOT + + dhp-rdfconverter + + + + org.apache.spark + spark-core_2.11 + + + org.apache.spark + spark-sql_2.11 + + + eu.dnetlib.dhp + dhp-common + ${project.version} + + + org.apache.any23 + apache-any23-core + 2.3 + + + org.eclipse.rdf4j + rdf4j-rio-rdfxml + 2.5.4 + + + org.eclipse.rdf4j + rdf4j-model + 2.5.4 + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/SparkRdfToDatacite.java b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/SparkRdfToDatacite.java new file mode 100644 index 000000000..057492dab --- /dev/null +++ 
b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/SparkRdfToDatacite.java @@ -0,0 +1,71 @@ + +package eu.dnetlib.dhp.rdfconverter.bioschema; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.rdfconverter.utils.RDFConverter; + +public class SparkRdfToDatacite { + + static Logger logger = LoggerFactory.getLogger(SparkRdfToDatacite.class); + + public static void main(String[] args) throws Exception { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkRdfToDatacite.class + .getResourceAsStream( + "/eu/dnetlib/dhp/rdfconverter/bioschema/generate_dataset.json"))); + parser.parseArgument(args); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + final String workingPath = parser.get("workingPath"); + final String rdfNquadsRecords = parser.get("rdfInput"); + final String output = parser.get("output"); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + String rdfNquadsRecordsPath = workingPath.concat(rdfNquadsRecords); + JavaPairRDD rdfNquadsRecordsRDD = sc + .sequenceFile(rdfNquadsRecordsPath, Text.class, Text.class); + logger.info("Rdf nquads records retrieved: {}", 
rdfNquadsRecordsRDD.count()); + + JavaRDD proteins = rdfNquadsRecordsRDD.flatMap(nquads -> { + RDFConverter converter = new RDFConverter(); + ArrayList jsonlds = null; + try { + jsonlds = converter.nQuadsFile2DataciteJson(nquads._2().toString()); + } catch (Exception e) { + logger.error(nquads._1().toString(), e); + return Arrays.asList(new String()).iterator(); + } + return jsonlds.iterator(); + }).filter(Objects::nonNull).filter(jsonld -> !jsonld.isEmpty()).map(jsonld -> new Text(jsonld)); + logger.info("json datacite generated: {}", proteins.count()); + proteins.saveAsTextFile(workingPath.concat(output), GzipCodec.class); + }); + } +} diff --git a/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/BioSchemaProtein.java b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/BioSchemaProtein.java new file mode 100644 index 000000000..ffac36459 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/BioSchemaProtein.java @@ -0,0 +1,384 @@ + +package eu.dnetlib.dhp.rdfconverter.bioschema.model; + +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class BioSchemaProtein { + @JsonProperty("@id") + private String id; + @JsonProperty("@graph") + private List entryList; + @JsonProperty("http://purl.org/pav/retrievedOn") + private DateTimeType retrievedOn; + @JsonProperty("citation") + private Citation citation; + + public static class Entry { + @JsonProperty("@id") + private String id; + @JsonProperty("@type") + private List type; + @JsonProperty("https://schema.org/identifier") + private String identifier; + @JsonProperty("https://schema.org/name") + private String name; + @JsonProperty("associatedDisease") + private List associatedDisease; + @JsonProperty("description") + private String description; 
+ @JsonProperty("isEncodedByBioChemEntity") + private String isEncodedByBioChemEntity; + @JsonProperty("url") + private String url; + @JsonProperty("alternateName") + private String alternateName; + @JsonProperty("bioChemInteraction") + private List bioChemInteraction; + @JsonProperty("bioChemSimilarity") + private List bioChemSimilarity; + @JsonProperty("hasMolecularFunction") + private String hasMolecularFunction; + @JsonProperty("image") + private String image; + @JsonProperty("isInvolvedInBiologicalProcess") + private String isInvolvedInBiologicalProcess; + @JsonProperty("isPartOfBioChemEntity") + private IsPartOfBioChemEntity isPartOfBioChemEntity; + @JsonProperty("mainEntityOfPage") + private Link mainEntityOfPage; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public List getType() { + return type; + } + + public void setType(List type) { + this.type = type; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + @JsonProperty("https://schema.org/sameAs") + private List sameAs; + + public List getSameAs() { + return sameAs; + } + + public void setSameAs(List sameAs) { + this.sameAs = sameAs; + } + + public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public String getIsEncodedByBioChemEntity() { + return isEncodedByBioChemEntity; + } + + public void setIsEncodedByBioChemEntity(String isEncodedByBioChemEntity) { + this.isEncodedByBioChemEntity = isEncodedByBioChemEntity; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public String getAlternateName() { + return alternateName; + } + + public void setAlternateName(String 
alternateName) { + this.alternateName = alternateName; + } + + public List getBioChemInteraction() { + return bioChemInteraction; + } + + public void setBioChemInteraction(List bioChemInteraction) { + this.bioChemInteraction = bioChemInteraction; + } + + public List getBioChemSimilarity() { + return bioChemSimilarity; + } + + public void setBioChemSimilarity(List bioChemSimilarity) { + this.bioChemSimilarity = bioChemSimilarity; + } + + public String getHasMolecularFunction() { + return hasMolecularFunction; + } + + public void setHasMolecularFunction(String hasMolecularFunction) { + this.hasMolecularFunction = hasMolecularFunction; + } + + public String getImage() { + return image; + } + + public void setImage(String image) { + this.image = image; + } + + public String getIsInvolvedInBiologicalProcess() { + return isInvolvedInBiologicalProcess; + } + + public void setIsInvolvedInBiologicalProcess(String isInvolvedInBiologicalProcess) { + this.isInvolvedInBiologicalProcess = isInvolvedInBiologicalProcess; + } + + public List getAssociatedDisease() { + return associatedDisease; + } + + public void setAssociatedDisease(List associatedDisease) { + this.associatedDisease = associatedDisease; + } + + public IsPartOfBioChemEntity getIsPartOfBioChemEntity() { + return isPartOfBioChemEntity; + } + + public void setIsPartOfBioChemEntity(IsPartOfBioChemEntity isPartOfBioChemEntity) { + this.isPartOfBioChemEntity = isPartOfBioChemEntity; + } + + public Link getMainEntityOfPage() { + return mainEntityOfPage; + } + + public void setMainEntityOfPage(Link mainEntityOfPage) { + this.mainEntityOfPage = mainEntityOfPage; + } + + } + + public static class IsPartOfBioChemEntity { + @JsonProperty("@type") + private String type; + @JsonProperty("url") + private String url; + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } 
+ } + + public static class AssociatedDisease { + @JsonProperty("@type") + private String type; + @JsonProperty("name") + private String name; + @JsonProperty("code") + private DeseaseCode code; + @JsonProperty("id") + private String id; + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public DeseaseCode getCode() { + return code; + } + + public void setCode(DeseaseCode code) { + this.code = code; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + } + + public static class DeseaseCode { + @JsonProperty("@type") + private String type; + @JsonProperty("codeValue") + private String codeValue; + @JsonProperty("codingSystem") + private String codingSystem; + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getCodeValue() { + return codeValue; + } + + public void setCodeValue(String codeValue) { + this.codeValue = codeValue; + } + + public String getCodingSystem() { + return codingSystem; + } + + public void setCodingSystem(String codingSystem) { + this.codingSystem = codingSystem; + } + } + + public static class Link { + @JsonProperty("@id") + private String id; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + } + + public static class DateTimeType { + @JsonProperty("@type") + private String type; + @JsonProperty("@value") + private String value; + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } + } + + public static class Citation { + @JsonProperty("@type") + private String type; + @JsonProperty("@id") + private String id; + + public 
String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public List getEntryList() { + return entryList; + } + + public void setEntryList(List entryList) { + this.entryList = entryList; + } + + public DateTimeType getRetrievedOn() { + return retrievedOn; + } + + public void setRetrievedOn(DateTimeType retrievedOn) { + this.retrievedOn = retrievedOn; + } + + public Citation getCitation() { + return citation; + } + + public void setCitation(Citation citation) { + this.citation = citation; + } +} diff --git a/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/DataciteProtein.java b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/DataciteProtein.java new file mode 100644 index 000000000..6fe0963e2 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/DataciteProtein.java @@ -0,0 +1,291 @@ + +package eu.dnetlib.dhp.rdfconverter.bioschema.model; + +import java.util.ArrayList; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonInclude; + +@JsonInclude(JsonInclude.Include.NON_NULL) +public class DataciteProtein { + private String id; + private String doi; + private Types types; + List creators = new ArrayList(); + private String publisher; + private String publicationYear; + private static final String schemaVersion = "http://datacite.org/schema/kernel-4"; + List identifiers = new ArrayList(); + List relatedIdentifiers = new ArrayList(); + List alternateIdentifiers = new ArrayList(); + List descriptions = new ArrayList(); + List titles = new ArrayList<Title>(); + private List<DataciteDate> dates = new ArrayList<DataciteDate>(); + + @JsonInclude(JsonInclude.Include.NON_NULL) 
+ public static class Types { + private String resourceType; + private String resourceTypeGeneral; + + public String getResourceType() { + return resourceType; + } + + public void setResourceType(String resourceType) { + this.resourceType = resourceType; + } + + public String getResourceTypeGeneral() { + return resourceTypeGeneral; + } + + public void setResourceTypeGeneral(String resourceTypeGeneral) { + this.resourceTypeGeneral = resourceTypeGeneral; + } + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class Creators { + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class Identifier { + private String identifier; + private String identifierType; + + public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public String getIdentifierType() { + return identifierType; + } + + public void setIdentifierType(String identifierType) { + this.identifierType = identifierType; + } + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class RelatedIdentifier { + private String relationType; + private String relatedIdentifier; + private String relatedIdentifierType; + + public String getRelationType() { + return relationType; + } + + public void setRelationType(String relationType) { + this.relationType = relationType; + } + + public String getRelatedIdentifier() { + return relatedIdentifier; + } + + public void setRelatedIdentifier(String relatedIdentifier) { + this.relatedIdentifier = relatedIdentifier; + } + + public String getRelatedIdentifierType() { + return relatedIdentifierType; + } + + public void setRelatedIdentifierType(String relatedIdentifierType) { + this.relatedIdentifierType = relatedIdentifierType; + } + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class AlternateIdentifier { + private String alternateIdentifier; + private String alternateIdentifierType; + + public String getAlternateIdentifier() { + 
return alternateIdentifier; + } + + public void setAlternateIdentifier(String alternateIdentifier) { + this.alternateIdentifier = alternateIdentifier; + } + + public String getAlternateIdentifierType() { + return alternateIdentifierType; + } + + public void setAlternateIdentifierType(String alternateIdentifierType) { + this.alternateIdentifierType = alternateIdentifierType; + } + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class Description { + private String description; + private String descriptionType; + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public String getDescriptionType() { + return descriptionType; + } + + public void setDescriptionType(String descriptionType) { + this.descriptionType = descriptionType; + } + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class Title { + private String title; + private String titleType; + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getTitleType() { + return titleType; + } + + public void setTitleType(String titleType) { + this.titleType = titleType; + } + } + + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class DataciteDate { + private String date; + private String dateType; + + public String getDate() { + return date; + } + + public void setDate(String date) { + this.date = date; + } + + public String getDateType() { + return dateType; + } + + public void setDateType(String dateType) { + this.dateType = dateType; + } + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getDoi() { + return doi; + } + + public void setDoi(String doi) { + this.doi = doi; + } + + public Types getTypes() { + return types; + } + + public void setTypes(Types types) { + this.types = types; + } + + public List<Creators> 
getCreators() { + return creators; + } + + public void setCreators(List<Creators> creators) { + this.creators = creators; + } + + public String getPublisher() { + return publisher; + } + + public void setPublisher(String publisher) { + this.publisher = publisher; + } + + public String getPublicationYear() { + return publicationYear; + } + + public void setPublicationYear(String publicationYear) { + this.publicationYear = publicationYear; + } + + public static String getSchemaVersion() { + return schemaVersion; + } + + public List<RelatedIdentifier> getRelatedIdentifiers() { + return relatedIdentifiers; + } + + public void setRelatedIdentifiers(List<RelatedIdentifier> relatedIdentifiers) { + this.relatedIdentifiers = relatedIdentifiers; + } + + public List<AlternateIdentifier> getAlternateIdentifiers() { + return alternateIdentifiers; + } + + public void setAlternateIdentifiers(List<AlternateIdentifier> alternateIdentifiers) { + this.alternateIdentifiers = alternateIdentifiers; + } + + public List<Description> getDescriptions() { + return descriptions; + } + + public void setDescriptions(List<Description> descriptions) { + this.descriptions = descriptions; + } + + public List<Title> getTitles() { + return titles; + } + + public void setTitles(List<Title> titles) { + this.titles = titles; + } + + public List<Identifier> getIdentifiers() { + return identifiers; + } + + public void setIdentifiers(List<Identifier> identifiers) { + this.identifiers = identifiers; + } + + public List<DataciteDate> getDates() { + return dates; + } + + public void setDates(List<DataciteDate> dates) { + this.dates = dates; + } +} diff --git a/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/RDFConverter.java b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/RDFConverter.java new file mode 100644 index 000000000..08e4a03ad --- /dev/null +++ 
b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/RDFConverter.java @@ -0,0 +1,197 @@ + +package eu.dnetlib.dhp.rdfconverter.utils; + +import java.io.StringReader; +import java.io.StringWriter; +import java.util.*; + +import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.rio.RDFFormat; +import org.eclipse.rdf4j.rio.RDFWriter; +import org.eclipse.rdf4j.rio.Rio; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.jsonldjava.core.JsonLdOptions; +import com.github.jsonldjava.core.JsonLdProcessor; +import com.github.jsonldjava.utils.JsonUtils; + +import eu.dnetlib.dhp.rdfconverter.bioschema.model.BioSchemaProtein; +import eu.dnetlib.dhp.rdfconverter.bioschema.model.DataciteProtein; + +public class RDFConverter { + + private static final Logger log = LoggerFactory.getLogger(RDFConverter.class); + + public ArrayList<String> nQuadsFile2DataciteJson(String nquads) throws Exception { + StringReader reader = new StringReader(nquads); + Model model = Rio.parse(reader, "", RDFFormat.NQUADS); + StringWriter jsonLDWriter = new StringWriter(); + RDFWriter rdfRecordWriter = Rio.createWriter(RDFFormat.JSONLD, jsonLDWriter); + Rio.write(model, rdfRecordWriter); + String jsonLDBuffer = jsonLDWriter.toString(); + Object jsonObject = JsonUtils.fromString(jsonLDBuffer); + Object compact = JsonLdProcessor.compact(jsonObject, new HashMap<>(), new JsonLdOptions()); + String compactContent = JsonUtils.toString(compact); + + ObjectMapper objectMapper = new ObjectMapper(); + objectMapper.enable(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY); + objectMapper.enable(DeserializationFeature.ACCEPT_EMPTY_STRING_AS_NULL_OBJECT); + objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + BioSchemaProtein bioSchemaProtein = objectMapper.readValue(compactContent, BioSchemaProtein.class); + 
log.debug("BioSchema id: " + bioSchemaProtein.getId()); + BioSchemaProtein.DateTimeType retrievedOnType = bioSchemaProtein.getRetrievedOn(); + BioSchemaProtein.Citation citation = bioSchemaProtein.getCitation(); + + ArrayList<String> results = new ArrayList<String>(); + bioSchemaProtein.getEntryList().stream().forEach(entry -> { + + if (entry.getType() != null + && entry.getType().stream().filter(type -> type.equals("https://schema.org/Protein")).count() == 1) { + + DataciteProtein dataciteProtein = new DataciteProtein(); + if (citation != null) { + addRelatedIdentifier(dataciteProtein, citation.getId(), "CitedBy"); + } + + DataciteProtein.Types types = new DataciteProtein.Types(); + types.setResourceType("Protein"); + types.setResourceTypeGeneral("Dataset"); + dataciteProtein.setTypes(types); + + DataciteProtein.DataciteDate dataciteDate = new DataciteProtein.DataciteDate(); + dataciteDate.setDate(retrievedOnType.getValue()); + dataciteDate.setDateType("Collected"); + dataciteProtein.getDates().add(dataciteDate); + + if (entry.getName() != null) { + log.debug("Name: " + entry.getName()); + DataciteProtein.Title title = new DataciteProtein.Title(); + title.setTitle(entry.getName()); + dataciteProtein.getTitles().add(title); + } + DataciteProtein.Identifier identifier = new DataciteProtein.Identifier(); + log.debug("Id: " + entry.getId()); + identifier.setIdentifier(entry.getId()); + identifier.setIdentifierType("URL"); + dataciteProtein.getIdentifiers().add(identifier); + + if (entry.getIdentifier() != null) { + log.debug("Identifier: " + entry.getIdentifier()); + addAlternateIdentifier(dataciteProtein, entry.getIdentifier()); + } + + if (entry.getDescription() != null) { + log.debug("description: " + entry.getDescription()); + DataciteProtein.Description description = new DataciteProtein.Description(); + description.setDescription(entry.getDescription()); + dataciteProtein.getDescriptions().add(description); + } + + if (entry.getIsEncodedByBioChemEntity() != null) 
{ + log.debug("isEncodedByBioChemEntity: " + entry.getIsEncodedByBioChemEntity()); + addRelatedIdentifier(dataciteProtein, entry.getIsEncodedByBioChemEntity(), ""); + } + + if (entry.getUrl() != null) { + log.debug("url: " + entry.getUrl()); + addAlternateIdentifier(dataciteProtein, entry.getUrl()); + } + + if (entry.getAlternateName() != null) { + log.debug("alternateName: " + entry.getAlternateName()); + DataciteProtein.Title title = new DataciteProtein.Title(); + title.setTitle(entry.getAlternateName()); + title.setTitleType("AlternativeTitle"); + dataciteProtein.getTitles().add(title); + } + + if (entry.getBioChemInteraction() != null) { + entry.getBioChemInteraction().stream().filter(Objects::nonNull).forEach(bc -> { + log.debug("bioChemInteraction: " + bc.getId()); + addRelatedIdentifier(dataciteProtein, bc.getId(), ""); + }); + } + + if (entry.getBioChemSimilarity() != null) { + entry.getBioChemSimilarity().stream().filter(Objects::nonNull).forEach(bc -> { + log.debug("bioChemSimilarity: " + bc.getId()); + addRelatedIdentifier(dataciteProtein, bc.getId(), ""); + }); + } + + if (entry.getHasMolecularFunction() != null) { + log.debug("hasMolecularFunction: " + entry.getHasMolecularFunction()); + addRelatedIdentifier(dataciteProtein, entry.getHasMolecularFunction(), ""); + } + + if (entry.getIsInvolvedInBiologicalProcess() != null) { + log.debug("isInvolvedInBiologicalProcess: " + entry.getIsInvolvedInBiologicalProcess()); + addRelatedIdentifier(dataciteProtein, entry.getIsInvolvedInBiologicalProcess(), ""); + } + + if (entry.getIsEncodedByBioChemEntity() != null) { + log.debug("isEncodedByBioChemEntity: " + entry.getIsEncodedByBioChemEntity()); + addRelatedIdentifier(dataciteProtein, entry.getIsEncodedByBioChemEntity(), ""); + } + + if (entry.getIsPartOfBioChemEntity() != null) { + log.debug("isPartOfBioChemEntity: " + entry.getIsPartOfBioChemEntity()); + addRelatedIdentifier(dataciteProtein, entry.getIsPartOfBioChemEntity().getUrl(), ""); + } + + if 
(entry.getSameAs() != null) { + entry.getSameAs().stream().filter(Objects::nonNull).forEach(sameAs -> { + log.debug("sameAs: " + sameAs.getId()); + addRelatedIdentifier(dataciteProtein, sameAs.getId(), "IsIdenticalTo"); + }); + } + + if (entry.getAssociatedDisease() != null) { + entry.getAssociatedDisease().stream().filter(Objects::nonNull).forEach(ad -> { + log.debug("associated disease: " + ad.getName()); + addRelatedIdentifier(dataciteProtein, ad.getName(), "IsIdenticalTo"); + }); + } + + String proteinId = ""; + try { + String[] identifierParts = dataciteProtein.getIdentifiers().get(0).getIdentifier().split("/"); + proteinId = identifierParts[identifierParts.length - 1]; + } catch (Exception e) { + log.error("Identifier not found: {}", e.getMessage()); + } + + dataciteProtein.setId(proteinId); + + ObjectMapper mapper = new ObjectMapper(); + try { + StringWriter writer = new StringWriter(); + mapper.writeValue(writer, dataciteProtein); + results.add(writer.toString()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }); + return results; + } + + private void addRelatedIdentifier(DataciteProtein dataciteProtein, String relatedIdentifierValue, + String relationType) { + DataciteProtein.RelatedIdentifier relatedIdentifier = new DataciteProtein.RelatedIdentifier(); + relatedIdentifier.setRelatedIdentifier(relatedIdentifierValue); + if (!relationType.isEmpty()) { + relatedIdentifier.setRelationType(relationType); + } + dataciteProtein.getRelatedIdentifiers().add(relatedIdentifier); + } + + private void addAlternateIdentifier(DataciteProtein dataciteProtein, String alternateIdentifierValue) { + DataciteProtein.AlternateIdentifier alternateIdentifier = new DataciteProtein.AlternateIdentifier(); + alternateIdentifier.setAlternateIdentifier(alternateIdentifierValue); + dataciteProtein.getAlternateIdentifiers().add(alternateIdentifier); + } +} diff --git 
a/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/generate_dataset.json b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/generate_dataset.json new file mode 100644 index 000000000..4038c275f --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/generate_dataset.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "n", + "paramLongName": "nameNode", + "paramDescription": "the Name Node URI", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingPath", + "paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "rdfInput", + "paramDescription": "sequence file inside working path that contains rdf records", + "paramRequired": true + }, + { + "paramName": "o", + "paramLongName": "output", + "paramDescription": "relative path inside workingpath where bioschema dataset in datacite format will be stored", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/config-default.xml b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/config-default.xml new file mode 100644 index 000000000..7b13aab55 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/config-default.xml @@ -0,0 +1,68 @@ +<configuration> + + <!-- OCEAN --> + +<!-- <property>--> +<!-- <name>jobTracker</name>--> +<!-- <value>yarnRM</value>--> +<!-- </property>--> +<!-- <property>--> +<!-- <name>nameNode</name>--> +<!-- <value>hdfs://nameservice1</value>--> +<!-- </property>--> +<!-- <property>--> +<!-- <name>hive_metastore_uris</name>--> +<!-- <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>--> +<!-- </property>--> +<!-- <property>--> +<!-- 
<name>spark2YarnHistoryServerAddress</name>--> +<!-- <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>--> +<!-- </property>--> + + + <!-- GARR --> + + <property> + <name>jobTracker</name> + <value>yarn</value> + </property> + <property> + <name>nameNode</name> + <value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value> + </property> + <property> + <name>hive_metastore_uris</name> + <value>thrift://hadoop-edge3.garr-pa1.d4science.org:9083</value> + </property> + <property> + <name>spark2YarnHistoryServerAddress</name> + <value>http://hadoop-rm2.garr-pa1.d4science.org:19888</value> + </property> + + + <property> + <name>oozie.launcher.mapreduce.user.classpath.first</name> + <value>true</value> + </property> + + <property> + <name>oozie.use.system.libpath</name> + <value>true</value> + </property> + <property> + <name>oozie.action.sharelib.for.spark</name> + <value>spark2</value> + </property> + <property> + <name>spark2EventLogDir</name> + <value>/user/spark/spark2ApplicationHistory</value> + </property> + <property> + <name>spark2ExtraListeners</name> + <value>"com.cloudera.spark.lineage.NavigatorAppListener"</value> + </property> + <property> + <name>spark2SqlQueryExecutionListeners</name> + <value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value> + </property> +</configuration> \ No newline at end of file diff --git a/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml new file mode 100644 index 000000000..0a7a2495f --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml @@ -0,0 +1,94 @@ +<workflow-app name="RdfConverter" xmlns="uri:oozie:workflow:0.5"> + <parameters> + <property> + <name>workingPath</name> + <value>/data/bioschema/disprot/</value> + <description>the working path</description> + 
</property> + <property> + <name>rdfInput</name> + <value>nquads.seq</value> + <description>rdf output of scraping workflow</description> + </property> + <property> + <name>output</name> + <value>json-datacite/</value> + </property> + <property> + <name>oozie.launcher.mapreduce.map.java.opts</name> + <value>-Xmx4g</value> + </property> + <property> + <name>spark2RdfConversionMaxExecutors</name> + <value>50</value> + </property> + <property> + <name>sparkDriverMemory</name> + <value>7G</value> + <description>memory for driver process</description> + </property> + <property> + <name>sparkExecutorMemory</name> + <value>2G</value> + <description>memory for individual executor</description> + </property> + <property> + <name>spark2ExtraListeners</name> + <value>com.cloudera.spark.lineage.NavigatorAppListener</value> + <description>spark 2.* extra listeners classname</description> + </property> + <property> + <name>spark2YarnHistoryServerAddress</name> + <description>spark 2.* yarn history server address</description> + </property> + <property> + <name>spark2EventLogDir</name> + <description>spark 2.* event log dir location</description> + </property> + </parameters> + + <global> + <job-tracker>${jobTracker}</job-tracker> + <name-node>${nameNode}</name-node> + </global> + + <start to="ResetWorkingPath"/> + <kill name="Kill"> + <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> + </kill> + + <action name="ResetWorkingPath"> + <fs> + <delete path='${workingPath}${output}'/> + </fs> + <ok to="NquadsToDataciteJson"/> + <error to="Kill"/> + </action> + + <action name="NquadsToDataciteJson"> + <spark xmlns="uri:oozie:spark-action:0.2"> + <master>yarn-cluster</master> + <mode>cluster</mode> + <name>NquadsToDataciteJson</name> + <class>eu.dnetlib.dhp.rdfconverter.bioschema.SparkRdfToDatacite</class> + <jar>dhp-rdfconverter-${projectVersion}.jar</jar> + <spark-opts> + --conf spark.dynamicAllocation.enabled=true + --conf 
spark.dynamicAllocation.maxExecutors=${spark2RdfConversionMaxExecutors} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + </spark-opts> + <arg>--nameNode</arg><arg>${nameNode}</arg> + <arg>--workingPath</arg><arg>${workingPath}</arg> + <arg>--rdfInput</arg><arg>${rdfInput}</arg> + <arg>--output</arg><arg>${output}</arg> + </spark> + <ok to="End"/> + <error to="Kill"/> + </action> + + <end name="End"/> +</workflow-app> \ No newline at end of file diff --git a/dhp-workflows/dhp-rdfconverter/src/main/resources/log4j.properties b/dhp-workflows/dhp-rdfconverter/src/main/resources/log4j.properties new file mode 100644 index 000000000..63cba917e --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/resources/log4j.properties @@ -0,0 +1,9 @@ +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=INFO, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. 
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n diff --git a/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java b/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java new file mode 100644 index 000000000..e74c17352 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java @@ -0,0 +1,31 @@ + +package eu.dnetlib.dhp.rdfconverter.bioschema; + +import java.io.InputStream; +import java.util.ArrayList; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.rdfconverter.utils.RDFConverter; + +public class ConverterTest { + + static Logger logger = LoggerFactory.getLogger(ConverterTest.class); + + @Test +// @Disabled + public void nqToDataciteTest() throws Exception { + InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq"); + String nq = IOUtils.toString(is); + logger.info("NQ: " + nq); + RDFConverter converter = new RDFConverter(); + ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq); + if (results != null && !results.isEmpty()) { + logger.info("JSON DATACITE: " + results.get(0)); + } + } +} diff --git a/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq b/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq new file mode 100644 index 000000000..f26a4b1d9 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq @@ -0,0 +1,52 @@ +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> <http://purl.org/pav/retrievedFrom> <https://disprot.org/DP01454> . 
+<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> <http://purl.org/pav/retrievedOn> "2021-11-25T12:23:57"^^<http://www.w3.org/2001/XMLSchema#dateTime> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> <http://purl.org/pav/createdWith> <https://github.com/HW-SWeL/BMUSE/releases/tag/0.5.2> . +<https://disprot.org/DP01454> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Protein> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <http://purl.org/dc/terms/conformsTo> <https://bioschemas.org/profiles/Protein/0.11-RELEASE> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/hasBioPolymerSequence> "MSTLFPSLFPRVTETLWFNLDRPCVEETELQQQEQQHQAWLQSIAEKDNNLVPIGKPASEHYDDEEEEDDEDDEDSEEDSEDDEDMQDMDEMNDYNESPDDGEVNEVDMEGNEQDQDQWMI" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/hasSequenceAnnotation> <https://disprot.org/DP01454#disorder-content> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/hasSequenceAnnotation> <https://disprot.org/DP01454r001> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/identifier> "https://identifiers.org/disprot:DP01454" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/includedInDataset> "https://disprot.org/#2021-08" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/name> "Anaphase-promoting complex subunit 15" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/sameAs> <http://purl.uniprot.org/uniprot/P60006> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . 
+<https://disprot.org/DP01454> <https://schema.org/taxonomicRange> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/profiles/Protein/0.11-RELEASE> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/CreativeWork> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454#disorder-content> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454#disorder-content> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454#disorder-content> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://schema.org/name> "Protein disorder content" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://schema.org/propertyID> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00499> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . 
+<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://schema.org/value> "5.371900826446281E-1" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <https://schema.org/rangeEnd> "121" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <https://schema.org/rangeStart> "1" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454r001> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454r001> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454r001> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454r001> <https://schema.org/subjectOf> <https://identifiers.org/pubmed:26083744> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . 
+<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://schema.org/name> "disorder" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://schema.org/termCode> "IDPO:00076" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/assets/data/IDPO_v0.2.owl> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTermSet> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/assets/data/IDPO_v0.2.owl> <https://schema.org/name> "IDP ontology" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <https://schema.org/rangeEnd> "121" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . 
+<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <https://schema.org/rangeStart> "57" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://identifiers.org/pubmed:26083744> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/ScholarlyArticle> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/inDefinedTermSet> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/sameAs> <http://purl.uniprot.org/taxonomy/9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/sameAs> <https://identifiers.org/taxonomy:9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/sameAs> <http://purl.obolibrary.org/obo/NCBITaxon_9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/termCode> "9606" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . 
+<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/url> <http://purl.bioontology.org/ontology/NCBITAXON/9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTermSet> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <https://schema.org/name> "NCBI taxon" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <https://schema.org/url> <https://bioportal.bioontology.org/ontologies/NCBITAXON> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <http://purl.org/dc/terms/title> "DisProt" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . \ No newline at end of file diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index 53d029467..db9608753 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -38,6 +38,8 @@ <module>dhp-usage-raw-data-update</module> <module>dhp-broker-events</module> <module>dhp-doiboost</module> + <module>dhp-bmuse</module> + <module>dhp-rdfconverter</module> </modules> <pluginRepositories> diff --git a/pom.xml b/pom.xml index 9e9bbaa16..e0ce76b03 100644 --- a/pom.xml +++ b/pom.xml @@ -105,6 +105,18 @@ <enabled>false</enabled> </snapshots> </repository> + <repository> + <id>dnet-deps</id> + <name>D-Net Dependencies</name> + <url>https://maven.d4science.org/nexus/content/repositories/dnet-deps/</url> + <releases> + <enabled>true</enabled> + </releases> + <snapshots> + <enabled>false</enabled> + </snapshots> + <layout>default</layout> + </repository> </repositories> <dependencies>