added modules for downloading data with bmuse and converting to datacite json format

This commit is contained in:
Enrico Ottonello 2022-02-22 12:53:19 +01:00
parent 2bc79c50f8
commit 4975278558
27 changed files with 2281 additions and 0 deletions

View File

@ -0,0 +1,96 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId>
<version>1.2.4-SNAPSHOT</version>
</parent>
<artifactId>dhp-bmuse</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
</dependency>
<dependency>
<groupId>hwu.elixir</groupId>
<artifactId>bmuse-core</artifactId>
<version>0.5.4</version>
</dependency>
<dependency>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23-core</artifactId>
<version>2.3</version>
</dependency>
<dependency>
<groupId>org.eclipse.rdf4j</groupId>
<artifactId>rdf4j-rio-rdfxml</artifactId>
<version>3.7.1</version>
</dependency>
<dependency>
<groupId>org.eclipse.rdf4j</groupId>
<artifactId>rdf4j-model</artifactId>
<version>3.7.1</version>
</dependency>
<!-- rdf4j: version changed from 2.5.4 to 3.7.1 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId>
<version>3.141.59</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>commons-validator</groupId>
<artifactId>commons-validator</artifactId>
<version>1.6</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>22.0</version>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>3.11.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.18</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.9.6</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>2.9.6</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.9.6</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,112 @@
package eu.dnetlib.dhp.bmuse.bioschema;
import java.text.SimpleDateFormat;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.bmuse.utils.ArgumentApplicationParser;
import eu.dnetlib.dhp.bmuse.utils.BMUSEScraper;
import eu.dnetlib.dhp.bmuse.utils.UrlParser;
/**
 * Command line job that scrapes the pages listed in a sitemap with {@link BMUSEScraper} and stores
 * the result in a Hadoop SequenceFile (key: page url, value: scraped n-quads), block-compressed
 * with gzip.
 */
public class ScrapingJob {

    static Logger logger = LoggerFactory.getLogger(ScrapingJob.class);

    /** Classpath location of the JSON file describing the accepted command line arguments. */
    private static final String ARGUMENTS_DEFINITION = "/eu/dnetlib/dhp/bmuse/bioschema/generate_dataset.json";

    /**
     * Runs the scraping.
     *
     * @param args command line arguments as described by {@code generate_dataset.json}:
     *            nameNode, workingPath, rdfOutput, sitemapUrl, sitemapURLKey and the optional
     *            dynamic and maxScrapedPages
     * @throws Exception when the arguments cannot be parsed, the sitemap cannot be read or the
     *             output file cannot be created; failures on single pages are only logged
     */
    public static void main(String[] args) throws Exception {

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(readArgumentsDefinition());
        parser.parseArgument(args);

        final String nameNode = parser.get("nameNode");
        final String workingPath = parser.get("workingPath");
        final String rdfOutput = parser.get("rdfOutput");
        final String sitemapUrl = parser.get("sitemapUrl");
        final String sitemapURLKey = parser.get("sitemapURLKey");
        final String dynamic = parser.get("dynamic");
        final String maxScrapedPages = parser.get("maxScrapedPages");

        // 'dynamic' is optional: when it is not given the scraper works in dynamic mode
        final boolean scrapingType = Objects.isNull(dynamic) || Boolean.parseBoolean(dynamic);

        logger
            .info(
                "*************************** STARTING_SCRAPE");

        final BMUSEScraper scraper = new BMUSEScraper();

        // NOTE(review): lowercasing the whole url also alters path and query, which may be
        // case sensitive on the remote server - confirm this is intended
        final String url = sitemapUrl.toLowerCase();
        final Elements urls = UrlParser.getSitemapList(url, sitemapURLKey);

        // 'maxScrapedPages' is optional: when it is not given every page of the sitemap is scraped
        Stream<Element> urlStream = urls.stream();
        if (Objects.nonNull(maxScrapedPages)) {
            urlStream = urlStream.limit(Long.parseLong(maxScrapedPages));
        }
        final List<Element> sites = urlStream.collect(Collectors.toList());
        logger.info("Pages available for scraping: {}", sites.size());

        final Path output = new Path(
            nameNode
                .concat(workingPath)
                .concat(rdfOutput));
        final Configuration conf = getHadoopConfiguration(nameNode);

        try (SequenceFile.Writer writer = SequenceFile
            .createWriter(
                conf,
                SequenceFile.Writer.file(output),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(Text.class),
                SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()))) {

            for (final Element u : sites) {
                final String site = u.text();
                try {
                    logger.debug("{} > parsing", site);
                    final String nquads = scraper.scrapeUrl(site, scrapingType);
                    writer.append(new Text(site), new Text(nquads));
                } catch (Throwable t) {
                    // best effort: a page that cannot be scraped must not stop the whole job,
                    // but the stack trace is kept so that the failure can be diagnosed
                    logger.error("{} {}", site, t.getMessage(), t);
                }
            }
        }

        logger
            .info(
                "*************************** ENDING_SCRAPE: ");
    }

    /**
     * Reads the JSON description of the command line arguments from the classpath.
     *
     * @return the content of the resource, decoded as UTF-8
     * @throws java.io.IOException when the resource cannot be read
     * @throws NullPointerException when the resource is not available on the classpath
     */
    private static String readArgumentsDefinition() throws java.io.IOException {
        try (java.io.InputStream in = Objects
            .requireNonNull(
                ScrapingJob.class.getResourceAsStream(ARGUMENTS_DEFINITION),
                "missing classpath resource " + ARGUMENTS_DEFINITION)) {
            return IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8.name());
        }
    }

    /**
     * Builds the Hadoop configuration used to write on the given name node.
     *
     * @param nameNode the URI of the name node, used as default file system
     * @return a new configuration
     */
    public static Configuration getHadoopConfiguration(String nameNode) {
        // ====== Init HDFS File System Object
        final Configuration conf = new Configuration();
        // Set FileSystem URI
        conf.set("fs.defaultFS", nameNode);
        // Because of Maven: the file system implementations are declared explicitly
        // (presumably because they are not discovered automatically in the packaged jar - TODO confirm)
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

        // note: this is a JVM-wide side effect
        System.setProperty("hadoop.home.dir", "/");
        return conf;
    }
}

View File

@ -0,0 +1,94 @@
package eu.dnetlib.dhp.bmuse.utils;
import java.io.*;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.cli.*;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
 * Parses command line arguments according to a list of {@link OptionsParameter}, usually read from
 * a JSON document. Values of options flagged as compressed are expected to be gzipped and base64
 * encoded, and are transparently decompressed while parsing.
 */
public class ArgumentApplicationParser implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(ArgumentApplicationParser.class);

    /** Charset used on both sides of the compression, so that the round trip is platform independent. */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    private final Options options = new Options();
    private final Map<String, String> objectMap = new HashMap<>();

    /** Long names of the options whose value must be decompressed. */
    private final List<String> compressedValues = new ArrayList<>();

    /**
     * @param json_configuration JSON array of option descriptions, see {@link OptionsParameter}
     * @throws IOException when the JSON cannot be mapped to an array of {@link OptionsParameter}
     */
    public ArgumentApplicationParser(final String json_configuration) throws IOException {
        final ObjectMapper mapper = new ObjectMapper();
        final OptionsParameter[] configuration = mapper.readValue(json_configuration, OptionsParameter[].class);
        createOptionMap(configuration);
    }

    /**
     * @param configuration the descriptions of the accepted options
     */
    public ArgumentApplicationParser(final OptionsParameter[] configuration) {
        createOptionMap(configuration);
    }

    /** Registers one command line option (always with an argument) per description. */
    private void createOptionMap(final OptionsParameter[] configuration) {
        for (final OptionsParameter conf : configuration) {
            final Option o = new Option(conf.getParamName(), true, conf.getParamDescription());
            o.setLongOpt(conf.getParamLongName());
            o.setRequired(conf.isParamRequired());
            if (conf.isCompressed()) {
                compressedValues.add(conf.getParamLongName());
            }
            options.addOption(o);
        }
    }

    /**
     * Reverts {@link #compressArgument(String)}.
     *
     * @param abstractCompressed a base64 encoded, gzipped, UTF-8 text
     * @return the original text
     * @throws IllegalArgumentException when the value is not valid gzipped content
     */
    public static String decompressValue(final String abstractCompressed) {
        final byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes(UTF_8));
        try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(byteArray))) {
            final StringWriter stringWriter = new StringWriter();
            IOUtils.copy(gis, stringWriter, UTF_8.name());
            return stringWriter.toString();
        } catch (IOException e) {
            log.error("Wrong value to decompress: {}", abstractCompressed);
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Gzips the UTF-8 bytes of the given value and encodes the result as base64.
     *
     * @param value the text to compress
     * @return the base64 representation of the gzipped value
     * @throws IOException when the compression fails
     */
    public static String compressArgument(final String value) throws IOException {
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        // closing the gzip stream writes the trailer: it must happen before reading the bytes
        try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
            gzip.write(value.getBytes(UTF_8));
        }
        return java.util.Base64.getEncoder().encodeToString(out.toByteArray());
    }

    /**
     * Parses the arguments and stores the value of every given option under its long name.
     *
     * @param args the command line arguments
     * @throws ParseException when the arguments do not match the configured options
     */
    public void parseArgument(final String[] args) throws ParseException {
        final CommandLineParser parser = new BasicParser();
        final CommandLine cmd = parser.parse(options, args);
        for (final Option it : cmd.getOptions()) {
            final String longOpt = it.getLongOpt();
            final String value = compressedValues.contains(longOpt)
                ? decompressValue(it.getValue())
                : it.getValue();
            objectMap.put(longOpt, value);
        }
    }

    /**
     * @param key the long name of an option
     * @return the parsed value, or null when the option was not given
     */
    public String get(final String key) {
        return objectMap.get(key);
    }

    /**
     * @return the parsed values by option long name; this is the internal map, not a copy
     */
    public Map<String, String> getObjectMap() {
        return objectMap;
    }
}

View File

@ -0,0 +1,91 @@
package eu.dnetlib.dhp.bmuse.utils;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import org.apache.any23.Any23;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.StringDocumentSource;
import org.apache.any23.writer.NTriplesWriter;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.Rio;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import hwu.elixir.scrape.exceptions.*;
import hwu.elixir.scrape.scraper.ScraperFilteredCore;
/**
 * BMUSE based scraper that downloads a page, extracts the structured markup it contains and
 * returns it serialized as n-quads.
 */
public class BMUSEScraper extends ScraperFilteredCore {

    private static final Logger logger = LoggerFactory.getLogger(BMUSEScraper.class.getName());

    /**
     * Scrapes a single page.
     *
     * @param url the address of the page
     * @param dynamic determines if the scraper should use selenium (true, dynamic) or JSOUP (false,
     *            static) to retrieve the page; must not be null
     * @return the markup found in the page, serialized as n-quads
     * @throws MissingMarkupException when no triples can be extracted from the page
     * @throws Exception when the page is empty, the rdf model cannot be built or the scraping fails
     */
    public String scrapeUrl(String url, Boolean dynamic) throws Exception {
        logger.debug("{} > scraping", url);
        url = fixURL(url);

        final String scraped = dynamic ? wrapHTMLExtraction(url) : wrapHTMLExtractionStatic(url);
        if (scraped == null || scraped.isEmpty()) {
            throw new Exception("empty html");
        }

        final String html = injectId(scraped, url);
        logger.debug("{} > html scraped from {}", url, url);

        final DocumentSource source = new StringDocumentSource(html, url);
        final String n3 = html2Triples(source, url);
        if (n3 == null) {
            throw new MissingMarkupException(url);
        }

        logger.debug("{} > processing triples", url);
        final IRI sourceIRI = SimpleValueFactory.getInstance().createIRI(source.getDocumentIRI());
        final Model updatedModel = processTriples(n3, sourceIRI, 0L);
        if (updatedModel == null) {
            throw new Exception("rdf model null");
        }

        logger.debug("{} > generating nquads", url);
        try (StringWriter nquadsWriter = new StringWriter()) {
            Rio.write(updatedModel, nquadsWriter, RDFFormat.NQUADS);
            logger.debug("{} > nquads generated", url);
            return nquadsWriter.toString();
        }
    }

    /**
     * Extracts the triples contained in the document with Any23.
     *
     * @param source the document to process
     * @param url the address of the document (currently not used)
     * @return the triples in n-triples format, or null when the extraction fails: the failure is
     *         logged and left to the caller to handle
     */
    private String html2Triples(DocumentSource source, String url) throws Exception {
        final Any23 runner = new Any23();
        try (ByteArrayOutputStream out = new ByteArrayOutputStream();
            TripleHandler handler = new NTriplesWriter(out);) {
            runner.extract(source, handler);
            return out.toString("UTF-8");
        } catch (ExtractionException e) {
            logger.error("Cannot extract triples", e);
        } catch (IOException e1) {
            logger.error(" IO error whilst extracting triples", e1);
        } catch (TripleHandlerException e2) {
            logger.error("TripleHanderException", e2);
        }
        return null;
    }
}

View File

@ -0,0 +1,35 @@
package eu.dnetlib.dhp.bmuse.utils;
/**
 * Description of a single command line option: short name, long name, description, whether it is
 * required and whether its value is passed in compressed form.
 */
public class OptionsParameter {

    private String paramName;
    private String paramLongName;
    private String paramDescription;
    private boolean paramRequired;
    private boolean compressed;

    /** @return the short name of the option */
    public String getParamName() {
        return this.paramName;
    }

    /** @return the long name of the option */
    public String getParamLongName() {
        return this.paramLongName;
    }

    /** @return the human readable description of the option */
    public String getParamDescription() {
        return this.paramDescription;
    }

    /** @return true when the option is mandatory */
    public boolean isParamRequired() {
        return this.paramRequired;
    }

    /** @return true when the value of the option is compressed */
    public boolean isCompressed() {
        return this.compressed;
    }

    /** @param compressed true when the value of the option is compressed */
    public void setCompressed(boolean compressed) {
        this.compressed = compressed;
    }
}

View File

@ -0,0 +1,65 @@
package eu.dnetlib.dhp.bmuse.utils;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import hwu.elixir.utils.Helpers;
/**
 * Utility that downloads a sitemap (plain or gzipped) and extracts its entries.
 */
public class UrlParser {

    private static final Logger logger = LoggerFactory.getLogger(UrlParser.class.getName());

    /**
     * Downloads the sitemap and selects the elements identified by the given key.
     *
     * @param url the address of the sitemap; when it ends with ".gz" (ignoring case) the content is
     *            downloaded as bytes and decompressed
     * @param sitemapURLKey the selector of the elements to return (e.g. "loc")
     * @return the selected elements; empty when the sitemap cannot be downloaded or parsed
     * @throws IOException declared for backward compatibility: download failures are logged and
     *             produce an empty result
     */
    public static Elements getSitemapList(String url, String sitemapURLKey) throws IOException {
        Document doc = new Document(url);

        try {
            logger.info("parse sitemap list");
            // this checks only the extension at the ending; urls shorter than the extension do not match
            if (url.regionMatches(true, url.length() - 3, ".gz", 0, 3)) {
                logger.info("compressed sitemap");
                // no limit on the body size, as for plain sitemaps: a truncated archive cannot be decompressed
                final byte[] bytes = Jsoup
                    .connect(url)
                    .ignoreContentType(true)
                    .maxBodySize(0)
                    .execute()
                    .bodyAsBytes();
                doc = Helpers.gzipFileDecompression(bytes);
            } else {
                doc = Jsoup.connect(url).maxBodySize(0).get();
            }
        } catch (IOException e) {
            // best effort: the (empty) initial document is used and no element will be found
            logger.error("Jsoup parsing exception: " + e.getMessage(), e);
        }

        if (doc == null) {
            // presumably the decompression returns null when the content is not valid - TODO confirm
            logger.error("sitemap cannot be parsed: {}", url);
            return new Elements();
        }

        final Elements elements = doc.select(sitemapURLKey);

        // check the html if it is a sitemapindex
        if (doc.outerHtml().contains("sitemapindex")) {
            // added warning for sitemap index files
            logger
                .warn(
                    "please note this is a sitemapindex file which is not currently supported, please use the content (url) of the urlset instead");
        }

        return elements;
    }
}

View File

@ -0,0 +1,44 @@
[
{
"paramName": "n",
"paramLongName": "nameNode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "workingPath",
"paramDescription": "the working path",
"paramRequired": true
},
{
"paramName": "r",
"paramLongName": "rdfOutput",
"paramDescription": "the name of the rdf output of the scraping step, relative to the working path",
"paramRequired": true
},
{
"paramName": "u",
"paramLongName": "sitemapUrl",
"paramDescription": "the sitemap url",
"paramRequired": true
},
{
"paramName": "k",
"paramLongName": "sitemapURLKey",
"paramDescription": "the sitemap file contains a list of xml entries, each one has a tag identified with sitemapURLKey with the url as value",
"paramRequired": true
},
{
"paramName": "d",
"paramLongName": "dynamic",
"paramDescription": "the dynamic boolean determines if the scraper should start using selenium or JSOUP to scrape the information (dynamic and static respectively)",
"paramRequired": false
},
{
"paramName": "m",
"paramLongName": "maxScrapedPages",
"paramDescription": "max number of pages that will be scraped, default: no limit",
"paramRequired": false
}
]

View File

@ -0,0 +1,18 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarn</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
</configuration>

View File

@ -0,0 +1,80 @@
<workflow-app name="BioSchemaHarvester" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>workingPath</name>
<value>/data/bioschema/ped/</value>
<description>the working path</description>
</property>
<property>
<name>sitemapUrl</name>
<value>https://proteinensemble.org/sitemap2.xml.gz</value>
</property>
<property>
<name>sitemapURLKey</name>
<value>loc</value>
</property>
<property>
<name>dynamic</name>
<value>true</value>
<description>the dynamic boolean determines if the scraper should start using selenium or JSOUP to scrape the information (dynamic and static respectively)</description>
</property>
<property>
<name>maxScrapedPages</name>
<value>10</value>
<description>max number of pages that will be scraped, default: no limit</description>
</property>
<property>
<name>rdfOutput</name>
<value>nquads.seq</value>
<description>rdf output of scraping step</description>
</property>
<property>
<name>scraping_java_opts</name>
<value>-Xmx4g -Dwebdriver.chrome.whitelistedIps=</value>
<description>Used to configure the heap size for the map JVM process. Should be 80% of mapreduce.map.memory.mb.</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
</global>
<start to="ResetWorkingPath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath}${rdfOutput}'/>
</fs>
<ok to="bmuseScraping"/>
<error to="Kill"/>
</action>
<action name="bmuseScraping">
    <!-- Runs the sitemap scraping as a java action; every workflow parameter that the job
         accepts is forwarded, including the optional page limit -->
    <java>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>oozie.launcher.mapreduce.user.classpath.first</name>
                <value>true</value>
            </property>
        </configuration>
        <main-class>eu.dnetlib.dhp.bmuse.bioschema.ScrapingJob</main-class>
        <java-opts>${scraping_java_opts}</java-opts>
        <arg>--nameNode</arg><arg>${nameNode}</arg>
        <arg>--workingPath</arg><arg>${workingPath}</arg>
        <arg>--rdfOutput</arg><arg>${rdfOutput}</arg>
        <arg>--sitemapUrl</arg><arg>${sitemapUrl}</arg>
        <arg>--sitemapURLKey</arg><arg>${sitemapURLKey}</arg>
        <arg>--dynamic</arg><arg>${dynamic}</arg>
        <arg>--maxScrapedPages</arg><arg>${maxScrapedPages}</arg>
    </java>
    <ok to="End"/>
    <error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,4 @@
maxLimitScrape=200000
schemaContext=https\://schema.org/docs/jsonldcontext.jsonld
dynamic=true
chromiumDriverLocation=/bin/chromedriver

View File

@ -0,0 +1,9 @@
# Set root logger level to INFO and its only appender to A1.
log4j.rootLogger=INFO, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n

View File

@ -0,0 +1,45 @@
package eu.dnetlib.dhp.bmuse.bioschema;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.any23.Any23;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.StringDocumentSource;
import org.apache.any23.writer.NTriplesWriter;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class Html2TriplesTest {
static Logger logger = LoggerFactory.getLogger(Html2TriplesTest.class);
@Test
// @Disabled
void conversionTest() throws Exception {
InputStream is = Html2TriplesTest.class.getResourceAsStream("/eu/dnetlib/dhp/bmuse/bioschema/ped.html");
String page = IOUtils.toString(is, StandardCharsets.UTF_8.name());
DocumentSource source = new StringDocumentSource(page, "https://proteinensemble.org/PED00001");
Any23 runner = new Any23();
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
TripleHandler handler = new NTriplesWriter(out);) {
runner.extract(source, handler);
logger.info(out.toString("UTF-8"));
} catch (ExtractionException e) {
logger.error("Cannot extract triples", e);
} catch (IOException e1) {
logger.error(" IO error whilst extracting triples", e1);
} catch (TripleHandlerException e2) {
logger.error("TripleHanderException", e2);
}
}
}

View File

@ -0,0 +1,24 @@
package eu.dnetlib.dhp.bmuse.bioschema;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.bmuse.utils.UrlParser;
/**
 * Manual check of the sitemap download: it needs network access, hence it is disabled.
 */
public class SitemapTest {

    static Logger logger = LoggerFactory.getLogger(SitemapTest.class);

    /** Downloads a gzipped sitemap and logs the text of every "loc" element found. */
    @Test
    @Disabled
    void sitemapGzTest() throws Exception {
        final Elements locations = UrlParser.getSitemapList("https://disprot.org/sitemap2.xml.gz", "loc");
        for (int i = 0; i < locations.size(); i++) {
            logger.info(locations.get(i).text());
        }
    }
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId>
<version>1.2.4-SNAPSHOT</version>
</parent>
<artifactId>dhp-rdfconverter</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23-core</artifactId>
<version>2.3</version>
</dependency>
<dependency>
<groupId>org.eclipse.rdf4j</groupId>
<artifactId>rdf4j-rio-rdfxml</artifactId>
<version>2.5.4</version>
</dependency>
<dependency>
<groupId>org.eclipse.rdf4j</groupId>
<artifactId>rdf4j-model</artifactId>
<version>2.5.4</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,72 @@
package eu.dnetlib.dhp.rdfconverter.bioschema;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.rdfconverter.utils.RDFConverter;
/**
 * Spark job that reads the n-quads records stored in a SequenceFile and converts them to datacite
 * json records, saved as gzipped text.
 */
public class SparkRdfToDatacite {

    static Logger logger = LoggerFactory.getLogger(SparkRdfToDatacite.class);

    /** Classpath location of the JSON file describing the accepted command line arguments. */
    private static final String ARGUMENTS_DEFINITION = "/eu/dnetlib/dhp/rdfconverter/bioschema/generate_dataset.json";

    /**
     * Runs the conversion.
     *
     * @param args command line arguments; the job reads workingPath, rdfInput, output, profile and
     *            the optional isSparkSessionManaged
     * @throws Exception when the arguments cannot be parsed or the spark job fails; records that
     *             cannot be converted are logged and skipped
     */
    public static void main(String[] args) throws Exception {

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(readArgumentsDefinition());
        parser.parseArgument(args);

        final Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);

        final String workingPath = parser.get("workingPath");
        final String rdfNquadsRecords = parser.get("rdfInput");
        final String output = parser.get("output");
        final String profile = parser.get("profile");

        final SparkConf conf = new SparkConf();
        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
                final String rdfNquadsRecordsPath = workingPath.concat(rdfNquadsRecords);
                final JavaPairRDD<Text, Text> rdfNquadsRecordsRDD = sc
                    .sequenceFile(rdfNquadsRecordsPath, Text.class, Text.class);
                logger.info("Rdf nquads records retrieved: {}", rdfNquadsRecordsRDD.count());

                final JavaRDD<Text> proteins = rdfNquadsRecordsRDD.flatMap(nquads -> {
                    final RDFConverter converter = new RDFConverter();
                    final ArrayList<String> jsonlds;
                    try {
                        jsonlds = converter.nQuadsFile2DataciteJson(nquads._2().toString(), profile);
                    } catch (Exception e) {
                        // best effort: a record that cannot be converted produces no output
                        logger.error(nquads._1().toString(), e);
                        return java.util.Collections.<String> emptyIterator();
                    }
                    return jsonlds.iterator();
                }).filter(Objects::nonNull).filter(jsonld -> !jsonld.isEmpty()).map(jsonld -> new Text(jsonld));

                // NOTE(review): the count triggers the conversion once more before the save - confirm it is worth it
                logger.info("json datacite generated: {}", proteins.count());
                proteins.saveAsTextFile(workingPath.concat(output), GzipCodec.class);
            });
    }

    /**
     * Reads the JSON description of the command line arguments from the classpath.
     *
     * @return the content of the resource, decoded as UTF-8
     * @throws java.io.IOException when the resource cannot be read
     * @throws NullPointerException when the resource is not available on the classpath
     */
    private static String readArgumentsDefinition() throws java.io.IOException {
        try (java.io.InputStream in = Objects
            .requireNonNull(
                SparkRdfToDatacite.class.getResourceAsStream(ARGUMENTS_DEFINITION),
                "missing classpath resource " + ARGUMENTS_DEFINITION)) {
            return IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8.name());
        }
    }
}

View File

@ -0,0 +1,373 @@
package eu.dnetlib.dhp.rdfconverter.bioschema.model;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * Jackson model of a bioschema protein record: an identifier, the list of entries found under
 * "@graph" and the retrieval date. Unknown properties of the record are ignored.
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public class BioSchemaProtein {

    @JsonProperty("@id")
    private String id;

    @JsonProperty("@graph")
    private List<Entry> entryList;

    @JsonProperty("http://purl.org/pav/retrievedOn")
    private DateTimeType retrievedOn;

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public List<Entry> getEntryList() {
        return this.entryList;
    }

    public void setEntryList(List<Entry> entryList) {
        this.entryList = entryList;
    }

    public DateTimeType getRetrievedOn() {
        return this.retrievedOn;
    }

    public void setRetrievedOn(DateTimeType retrievedOn) {
        this.retrievedOn = retrievedOn;
    }

    /** One element of the "@graph" list. */
    public static class Entry {

        @JsonProperty("@id")
        private String id;

        @JsonProperty("@type")
        private List<String> type;

        @JsonProperty("https://schema.org/identifier")
        private String identifier;

        @JsonProperty("https://schema.org/name")
        private String name;

        @JsonProperty("associatedDisease")
        private List<AssociatedDisease> associatedDisease;

        @JsonProperty("description")
        private String description;

        @JsonProperty("isEncodedByBioChemEntity")
        private String isEncodedByBioChemEntity;

        @JsonProperty("url")
        private String url;

        @JsonProperty("alternateName")
        private String alternateName;

        @JsonProperty("bioChemInteraction")
        private List<Link> bioChemInteraction;

        @JsonProperty("bioChemSimilarity")
        private List<Link> bioChemSimilarity;

        @JsonProperty("hasMolecularFunction")
        private String hasMolecularFunction;

        @JsonProperty("image")
        private String image;

        @JsonProperty("isInvolvedInBiologicalProcess")
        private String isInvolvedInBiologicalProcess;

        @JsonProperty("isPartOfBioChemEntity")
        private IsPartOfBioChemEntity isPartOfBioChemEntity;

        @JsonProperty("mainEntityOfPage")
        private Link mainEntityOfPage;

        @JsonProperty("https://schema.org/citation")
        private Citation citation;

        @JsonProperty("https://schema.org/sameAs")
        private List<Link> sameAs;

        public String getId() {
            return this.id;
        }

        public void setId(String id) {
            this.id = id;
        }

        public List<String> getType() {
            return this.type;
        }

        public void setType(List<String> type) {
            this.type = type;
        }

        public String getName() {
            return this.name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public List<Link> getSameAs() {
            return this.sameAs;
        }

        public void setSameAs(List<Link> sameAs) {
            this.sameAs = sameAs;
        }

        public String getIdentifier() {
            return this.identifier;
        }

        public void setIdentifier(String identifier) {
            this.identifier = identifier;
        }

        public String getDescription() {
            return this.description;
        }

        public void setDescription(String description) {
            this.description = description;
        }

        public String getIsEncodedByBioChemEntity() {
            return this.isEncodedByBioChemEntity;
        }

        public void setIsEncodedByBioChemEntity(String isEncodedByBioChemEntity) {
            this.isEncodedByBioChemEntity = isEncodedByBioChemEntity;
        }

        public String getUrl() {
            return this.url;
        }

        public void setUrl(String url) {
            this.url = url;
        }

        public String getAlternateName() {
            return this.alternateName;
        }

        public void setAlternateName(String alternateName) {
            this.alternateName = alternateName;
        }

        public List<Link> getBioChemInteraction() {
            return this.bioChemInteraction;
        }

        public void setBioChemInteraction(List<Link> bioChemInteraction) {
            this.bioChemInteraction = bioChemInteraction;
        }

        public List<Link> getBioChemSimilarity() {
            return this.bioChemSimilarity;
        }

        public void setBioChemSimilarity(List<Link> bioChemSimilarity) {
            this.bioChemSimilarity = bioChemSimilarity;
        }

        public String getHasMolecularFunction() {
            return this.hasMolecularFunction;
        }

        public void setHasMolecularFunction(String hasMolecularFunction) {
            this.hasMolecularFunction = hasMolecularFunction;
        }

        public String getImage() {
            return this.image;
        }

        public void setImage(String image) {
            this.image = image;
        }

        public String getIsInvolvedInBiologicalProcess() {
            return this.isInvolvedInBiologicalProcess;
        }

        public void setIsInvolvedInBiologicalProcess(String isInvolvedInBiologicalProcess) {
            this.isInvolvedInBiologicalProcess = isInvolvedInBiologicalProcess;
        }

        public List<AssociatedDisease> getAssociatedDisease() {
            return this.associatedDisease;
        }

        public void setAssociatedDisease(List<AssociatedDisease> associatedDisease) {
            this.associatedDisease = associatedDisease;
        }

        public IsPartOfBioChemEntity getIsPartOfBioChemEntity() {
            return this.isPartOfBioChemEntity;
        }

        public void setIsPartOfBioChemEntity(IsPartOfBioChemEntity isPartOfBioChemEntity) {
            this.isPartOfBioChemEntity = isPartOfBioChemEntity;
        }

        public Link getMainEntityOfPage() {
            return this.mainEntityOfPage;
        }

        public void setMainEntityOfPage(Link mainEntityOfPage) {
            this.mainEntityOfPage = mainEntityOfPage;
        }

        public Citation getCitation() {
            return this.citation;
        }

        public void setCitation(Citation citation) {
            this.citation = citation;
        }
    }

    /** Value of the "isPartOfBioChemEntity" property: a type and a url. */
    public static class IsPartOfBioChemEntity {

        @JsonProperty("@type")
        private String type;

        @JsonProperty("url")
        private String url;

        public String getType() {
            return this.type;
        }

        public void setType(String type) {
            this.type = type;
        }

        public String getUrl() {
            return this.url;
        }

        public void setUrl(String url) {
            this.url = url;
        }
    }

    /** Element of the "associatedDisease" property. */
    public static class AssociatedDisease {

        @JsonProperty("@type")
        private String type;

        @JsonProperty("name")
        private String name;

        @JsonProperty("code")
        private DeseaseCode code;

        @JsonProperty("id")
        private String id;

        public String getType() {
            return this.type;
        }

        public void setType(String type) {
            this.type = type;
        }

        public String getName() {
            return this.name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public DeseaseCode getCode() {
            return this.code;
        }

        public void setCode(DeseaseCode code) {
            this.code = code;
        }

        public String getId() {
            return this.id;
        }

        public void setId(String id) {
            this.id = id;
        }
    }

    /** Code of an associated disease: a value and the coding system it belongs to. */
    public static class DeseaseCode {

        @JsonProperty("@type")
        private String type;

        @JsonProperty("codeValue")
        private String codeValue;

        @JsonProperty("codingSystem")
        private String codingSystem;

        public String getType() {
            return this.type;
        }

        public void setType(String type) {
            this.type = type;
        }

        public String getCodeValue() {
            return this.codeValue;
        }

        public void setCodeValue(String codeValue) {
            this.codeValue = codeValue;
        }

        public String getCodingSystem() {
            return this.codingSystem;
        }

        public void setCodingSystem(String codingSystem) {
            this.codingSystem = codingSystem;
        }
    }

    /** Reference to another node, carrying only its "@id". */
    public static class Link {

        @JsonProperty("@id")
        private String id;

        public String getId() {
            return this.id;
        }

        public void setId(String id) {
            this.id = id;
        }
    }

    /** Typed literal made of a "@type" and a "@value". */
    public static class DateTimeType {

        @JsonProperty("@type")
        private String type;

        @JsonProperty("@value")
        private String value;

        public String getType() {
            return this.type;
        }

        public void setType(String type) {
            this.type = type;
        }

        public String getValue() {
            return this.value;
        }

        public void setValue(String value) {
            this.value = value;
        }
    }

    /** Reference to a citation, carrying only its "@id". */
    public static class Citation {

        @JsonProperty("@id")
        private String id;

        public String getId() {
            return this.id;
        }

        public void setId(String id) {
            this.id = id;
        }
    }
}

View File

@ -0,0 +1,291 @@
package eu.dnetlib.dhp.rdfconverter.bioschema.model;
import java.util.ArrayList;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
/**
 * DataCite-style record for a protein: a plain data holder with scalar properties (id, doi, publisher,
 * publication year), a {@link Types} block and several lists of nested value objects.
 *
 * <p>Every list is initialised to an empty {@link ArrayList}, so the list getters do not return
 * {@code null} unless a setter stored {@code null}. The class and its nested types are annotated with
 * {@code JsonInclude.Include.NON_NULL}.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public class DataciteProtein {

    private String id;
    private String doi;
    private Types types;
    List<Creators> creators = new ArrayList<>();
    private String publisher;
    private String publicationYear;
    private static final String schemaVersion = "http://datacite.org/schema/kernel-4";
    List<Identifier> identifiers = new ArrayList<>();
    List<RelatedIdentifier> relatedIdentifiers = new ArrayList<>();
    List<AlternateIdentifier> alternateIdentifiers = new ArrayList<>();
    List<Description> descriptions = new ArrayList<>();
    List<Title> titles = new ArrayList<>();
    private List<DataciteDate> dates = new ArrayList<>();

    /** Resource type information: a free-text type plus a general type. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class Types {

        private String resourceType;
        private String resourceTypeGeneral;

        public String getResourceType() {
            return this.resourceType;
        }

        public void setResourceType(String resourceType) {
            this.resourceType = resourceType;
        }

        public String getResourceTypeGeneral() {
            return this.resourceTypeGeneral;
        }

        public void setResourceTypeGeneral(String resourceTypeGeneral) {
            this.resourceTypeGeneral = resourceTypeGeneral;
        }
    }

    /** Placeholder for creator information; it currently declares no properties. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class Creators {
    }

    /** Primary identifier: a value plus the kind of identifier it is. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class Identifier {

        private String identifier;
        private String identifierType;

        public String getIdentifier() {
            return this.identifier;
        }

        public void setIdentifier(String identifier) {
            this.identifier = identifier;
        }

        public String getIdentifierType() {
            return this.identifierType;
        }

        public void setIdentifierType(String identifierType) {
            this.identifierType = identifierType;
        }
    }

    /** Identifier of a related resource, with the relation type and the identifier type. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class RelatedIdentifier {

        private String relationType;
        private String relatedIdentifier;
        private String relatedIdentifierType;

        public String getRelationType() {
            return this.relationType;
        }

        public void setRelationType(String relationType) {
            this.relationType = relationType;
        }

        public String getRelatedIdentifier() {
            return this.relatedIdentifier;
        }

        public void setRelatedIdentifier(String relatedIdentifier) {
            this.relatedIdentifier = relatedIdentifier;
        }

        public String getRelatedIdentifierType() {
            return this.relatedIdentifierType;
        }

        public void setRelatedIdentifierType(String relatedIdentifierType) {
            this.relatedIdentifierType = relatedIdentifierType;
        }
    }

    /** Alternate identifier: a value plus the kind of identifier it is. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class AlternateIdentifier {

        private String alternateIdentifier;
        private String alternateIdentifierType;

        public String getAlternateIdentifier() {
            return this.alternateIdentifier;
        }

        public void setAlternateIdentifier(String alternateIdentifier) {
            this.alternateIdentifier = alternateIdentifier;
        }

        public String getAlternateIdentifierType() {
            return this.alternateIdentifierType;
        }

        public void setAlternateIdentifierType(String alternateIdentifierType) {
            this.alternateIdentifierType = alternateIdentifierType;
        }
    }

    /** Description text plus its description type. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class Description {

        private String description;
        private String descriptionType;

        public String getDescription() {
            return this.description;
        }

        public void setDescription(String description) {
            this.description = description;
        }

        public String getDescriptionType() {
            return this.descriptionType;
        }

        public void setDescriptionType(String descriptionType) {
            this.descriptionType = descriptionType;
        }
    }

    /** Title text plus its title type. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class Title {

        private String title;
        private String titleType;

        public String getTitle() {
            return this.title;
        }

        public void setTitle(String title) {
            this.title = title;
        }

        public String getTitleType() {
            return this.titleType;
        }

        public void setTitleType(String titleType) {
            this.titleType = titleType;
        }
    }

    /** Date value (kept as a string) plus its date type. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class DataciteDate {

        private String date;
        private String dateType;

        public String getDate() {
            return this.date;
        }

        public void setDate(String date) {
            this.date = date;
        }

        public String getDateType() {
            return this.dateType;
        }

        public void setDateType(String dateType) {
            this.dateType = dateType;
        }
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getDoi() {
        return this.doi;
    }

    public void setDoi(String doi) {
        this.doi = doi;
    }

    public Types getTypes() {
        return this.types;
    }

    public void setTypes(Types types) {
        this.types = types;
    }

    public List<Creators> getCreators() {
        return this.creators;
    }

    public void setCreators(List<Creators> creators) {
        this.creators = creators;
    }

    public String getPublisher() {
        return this.publisher;
    }

    public void setPublisher(String publisher) {
        this.publisher = publisher;
    }

    public String getPublicationYear() {
        return this.publicationYear;
    }

    public void setPublicationYear(String publicationYear) {
        this.publicationYear = publicationYear;
    }

    /**
     * Returns the fixed schema version constant.
     *
     * <p>NOTE(review): this accessor is static; bean-style JSON introspection normally looks at instance
     * methods only, so the value is probably absent from the serialized record. Confirm whether that is
     * intended.
     */
    public static String getSchemaVersion() {
        return schemaVersion;
    }

    public List<RelatedIdentifier> getRelatedIdentifiers() {
        return this.relatedIdentifiers;
    }

    public void setRelatedIdentifiers(List<RelatedIdentifier> relatedIdentifiers) {
        this.relatedIdentifiers = relatedIdentifiers;
    }

    public List<AlternateIdentifier> getAlternateIdentifiers() {
        return this.alternateIdentifiers;
    }

    public void setAlternateIdentifiers(List<AlternateIdentifier> alternateIdentifiers) {
        this.alternateIdentifiers = alternateIdentifiers;
    }

    public List<Description> getDescriptions() {
        return this.descriptions;
    }

    public void setDescriptions(List<Description> descriptions) {
        this.descriptions = descriptions;
    }

    public List<Title> getTitles() {
        return this.titles;
    }

    public void setTitles(List<Title> titles) {
        this.titles = titles;
    }

    public List<Identifier> getIdentifiers() {
        return this.identifiers;
    }

    public void setIdentifiers(List<Identifier> identifiers) {
        this.identifiers = identifiers;
    }

    public List<DataciteDate> getDates() {
        return this.dates;
    }

    public void setDates(List<DataciteDate> dates) {
        this.dates = dates;
    }
}

View File

@ -0,0 +1,214 @@
package eu.dnetlib.dhp.rdfconverter.utils;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.*;
import java.util.stream.Collectors;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFWriter;
import org.eclipse.rdf4j.rio.Rio;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.jsonldjava.core.JsonLdOptions;
import com.github.jsonldjava.core.JsonLdProcessor;
import com.github.jsonldjava.utils.JsonUtils;
import eu.dnetlib.dhp.rdfconverter.bioschema.model.BioSchemaProtein;
import eu.dnetlib.dhp.rdfconverter.bioschema.model.DataciteProtein;
/**
 * Converts RDF records serialized as N-Quads into DataCite-style JSON strings.
 *
 * <p>The N-Quads text is parsed with RDF4J, written back as JSON-LD, compacted with an empty context and
 * bound to {@link BioSchemaProtein}. Every entry whose type list contains {@code https://schema.org/Protein}
 * is mapped to a {@link DataciteProtein} and serialized to one JSON string.
 */
public class RDFConverter {

    private static final Logger log = LoggerFactory.getLogger(RDFConverter.class);

    /** The only profile name accepted by {@link #nQuadsFile2DataciteJson(String, String)}. */
    private static final String PROTEIN_PROFILE = "Protein";

    /** Type IRI that marks an entry as a protein. */
    private static final String PROTEIN_TYPE = "https://schema.org/Protein";

    /**
     * Shared mapper, configured once and then only used for reading and writing, so it is not rebuilt for
     * every record and every entry.
     */
    private static final ObjectMapper MAPPER = new ObjectMapper()
        .enable(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY)
        .enable(DeserializationFeature.ACCEPT_EMPTY_STRING_AS_NULL_OBJECT)
        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    /**
     * Converts one N-Quads record into DataCite JSON strings according to the given profile.
     *
     * @param nquads the record, as N-Quads text
     * @param profile the conversion profile; only {@code "Protein"} is supported
     * @return one JSON string per protein entry found in the record (possibly empty)
     * @throws RuntimeException if the profile is not supported (including {@code null})
     * @throws Exception if the N-Quads text cannot be parsed or converted
     */
    public ArrayList<String> nQuadsFile2DataciteJson(String nquads, String profile) throws Exception {
        // Constant-first comparison so that a null profile is reported as unsupported instead of an NPE.
        if (PROTEIN_PROFILE.equals(profile)) {
            return nQuadsFile2DataciteJson(nquads);
        }
        throw new RuntimeException("Profile not supported: " + profile);
    }

    /**
     * Converts one N-Quads record into DataCite JSON strings using the protein mapping.
     *
     * @param nquads the record, as N-Quads text
     * @return one JSON string per protein entry; empty when the record holds no entries
     * @throws Exception if the N-Quads text cannot be parsed or converted
     */
    private ArrayList<String> nQuadsFile2DataciteJson(String nquads) throws Exception {
        String compactContent = toCompactJsonLd(nquads);
        log.debug("jsonld: {}", compactContent);

        BioSchemaProtein bioSchemaProtein = MAPPER.readValue(compactContent, BioSchemaProtein.class);
        log.debug("BioSchema id: {}", bioSchemaProtein.getId());

        ArrayList<String> results = new ArrayList<>();
        if (bioSchemaProtein.getEntryList() == null) {
            // Nothing to convert (e.g. an empty record): return no results rather than failing with an NPE.
            log.warn("No entries found in record {}", bioSchemaProtein.getId());
            return results;
        }

        BioSchemaProtein.DateTimeType retrievedOnType = bioSchemaProtein.getRetrievedOn();

        // Citation ids are collected across ALL entries of the record and attached to every protein.
        List<String> citations = bioSchemaProtein
            .getEntryList()
            .stream()
            .filter(Objects::nonNull)
            .map(entry -> entry.getCitation())
            .filter(Objects::nonNull)
            .map(BioSchemaProtein.Citation::getId)
            .filter(Objects::nonNull)
            .collect(Collectors.toList());

        bioSchemaProtein.getEntryList().forEach(entry -> {
            // anyMatch semantics: a type list that repeats the Protein type still identifies a protein.
            if (entry == null || entry.getType() == null
                || entry.getType().stream().noneMatch(PROTEIN_TYPE::equals)) {
                return;
            }

            DataciteProtein dataciteProtein = new DataciteProtein();
            citations.forEach(citation -> addRelatedIdentifier(dataciteProtein, citation, "CitedBy"));

            DataciteProtein.Types types = new DataciteProtein.Types();
            types.setResourceType("Protein");
            types.setResourceTypeGeneral("Dataset");
            dataciteProtein.setTypes(types);

            // The retrieval date is optional in the input; skip the "Collected" date when it is missing.
            if (retrievedOnType != null) {
                DataciteProtein.DataciteDate dataciteDate = new DataciteProtein.DataciteDate();
                dataciteDate.setDate(retrievedOnType.getValue());
                dataciteDate.setDateType("Collected");
                dataciteProtein.getDates().add(dataciteDate);
            }

            if (entry.getName() != null) {
                log.debug("Name: {}", entry.getName());
                addTitle(dataciteProtein, entry.getName(), null);
            }

            log.debug("Id: {}", entry.getId());
            DataciteProtein.Identifier identifier = new DataciteProtein.Identifier();
            identifier.setIdentifier(entry.getId());
            identifier.setIdentifierType("URL");
            dataciteProtein.getIdentifiers().add(identifier);

            if (entry.getIdentifier() != null) {
                log.debug("Identifier: {}", entry.getIdentifier());
                addAlternateIdentifier(dataciteProtein, entry.getIdentifier());
            }
            if (entry.getDescription() != null) {
                log.debug("description: {}", entry.getDescription());
                DataciteProtein.Description description = new DataciteProtein.Description();
                description.setDescription(entry.getDescription());
                dataciteProtein.getDescriptions().add(description);
            }
            // Mapped exactly once: a second identical mapping used to emit a duplicate related identifier.
            if (entry.getIsEncodedByBioChemEntity() != null) {
                log.debug("isEncodedByBioChemEntity: {}", entry.getIsEncodedByBioChemEntity());
                addRelatedIdentifier(dataciteProtein, entry.getIsEncodedByBioChemEntity(), "");
            }
            if (entry.getUrl() != null) {
                log.debug("url: {}", entry.getUrl());
                addAlternateIdentifier(dataciteProtein, entry.getUrl());
            }
            if (entry.getAlternateName() != null) {
                log.debug("alternateName: {}", entry.getAlternateName());
                addTitle(dataciteProtein, entry.getAlternateName(), "AlternativeTitle");
            }
            if (entry.getBioChemInteraction() != null) {
                entry.getBioChemInteraction().stream().filter(Objects::nonNull).forEach(bc -> {
                    log.debug("bioChemInteraction: {}", bc.getId());
                    addRelatedIdentifier(dataciteProtein, bc.getId(), "");
                });
            }
            if (entry.getBioChemSimilarity() != null) {
                entry.getBioChemSimilarity().stream().filter(Objects::nonNull).forEach(bc -> {
                    log.debug("bioChemSimilarity: {}", bc.getId());
                    addRelatedIdentifier(dataciteProtein, bc.getId(), "");
                });
            }
            if (entry.getHasMolecularFunction() != null) {
                log.debug("hasMolecularFunction: {}", entry.getHasMolecularFunction());
                addRelatedIdentifier(dataciteProtein, entry.getHasMolecularFunction(), "");
            }
            if (entry.getIsInvolvedInBiologicalProcess() != null) {
                log.debug("isInvolvedInBiologicalProcess: {}", entry.getIsInvolvedInBiologicalProcess());
                addRelatedIdentifier(dataciteProtein, entry.getIsInvolvedInBiologicalProcess(), "");
            }
            if (entry.getIsPartOfBioChemEntity() != null) {
                log.debug("isPartOfBioChemEntity: {}", entry.getIsPartOfBioChemEntity());
                addRelatedIdentifier(dataciteProtein, entry.getIsPartOfBioChemEntity().getUrl(), "");
            }
            if (entry.getSameAs() != null) {
                entry.getSameAs().stream().filter(Objects::nonNull).forEach(sameAs -> {
                    log.debug("sameAs: {}", sameAs.getId());
                    addRelatedIdentifier(dataciteProtein, sameAs.getId(), "IsIdenticalTo");
                });
            }
            if (entry.getAssociatedDisease() != null) {
                // NOTE(review): the disease NAME is emitted as an "IsIdenticalTo" related identifier;
                // this looks copied from the sameAs mapping - confirm the intended value and relation.
                entry.getAssociatedDisease().stream().filter(Objects::nonNull).forEach(ad -> {
                    log.debug("associated disease: {}", ad.getName());
                    addRelatedIdentifier(dataciteProtein, ad.getName(), "IsIdenticalTo");
                });
            }

            dataciteProtein.setId(extractProteinId(entry.getId()));

            try {
                results.add(MAPPER.writeValueAsString(dataciteProtein));
            } catch (Exception e) {
                throw new RuntimeException("Unable to serialize datacite record for entry " + entry.getId(), e);
            }
        });
        return results;
    }

    /** Parses N-Quads text and returns it as compacted JSON-LD (empty context). */
    private static String toCompactJsonLd(String nquads) throws Exception {
        Model model = Rio.parse(new StringReader(nquads), "", RDFFormat.NQUADS);
        StringWriter jsonLDWriter = new StringWriter();
        RDFWriter rdfRecordWriter = Rio.createWriter(RDFFormat.JSONLD, jsonLDWriter);
        Rio.write(model, rdfRecordWriter);
        Object jsonObject = JsonUtils.fromString(jsonLDWriter.toString());
        Object compact = JsonLdProcessor.compact(jsonObject, new HashMap<>(), new JsonLdOptions());
        return JsonUtils.toString(compact);
    }

    /**
     * Returns the last {@code /}-separated segment of the entry id, or an empty string (after logging an
     * error) when the id is missing or has no segments.
     */
    private static String extractProteinId(String entryId) {
        if (entryId != null) {
            String[] identifierParts = entryId.split("/");
            if (identifierParts.length > 0) {
                return identifierParts[identifierParts.length - 1];
            }
        }
        log.error("Identifier not found for entry id: {}", entryId);
        return "";
    }

    /** Appends a title to the record; the title type is only set when one is given. */
    private static void addTitle(DataciteProtein dataciteProtein, String titleValue, String titleType) {
        DataciteProtein.Title title = new DataciteProtein.Title();
        title.setTitle(titleValue);
        if (titleType != null) {
            title.setTitleType(titleType);
        }
        dataciteProtein.getTitles().add(title);
    }

    /** Appends a related identifier; the relation type is only set when it is non-empty. */
    private static void addRelatedIdentifier(DataciteProtein dataciteProtein, String relatedIdentifierValue,
        String relationType) {
        DataciteProtein.RelatedIdentifier relatedIdentifier = new DataciteProtein.RelatedIdentifier();
        relatedIdentifier.setRelatedIdentifier(relatedIdentifierValue);
        if (relationType != null && !relationType.isEmpty()) {
            relatedIdentifier.setRelationType(relationType);
        }
        dataciteProtein.getRelatedIdentifiers().add(relatedIdentifier);
    }

    /** Appends an alternate identifier carrying only its value. */
    private static void addAlternateIdentifier(DataciteProtein dataciteProtein, String alternateIdentifierValue) {
        DataciteProtein.AlternateIdentifier alternateIdentifier = new DataciteProtein.AlternateIdentifier();
        alternateIdentifier.setAlternateIdentifier(alternateIdentifierValue);
        dataciteProtein.getAlternateIdentifiers().add(alternateIdentifier);
    }
}

View File

@ -0,0 +1,32 @@
[
{
"paramName": "n",
"paramLongName": "nameNode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "workingPath",
"paramDescription": "the working path",
"paramRequired": true
},
{
"paramName": "i",
"paramLongName": "rdfInput",
"paramDescription": "sequence file inside working path that contains rdf records",
"paramRequired": true
},
{
"paramName": "o",
"paramLongName": "output",
"paramDescription": "relative path inside workingpath where bioschema dataset in datacite format will be stored",
"paramRequired": true
},
{
"paramName": "p",
"paramLongName": "profile",
"paramDescription": "the input data profile that has to be used for conversion (https://bioschemas.org/profiles/)",
"paramRequired": true
}
]

View File

@ -0,0 +1,65 @@
<configuration>
<!-- Hadoop-style property list: cluster endpoints, Oozie options and Spark 2 settings. -->
<!-- OCEAN endpoints (ocean.icm.edu.pl hosts): disabled, kept commented out. -->
<!-- <property>-->
<!-- <name>jobTracker</name>-->
<!-- <value>yarnRM</value>-->
<!-- </property>-->
<!-- <property>-->
<!-- <name>nameNode</name>-->
<!-- <value>hdfs://nameservice1</value>-->
<!-- </property>-->
<!-- <property>-->
<!-- <name>hive_metastore_uris</name>-->
<!-- <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>-->
<!-- </property>-->
<!-- <property>-->
<!-- <name>spark2YarnHistoryServerAddress</name>-->
<!-- <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>-->
<!-- </property>-->
<!-- GARR endpoints (garr-pa1.d4science.org hosts): the active values. -->
<property>
<name>jobTracker</name>
<value>hadoop-rm3.garr-pa1.d4science.org:8032</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
</property>
<property>
<name>hive_metastore_uris</name>
<value>thrift://hadoop-edge3.garr-pa1.d4science.org:9083</value>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<value>http://hadoop-rm2.garr-pa1.d4science.org:19888</value>
</property>
<!-- Oozie launcher and sharelib options. -->
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<!-- Spark 2 event log location and listener class names. -->
<property>
<name>spark2EventLogDir</name>
<value>/user/spark/spark2ApplicationHistory</value>
</property>
<!-- NOTE(review): the two listener values below include literal double quotes; confirm they are intended. -->
<property>
<name>spark2ExtraListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
</property>
</configuration>

View File

@ -0,0 +1,95 @@
<workflow-app name="RdfConverter" xmlns="uri:oozie:workflow:0.5">
<!-- Flow: ResetWorkingPath deletes the previous output, then NquadsToDataciteJson runs the Spark job; any action error goes to Kill. -->
<!-- Workflow parameters. spark2YarnHistoryServerAddress and spark2EventLogDir declare no default value here. -->
<parameters>
<property>
<name>workingPath</name>
<value>/data/bioschema/ped/</value>
<description>the working path</description>
</property>
<property>
<name>rdfInput</name>
<value>nquads.seq</value>
<description>rdf output of scraping workflow</description>
</property>
<property>
<name>output</name>
<value>json-datacite/</value>
</property>
<property>
<name>profile</name>
<value>Protein</value>
<description>the input data profile that has to be used for conversion (https://bioschemas.org/profiles/)</description>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4g</value>
</property>
<property>
<name>spark2RdfConversionMaxExecutors</name>
<value>50</value>
</property>
<property>
<name>sparkDriverMemory</name>
<value>7G</value>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<value>2G</value>
<description>memory for individual executor</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<start to="ResetWorkingPath"/>
<!-- Terminal failure node: reports the error message of the last failed action. -->
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<!-- Deletes the output location (workingPath concatenated with output) before the conversion runs. -->
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath}${output}'/>
</fs>
<ok to="NquadsToDataciteJson"/>
<error to="Kill"/>
</action>
<!-- Runs SparkRdfToDatacite on YARN in cluster mode, passing the workflow parameters as command line arguments. -->
<action name="NquadsToDataciteJson">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>NquadsToDataciteJson</name>
<class>eu.dnetlib.dhp.rdfconverter.bioschema.SparkRdfToDatacite</class>
<jar>dhp-rdfconverter-${projectVersion}.jar</jar>
<spark-opts>
--conf spark.dynamicAllocation.enabled=true
--conf spark.dynamicAllocation.maxExecutors=${spark2RdfConversionMaxExecutors}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--workingPath</arg><arg>${workingPath}</arg>
<arg>--rdfInput</arg><arg>${rdfInput}</arg>
<arg>--output</arg><arg>${output}</arg>
<arg>--profile</arg><arg>${profile}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,9 @@
# Set root logger level to INFO and its only appender to A1.
log4j.rootLogger=INFO, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n

View File

@ -0,0 +1,42 @@
package eu.dnetlib.dhp.rdfconverter.bioschema;
import java.io.InputStream;
import java.util.ArrayList;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.rdfconverter.utils.RDFConverter;
/**
 * Smoke tests for {@link RDFConverter}: each test converts one N-Quads fixture from the test classpath with
 * the {@code "Protein"} profile and logs the first JSON record produced, if any.
 */
public class ConverterTest {

    static Logger logger = LoggerFactory.getLogger(ConverterTest.class);

    @Test
    public void nqToDataciteTest() throws Exception {
        convertAndLog("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq");
    }

    @Test
    public void pedCitationTest() throws Exception {
        convertAndLog("/eu/dnetlib/dhp/rdfconverter/bioschema/ped.nq");
    }

    /**
     * Reads the given classpath resource as UTF-8 text, converts it and logs the first result.
     *
     * @param resourcePath absolute classpath location of the N-Quads fixture
     * @throws IllegalStateException if the fixture is not on the classpath
     * @throws Exception if reading or converting the fixture fails
     */
    private static void convertAndLog(String resourcePath) throws Exception {
        String nq;
        // try-with-resources closes the stream; the explicit charset avoids the platform default.
        try (InputStream is = ConverterTest.class.getResourceAsStream(resourcePath)) {
            if (is == null) {
                // Fail with a clear message instead of an NPE deep inside IOUtils.
                throw new IllegalStateException("Test resource not found: " + resourcePath);
            }
            nq = IOUtils.toString(is, "UTF-8");
        }
        logger.debug("NQ: {}", nq);
        RDFConverter converter = new RDFConverter();
        ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
        if (results != null && !results.isEmpty()) {
            logger.info("JSON DATACITE: {}", results.get(0));
        }
    }
}

View File

@ -0,0 +1,52 @@
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> <http://purl.org/pav/retrievedFrom> <https://disprot.org/DP01454> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> <http://purl.org/pav/retrievedOn> "2021-11-25T12:23:57"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> <http://purl.org/pav/createdWith> <https://github.com/HW-SWeL/BMUSE/releases/tag/0.5.2> .
<https://disprot.org/DP01454> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Protein> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454> <http://purl.org/dc/terms/conformsTo> <https://bioschemas.org/profiles/Protein/0.11-RELEASE> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454> <https://schema.org/hasBioPolymerSequence> "MSTLFPSLFPRVTETLWFNLDRPCVEETELQQQEQQHQAWLQSIAEKDNNLVPIGKPASEHYDDEEEEDDEDDEDSEEDSEDDEDMQDMDEMNDYNESPDDGEVNEVDMEGNEQDQDQWMI" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454> <https://schema.org/hasSequenceAnnotation> <https://disprot.org/DP01454#disorder-content> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454> <https://schema.org/hasSequenceAnnotation> <https://disprot.org/DP01454r001> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454> <https://schema.org/identifier> "https://identifiers.org/disprot:DP01454" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454> <https://schema.org/includedInDataset> "https://disprot.org/#2021-08" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454> <https://schema.org/name> "Anaphase-promoting complex subunit 15" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454> <https://schema.org/sameAs> <http://purl.uniprot.org/uniprot/P60006> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454> <https://schema.org/taxonomicRange> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/profiles/Protein/0.11-RELEASE> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/CreativeWork> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454#disorder-content> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454#disorder-content> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454#disorder-content> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://schema.org/name> "Protein disorder content" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://schema.org/propertyID> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00499> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://schema.org/value> "5.371900826446281E-1" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <https://schema.org/rangeEnd> "121" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <https://schema.org/rangeStart> "1" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454r001> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454r001> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454r001> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454r001> <https://schema.org/subjectOf> <https://identifiers.org/pubmed:26083744> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://schema.org/name> "disorder" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://schema.org/termCode> "IDPO:00076" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/assets/data/IDPO_v0.2.owl> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTermSet> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/assets/data/IDPO_v0.2.owl> <https://schema.org/name> "IDP ontology" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <https://schema.org/rangeEnd> "121" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <https://schema.org/rangeStart> "57" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://identifiers.org/pubmed:26083744> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/ScholarlyArticle> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/inDefinedTermSet> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/sameAs> <http://purl.uniprot.org/taxonomy/9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/sameAs> <https://identifiers.org/taxonomy:9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/sameAs> <http://purl.obolibrary.org/obo/NCBITaxon_9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/termCode> "9606" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/url> <http://purl.bioontology.org/ontology/NCBITAXON/9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTermSet> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <https://schema.org/name> "NCBI taxon" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <https://schema.org/url> <https://bioportal.bioontology.org/ontologies/NCBITAXON> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .
<https://disprot.org/DP01454> <http://purl.org/dc/terms/title> "DisProt" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> .

View File

@ -0,0 +1,229 @@
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> <http://purl.org/pav/retrievedFrom> <https://proteinensemble.org/PED00014> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> <http://purl.org/pav/retrievedOn> "2021-12-06T11:52:22"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> <http://purl.org/pav/createdWith> <https://github.com/HW-SWeL/BMUSE/releases/tag/0.5.2> .
<https://proteinensemble.org/PED00014> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/CollectionPage> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014> <https://schema.org/citation> <https://identifiers.org/pubmed:20399186> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014> <https://schema.org/identifier> "https://identifiers.org/ped:PED00014" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014> <https://schema.org/includedInDataset> "https://proteinensemble.org/#2021-02-12" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014> <https://schema.org/mainEntity> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014> <https://schema.org/name> "Structural ensemble of pSic1 (1-90) phosphorylated at Thr5, Thr33, Thr45, Ser69, Ser76 and Ser80, in complex with SKP1 (4-186) and CDC4 (270-744)" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://identifiers.org/pubmed:20399186> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/ScholarlyArticle> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/ItemList> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <https://schema.org/itemListElement> <https://proteinensemble.org/PED00014#P07834_D_0> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <https://schema.org/itemListElement> <https://proteinensemble.org/PED00014#P52286_C_0> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <https://schema.org/itemListElement> <https://proteinensemble.org/PED00014#P38634_A_1> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <https://schema.org/numberOfItems> "3" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Protein> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0> <http://purl.org/dc/terms/conformsTo> <https://bioschemas.org/profiles/Protein/0.11-RELEASE> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0> <https://schema.org/hasBioPolymerSequence> "LKRDLITSLPFEISLKIFNYLQFEDIINSLGVSQNWNKIIRKSTSLWKKLLISENFVSPKGFNSLNLKLSQKYPKLSQQDRLRLSFLENIFILKNWYNPKFVPQRTTLRGHMTSVITCLQFEDNYVITGADDKMIRVYDSINKKFLLQLSGHDGGVWALKYAHGGILVSGSTDRTVRVWDIKKGCCTHVFKGHNSTVRCLDIVEYKNIKYIVTGSRDNTLHVWKLPKESSVPDHGEEHDYPLVFHTPEENPYFVGVLRGHMASVRTVSGHGNIVVSGSYDNTLIVWDVAQMKCLYILSGHTDRIYSTIYDHERKRCISASMDTTIRIWDLENIWNNGECSYATNSASPCAKILGAMYTLQGHTALVGLLRLSDKFLVSAAADGSIRGWDANDYSRKFSYHHTNLSAITTFYVSDNILVSGSENQFNIYNLRSGKLVHANILKDADQIWSVNFKGKTLVAAVEKDGQSFLEILDFS" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0> <https://schema.org/hasSequenceAnnotation> <https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0> <https://schema.org/identifier> "https://identifiers.org/uniprot:P07834" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0> <https://schema.org/name> "Cell division control protein 4" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0> <https://schema.org/sameAs> <http://purl.uniprot.org/uniprot/P07834> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/profiles/Protein/0.11-RELEASE> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/CreativeWork> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/368314152> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2096895906> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1073956941> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/367565188> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1109163979> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/639913980> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/107821945> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2059079389> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1901071107> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/274002240> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/502163975> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1565361539> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/368314152> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/368314152> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/368314152> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://schema.org/name> "NMR" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://schema.org/termCode> "IDPO:00120" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTermSet> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl> <https://schema.org/name> "IDP ontology" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2096895906> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2096895906> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2096895906> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://schema.org/name> "RDC" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://schema.org/termCode> "IDPO:00166" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1073956941> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1073956941> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1073956941> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://schema.org/name> "chemical shift" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://schema.org/termCode> "IDPO:00167" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/367565188> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/367565188> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/367565188> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://schema.org/name> "relaxation" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://schema.org/termCode> "IDPO:00168" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1109163979> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1109163979> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1109163979> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://schema.org/name> "T2 relaxation" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://schema.org/termCode> "IDPO:00169" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/639913980> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/639913980> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/639913980> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://schema.org/name> "SAXS" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://schema.org/termCode> "IDPO:00125" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/107821945> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/107821945> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/107821945> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://schema.org/name> "TraDES" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://schema.org/termCode> "IDPO:00186" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2059079389> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2059079389> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2059079389> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://schema.org/name> "CNS" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://schema.org/termCode> "IDPO:00192" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1901071107> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1901071107> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1901071107> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://schema.org/name> "CRYSOL" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://schema.org/termCode> "IDPO:00208" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/274002240> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/274002240> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/274002240> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://schema.org/name> "ShiftX" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://schema.org/termCode> "IDPO:00210" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/502163975> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/502163975> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/502163975> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://schema.org/name> "ENSEMBLE" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://schema.org/termCode> "IDPO:00216" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1565361539> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1565361539> <https://schema.org/rangeEnd> "744" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1565361539> <https://schema.org/rangeStart> "270" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Protein> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0> <http://purl.org/dc/terms/conformsTo> <https://bioschemas.org/profiles/Protein/0.11-RELEASE> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0> <https://schema.org/hasBioPolymerSequence> "SNVVLVSGEGERFTVDKKIAERSLLLKNYLNDMHDSNLQNNSDSESDSDSETNHKSKDNNNGDDDDEDDDEIVMPVPNVRSSVLQKVIEWAEHHRDSNFPDEDDDDSRKSAPVDSWDREFLKVDQEMLYEIILAANYLNIKPLLDAGCKVVAEMIRGRSPEEIRRTFNIVNDFTPEEEAAIRR" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0> <https://schema.org/hasSequenceAnnotation> <https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0> <https://schema.org/identifier> "https://identifiers.org/uniprot:P52286" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0> <https://schema.org/sameAs> <http://purl.uniprot.org/uniprot/P52286> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/400036188> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1958752721> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1896099201> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1858414809> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/569393342> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/946591913> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/892712429> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/561273240> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/649186691> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1894898714> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1015020144> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/240836288> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/400036188> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/400036188> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/400036188> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1958752721> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1958752721> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1958752721> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1896099201> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1896099201> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1896099201> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1858414809> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1858414809> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1858414809> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/569393342> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/569393342> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/569393342> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/946591913> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/946591913> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/946591913> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/892712429> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/892712429> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/892712429> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/561273240> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/561273240> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/561273240> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/649186691> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/649186691> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/649186691> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1894898714> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1894898714> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1894898714> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1015020144> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1015020144> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1015020144> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/240836288> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/240836288> <https://schema.org/rangeEnd> "186" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/240836288> <https://schema.org/rangeStart> "4" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Protein> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1> <http://purl.org/dc/terms/conformsTo> <https://bioschemas.org/profiles/Protein/0.11-RELEASE> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1> <https://schema.org/hasBioPolymerSequence> "MTPSTPPRSRGTRYLAQPSGNTSSSALMQGQKTPQKPSQNLVPVTPSTTKSFKNAPLLAPPNSNMGMTSPFNGLTSPQRSPFPKSSVKRT" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1> <https://schema.org/hasSequenceAnnotation> <https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1> <https://schema.org/identifier> "https://identifiers.org/uniprot:P38634" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1> <https://schema.org/sameAs> <http://purl.uniprot.org/uniprot/P38634> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/420904342> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1005499560> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/803096160> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1785075011> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/943606610> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/696555349> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1412437827> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/837490728> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1021447597> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/725322807> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2067319117> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1503093711> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/420904342> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/420904342> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/420904342> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1005499560> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1005499560> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1005499560> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/803096160> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/803096160> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/803096160> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1785075011> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1785075011> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1785075011> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/943606610> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/943606610> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/943606610> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/696555349> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/696555349> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/696555349> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1412437827> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1412437827> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1412437827> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/837490728> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/837490728> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/837490728> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1021447597> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1021447597> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1021447597> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/725322807> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/725322807> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/725322807> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2067319117> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2067319117> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2067319117> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1503093711> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1503093711> <https://schema.org/rangeEnd> "90" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1503093711> <https://schema.org/rangeStart> "1" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .
<https://proteinensemble.org/PED00014> <http://purl.org/dc/terms/title> "PED" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> .

View File

@ -0,0 +1,11 @@
# Set root logger level to INFO and its only appender to A1.
log4j.rootLogger=INFO, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
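# Per-logger overrides: "org" loggers are limited to ERROR, "eu.dnetlib" loggers log at DEBUG.
# A1 uses PatternLayout (configured in the two lines after the overrides).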
log4j.logger.org = ERROR
log4j.logger.eu.dnetlib = DEBUG
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n