// ===== dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/bioschema/ScrapingJob.java =====

package eu.dnetlib.dhp.bmuse.bioschema;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.bmuse.utils.ArgumentApplicationParser;
import eu.dnetlib.dhp.bmuse.utils.BMUSEScraper;
import eu.dnetlib.dhp.bmuse.utils.UrlParser;

/**
 * Command-line job that reads the page URLs listed in a sitemap, scrapes every page with
 * {@link BMUSEScraper} and appends one (page URL, N-Quads) record per successfully scraped page to a
 * Hadoop {@link SequenceFile} written with BLOCK compression and {@link GzipCodec}.
 */
public class ScrapingJob {

	static Logger logger = LoggerFactory.getLogger(ScrapingJob.class);

	/** Classpath location of the JSON description of the accepted command-line options. */
	private static final String ARGUMENTS_SPEC = "/eu/dnetlib/dhp/bmuse/bioschema/generate_dataset.json";

	/**
	 * Entry point.
	 *
	 * @param args command-line options as described by {@code generate_dataset.json}: nameNode, workingPath,
	 *             rdfOutput, sitemapUrl, sitemapURLKey and the optional dynamic and maxScrapedPages
	 * @throws Exception if the arguments cannot be parsed, the sitemap cannot be processed or the output
	 *                   file cannot be created or closed; failures on single pages are logged and skipped
	 */
	public static void main(String[] args) throws Exception {

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(readArgumentsSpec());
		parser.parseArgument(args);

		final String nameNode = parser.get("nameNode");
		final String workingPath = parser.get("workingPath");
		final String rdfOutput = parser.get("rdfOutput");
		final String sitemapUrl = parser.get("sitemapUrl");
		final String sitemapURLKey = parser.get("sitemapURLKey");
		final String dynamic = parser.get("dynamic");
		final String maxScrapedPages = parser.get("maxScrapedPages");

		// dynamic scraping is the default; it is switched off only by an explicit non-"true" value
		final boolean scrapingType = Objects.isNull(dynamic) || Boolean.parseBoolean(dynamic);

		logger
			.info(
				"*************************** STARTING_SCRAPE");

		final BMUSEScraper scraper = new BMUSEScraper();
		// The sitemap URL is used exactly as given. It used to be lower-cased as a whole, which alters
		// the path and query (both case-sensitive, RFC 3986); UrlParser already detects the ".gz"
		// extension case-insensitively, so no normalisation is needed here.
		final Elements urls = UrlParser.getSitemapList(sitemapUrl, sitemapURLKey);

		final Path output = new Path(
			nameNode
				.concat(workingPath)
				.concat(rdfOutput));
		final Configuration conf = getHadoopConfiguration(nameNode);
		try (SequenceFile.Writer writer = SequenceFile
			.createWriter(
				conf,
				SequenceFile.Writer.file(output),
				SequenceFile.Writer.keyClass(Text.class),
				SequenceFile.Writer.valueClass(Text.class),
				SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()))) {

			Stream<Element> urlStream = urls.stream();
			if (Objects.nonNull(maxScrapedPages)) {
				urlStream = urlStream.limit(Long.parseLong(maxScrapedPages));
			}
			final List<Element> sites = urlStream.collect(Collectors.toList());
			logger.info("Pages available for scraping: " + sites.size());

			for (Element u : sites) {
				final String site = u.text();
				try {
					logger.debug(site + " > parsing");
					final String nquads = scraper.scrapeUrl(site, scrapingType);
					writer.append(new Text(site), new Text(nquads));
				} catch (Throwable t) {
					// deliberate best effort: one page that cannot be scraped must not stop the run;
					// the throwable is passed to the logger so the stack trace is not lost
					logger.error(site + " " + t.getMessage(), t);
				}
			}
		}

		logger
			.info(
				"*************************** ENDING_SCRAPE: ");
	}

	/**
	 * Reads the JSON option description from the classpath as UTF-8 and closes the stream afterwards.
	 *
	 * @return the content of {@value #ARGUMENTS_SPEC}
	 * @throws IOException          if the resource cannot be read
	 * @throws NullPointerException if the resource is not on the classpath
	 */
	private static String readArgumentsSpec() throws IOException {
		try (InputStream is = ScrapingJob.class.getResourceAsStream(ARGUMENTS_SPEC)) {
			Objects.requireNonNull(is, "missing classpath resource " + ARGUMENTS_SPEC);
			return IOUtils.toString(is, StandardCharsets.UTF_8);
		}
	}

	/**
	 * Builds the Hadoop configuration used to write the output file.
	 * <p>
	 * Besides filling the returned object, this method sets the JVM system property
	 * {@code hadoop.home.dir} to {@code "/"}.
	 *
	 * @param nameNode file system URI stored as {@code fs.defaultFS}
	 * @return a new configuration with {@code fs.defaultFS}, {@code fs.hdfs.impl} and {@code fs.file.impl} set
	 */
	public static Configuration getHadoopConfiguration(String nameNode) {
		// ====== Init HDFS File System Object
		Configuration conf = new Configuration();
		// Set FileSystem URI
		conf.set("fs.defaultFS", nameNode);
		// Because of Maven
		conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
		conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

		System.setProperty("hadoop.home.dir", "/");
		return conf;
	}
}

// ===== dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/utils/ArgumentApplicationParser.java =====

package eu.dnetlib.dhp.bmuse.utils;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.cli.*;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

/**
 * Builds a commons-cli option set from a list of {@link OptionsParameter} descriptors and, after
 * {@link #parseArgument(String[])}, exposes the parsed values by long option name. Values of options
 * flagged as compressed are Base64-decoded and gunzipped while parsing.
 */
public class ArgumentApplicationParser implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(ArgumentApplicationParser.class);

	private final Options options = new Options();
	// parsed values, keyed by long option name
	private final Map<String, String> objectMap = new HashMap<>();

	// long names of the options whose value arrives gzip-compressed and Base64-encoded
	private final List<String> compressedValues = new ArrayList<>();

	/**
	 * @param json_configuration JSON array of option descriptors, deserialized into {@link OptionsParameter}[]
	 * @throws IOException if the JSON cannot be parsed
	 */
	public ArgumentApplicationParser(final String json_configuration) throws IOException {
		final ObjectMapper mapper = new ObjectMapper();
		final OptionsParameter[] configuration = mapper.readValue(json_configuration, OptionsParameter[].class);
		createOptionMap(configuration);
	}

	/**
	 * @param configuration option descriptors to register
	 */
	public ArgumentApplicationParser(final OptionsParameter[] configuration) {
		createOptionMap(configuration);
	}

	/** Registers one commons-cli option (always taking a value) per descriptor. */
	private void createOptionMap(final OptionsParameter[] configuration) {
		for (final OptionsParameter conf : configuration) {
			final Option o = new Option(conf.getParamName(), true, conf.getParamDescription());
			o.setLongOpt(conf.getParamLongName());
			o.setRequired(conf.isParamRequired());
			if (conf.isCompressed()) {
				compressedValues.add(conf.getParamLongName());
			}
			options.addOption(o);
		}
	}

	/**
	 * Reverses {@link #compressArgument(String)}: Base64-decodes the input, gunzips it and decodes the
	 * result as UTF-8.
	 *
	 * @param abstractCompressed Base64 text of a gzip stream
	 * @return the decompressed text
	 * @throws IllegalArgumentException if the decoded bytes are not a readable gzip stream
	 */
	public static String decompressValue(final String abstractCompressed) {
		final byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes(StandardCharsets.UTF_8));
		// try-with-resources: the gzip stream is released on the failure path as well
		try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(byteArray))) {
			final StringWriter stringWriter = new StringWriter();
			IOUtils.copy(gis, stringWriter, StandardCharsets.UTF_8);
			return stringWriter.toString();
		} catch (IOException e) {
			log.error("Wrong value to decompress: {}", abstractCompressed);
			throw new IllegalArgumentException(e);
		}
	}

	/**
	 * Gzips the UTF-8 bytes of {@code value} and returns them Base64-encoded.
	 *
	 * @param value text to compress
	 * @return Base64 text of the gzip stream
	 * @throws IOException if compression fails
	 */
	public static String compressArgument(final String value) throws IOException {
		final ByteArrayOutputStream out = new ByteArrayOutputStream();
		// the gzip stream must be closed before the buffer is read so that the trailer is written
		try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
			gzip.write(value.getBytes(StandardCharsets.UTF_8));
		}
		return java.util.Base64.getEncoder().encodeToString(out.toByteArray());
	}

	/**
	 * Parses the command line and stores every recognised option under its long name, decompressing
	 * the values of options flagged as compressed.
	 *
	 * @param args raw command-line arguments
	 * @throws ParseException if the arguments do not match the registered options
	 */
	public void parseArgument(final String[] args) throws ParseException {
		CommandLineParser parser = new BasicParser();
		CommandLine cmd = parser.parse(options, args);
		for (final Option it : cmd.getOptions()) {
			final String longName = it.getLongOpt();
			final String value = compressedValues.contains(longName)
				? decompressValue(it.getValue())
				: it.getValue();
			objectMap.put(longName, value);
		}
	}

	/**
	 * @param key long option name
	 * @return the parsed value, or {@code null} when the option was not supplied
	 */
	public String get(final String key) {
		return objectMap.get(key);
	}

	/**
	 * @return the live map of parsed values keyed by long option name (not a copy)
	 */
	public Map<String, String> getObjectMap() {
		return objectMap;
	}
}

// ===== dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/utils/BMUSEScraper.java =====

package eu.dnetlib.dhp.bmuse.utils;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;

import org.apache.any23.Any23;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.StringDocumentSource;
import org.apache.any23.writer.NTriplesWriter;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.Rio;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import hwu.elixir.scrape.exceptions.*;
import hwu.elixir.scrape.scraper.ScraperFilteredCore;

/**
 * Scraper that turns the structured markup of a single web page into an N-Quads document.
 * <p>
 * {@code fixURL}, {@code wrapHTMLExtraction}, {@code wrapHTMLExtractionStatic}, {@code injectId} and
 * {@code processTriples} are not defined in this class; NOTE(review): presumably they are inherited
 * from {@link ScraperFilteredCore} — their exact behaviour is not visible here.
 */
public class BMUSEScraper extends ScraperFilteredCore {

	private static final Logger logger = LoggerFactory.getLogger(BMUSEScraper.class.getName());

	/**
	 * Downloads a page, extracts its triples and serialises the processed model as N-Quads.
	 *
	 * @param url     address of the page to scrape
	 * @param dynamic {@code true} to fetch the page with selenium (dynamic), {@code false} to fetch it
	 *                with JSOUP (static); must not be {@code null}
	 * @return the N-Quads serialisation of the processed model
	 * @throws MissingMarkupException if no triples could be extracted from the page
	 * @throws Exception              with message "empty html" when no HTML was obtained, or
	 *                                "rdf model null" when triple processing yields no model
	 */
	public String scrapeUrl(String url, Boolean dynamic) throws Exception {
		logger.debug(url + " > scraping");
		url = fixURL(url);

		// The dynamic boolean determines if the scraper should start using selenium or JSOUP to scrape
		// the information (dynamic and static respectively)
		String html = dynamic ? wrapHTMLExtraction(url) : wrapHTMLExtractionStatic(url);

		if (html == null || html.isEmpty())
			throw new Exception("empty html");

		html = injectId(html, url);

		logger.debug(url + " > html scraped from " + url);
		DocumentSource source = new StringDocumentSource(html, url);
		String n3 = html2Triples(source, url);
		if (n3 == null) {
			throw new MissingMarkupException(url);
		}

		logger.debug(url + " > processing triples");
		IRI sourceIRI = SimpleValueFactory.getInstance().createIRI(source.getDocumentIRI());
		Model updatedModel = processTriples(n3, sourceIRI, 0L);
		if (updatedModel == null) {
			throw new Exception("rdf model null");
		}

		logger.debug(url + " > generating nquads");
		try (StringWriter nquadsWriter = new StringWriter()) {
			Rio.write(updatedModel, nquadsWriter, RDFFormat.NQUADS);
			logger.debug(url + " > nquads generated");
			return nquadsWriter.toString();
		}
	}

	/**
	 * Runs Any23 over the document and returns the extracted triples as N-Triples text.
	 *
	 * @param source document to extract from
	 * @param url    address of the document (currently unused)
	 * @return the N-Triples text, or {@code null} when extraction failed (the failure is logged)
	 */
	private String html2Triples(DocumentSource source, String url) throws Exception {
		Any23 runner = new Any23();
		try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
			// The handler is closed before the buffer is read, following the Any23 usage example,
			// so that anything the writer only emits on close is part of the result.
			try (TripleHandler handler = new NTriplesWriter(out)) {
				runner.extract(source, handler);
			}
			return out.toString("UTF-8");
		} catch (ExtractionException e) {
			logger.error("Cannot extract triples", e);
		} catch (IOException e1) {
			logger.error(" IO error whilst extracting triples", e1);
		} catch (TripleHandlerException e2) {
			logger.error("TripleHanderException", e2);
		}
		return null;
	}
}
// ===== dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/utils/OptionsParameter.java =====

package eu.dnetlib.dhp.bmuse.utils;

/**
 * Descriptor of one command-line option, as listed in the JSON argument specification consumed by
 * {@link ArgumentApplicationParser}. Only {@code compressed} has a setter in this class.
 */
public class OptionsParameter {

	// short option name
	private String paramName;
	// long option name; parsed values are stored under this key
	private String paramLongName;
	// human-readable description of the option
	private String paramDescription;
	// whether the option must be present on the command line
	private boolean paramRequired;
	// whether the value is passed gzip-compressed and Base64-encoded
	private boolean compressed;

	/** @return the short option name */
	public String getParamName() {
		return paramName;
	}

	/** @return the long option name */
	public String getParamLongName() {
		return paramLongName;
	}

	/** @return the option description */
	public String getParamDescription() {
		return paramDescription;
	}

	/** @return {@code true} when the option is mandatory */
	public boolean isParamRequired() {
		return paramRequired;
	}

	/** @return {@code true} when the option value must be decompressed after parsing */
	public boolean isCompressed() {
		return compressed;
	}

	/** @param compressed whether the option value must be decompressed after parsing */
	public void setCompressed(boolean compressed) {
		this.compressed = compressed;
	}
}

// ===== dhp-workflows/dhp-bmuse/src/main/java/eu/dnetlib/dhp/bmuse/utils/UrlParser.java =====

package eu.dnetlib.dhp.bmuse.utils;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import hwu.elixir.utils.Helpers;

/**
 * Downloads a sitemap and returns the elements that carry the page URLs.
 */
public class UrlParser {

	private static final Logger logger = LoggerFactory.getLogger(UrlParser.class.getName());

	private static final String GZ_EXTENSION = ".gz";

	/**
	 * Fetches the sitemap at {@code url} and selects the elements matching {@code sitemapURLKey}.
	 * <p>
	 * A URL ending in ".gz" (any letter case) is downloaded as raw bytes and decompressed with
	 * {@link Helpers#gzipFileDecompression}; any other URL is parsed directly by Jsoup. Download
	 * errors are logged, not rethrown: in that case the selection runs on an empty document and the
	 * result is empty. Sitemap index files are not followed; a warning is logged for them.
	 *
	 * @param url           address of the sitemap
	 * @param sitemapURLKey selector of the elements holding the page URLs (for example {@code loc})
	 * @return the matching elements, possibly empty, never {@code null}
	 * @throws IOException declared for callers; the download errors handled here are only logged
	 */
	public static Elements getSitemapList(String url, String sitemapURLKey) throws IOException {

		// fallback used when the download fails: an empty document
		Document doc = new Document(url);

		try {
			logger.info("parse sitemap list");
			// this checks only the extension at the ending; regionMatches simply answers false for
			// URLs shorter than the extension, where substring() would throw
			final boolean compressed = url
				.regionMatches(true, url.length() - GZ_EXTENSION.length(), GZ_EXTENSION, 0, GZ_EXTENSION.length());
			if (compressed) {
				logger.info("compressed sitemap");
				// maxBodySize(0) lifts Jsoup's body size limit, as the uncompressed branch already
				// does, so that a large archive is not truncated before decompression
				byte[] bytes = Jsoup
					.connect(url)
					.ignoreContentType(true)
					.maxBodySize(0)
					.execute()
					.bodyAsBytes();
				doc = Helpers.gzipFileDecompression(bytes);
			} else {
				doc = Jsoup.connect(url).maxBodySize(0).get();
			}
		} catch (IOException e) {
			logger.error("Jsoup parsing exception: " + e.getMessage(), e);
		}

		// explicit check in place of catching NullPointerException
		// (assumes decompression may hand back null — TODO confirm)
		if (doc == null) {
			logger.error("no document obtained from sitemap " + url);
			return new Elements();
		}

		final Elements elements = doc.select(sitemapURLKey);

		// check the html if it is a sitemapindex
		if (doc.outerHtml().contains("sitemapindex")) {
			// added warning for sitemap index files
			logger
				.warn(
					"please note this is a sitemapindex file which is not currently supported, please use the content (url) of the urlset instead");
		}

		return elements;
	}
}
of xml entries, each one has a tag identified with sitemapURLKey with the url as value", + "paramRequired": true + }, + { + "paramName": "d", + "paramLongName": "dynamic", + "paramDescription": "the dynamic boolean determines if the scraper should start using selenium or JSOUP to scrape the information (dynamic and static respectively)", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "maxScrapedPages", + "paramDescription": "max number of pages that will be scraped, default: no limit", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/config-default.xml b/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/config-default.xml new file mode 100644 index 000000000..bca528314 --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/config-default.xml @@ -0,0 +1,18 @@ + + + jobTracker + yarn + + + nameNode + hdfs://hadoop-rm1.garr-pa1.d4science.org:8020 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/workflow.xml b/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/workflow.xml new file mode 100644 index 000000000..285801ea8 --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/oozie_app/workflow.xml @@ -0,0 +1,80 @@ + + + + workingPath + /data/bioschema/ped/ + the working path + + + sitemapUrl + https://proteinensemble.org/sitemap2.xml.gz + + + sitemapURLKey + loc + + + dynamic + true + the dynamic boolean determines if the scraper should start using selenium or JSOUP to scrape the information (dynamic and static respectively) + + + maxScrapedPages + 10 + max number of pages that will be scraped, default: no limit + + + 
rdfOutput + nquads.seq + rdf output of scraping step + + + scraping_java_opts + -Xmx4g -Dwebdriver.chrome.whitelistedIps= + Used to configure the heap size for the map JVM process. Should be 80% of mapreduce.map.memory.mb. + + + + + ${jobTracker} + ${nameNode} + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.bmuse.bioschema.ScrapingJob + ${scraping_java_opts} + --nameNode${nameNode} + --workingPath${workingPath} + --rdfOutput${rdfOutput} + --sitemapUrl${sitemapUrl} + --sitemapURLKey${sitemapURLKey} + --dynamic${dynamic} + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-bmuse/src/main/resources/localconfig.properties b/dhp-workflows/dhp-bmuse/src/main/resources/localconfig.properties new file mode 100644 index 000000000..26f94f2df --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/resources/localconfig.properties @@ -0,0 +1,4 @@ +maxLimitScrape=200000 +schemaContext=https\://schema.org/docs/jsonldcontext.jsonld +dynamic=true +chromiumDriverLocation=/bin/chromedriver \ No newline at end of file diff --git a/dhp-workflows/dhp-bmuse/src/main/resources/log4j.properties b/dhp-workflows/dhp-bmuse/src/main/resources/log4j.properties new file mode 100644 index 000000000..63cba917e --- /dev/null +++ b/dhp-workflows/dhp-bmuse/src/main/resources/log4j.properties @@ -0,0 +1,9 @@ +# Set root logger level to INFO and its only appender to A1. +log4j.rootLogger=INFO, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. 
// ===== dhp-workflows/dhp-bmuse/src/test/java/eu/dnetlib/dhp/bmuse/bioschema/Html2TriplesTest.java =====

package eu.dnetlib.dhp.bmuse.bioschema;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import org.apache.any23.Any23;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.StringDocumentSource;
import org.apache.any23.writer.NTriplesWriter;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Checks that Any23 can extract triples from the sample page {@code ped.html}.
 */
public class Html2TriplesTest {

	static Logger logger = LoggerFactory.getLogger(Html2TriplesTest.class);

	/**
	 * Loads the sample page from the test classpath, runs the Any23 extraction and logs the N-Triples.
	 * Extraction and I/O errors propagate, so the test fails instead of only logging them.
	 */
	@Test
	void conversionTest() throws Exception {
		final String page;
		try (InputStream is = Html2TriplesTest.class.getResourceAsStream("/eu/dnetlib/dhp/bmuse/bioschema/ped.html")) {
			Assertions.assertNotNull(is, "test resource ped.html not found on the classpath");
			page = IOUtils.toString(is, StandardCharsets.UTF_8.name());
		}

		DocumentSource source = new StringDocumentSource(page, "https://proteinensemble.org/PED00001");
		Any23 runner = new Any23();
		try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
			// close the handler before reading the buffer, following the Any23 usage example
			try (TripleHandler handler = new NTriplesWriter(out)) {
				runner.extract(source, handler);
			}
			logger.info(out.toString("UTF-8"));
		}
	}
}

// ===== dhp-workflows/dhp-bmuse/src/test/java/eu/dnetlib/dhp/bmuse/bioschema/SitemapTest.java =====

package eu.dnetlib.dhp.bmuse.bioschema;

import org.jsoup.select.Elements;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.bmuse.utils.UrlParser;

/**
 * Manual check of {@link UrlParser} against a live, gzip-compressed sitemap.
 */
public class SitemapTest {

	static Logger logger = LoggerFactory.getLogger(SitemapTest.class);

	/**
	 * Downloads the disprot.org sitemap and logs the text of every {@code loc} element.
	 * NOTE(review): presumably disabled because it needs network access — confirm.
	 */
	@Test
	@Disabled
	void sitemapGzTest() throws Exception {
		Elements urls = UrlParser.getSitemapList("https://disprot.org/sitemap2.xml.gz", "loc");
		urls.forEach(url -> {
			logger.info(url.text());
		});
	}
}

PED00001 - Structural ensemble of pSic1 (1-90) with phosphorylations at Thr5, Thr33, Thr45, Ser69, Ser76, Ser80

Experiments' raw data bmrb:16659 link
Publication
Structure/function implications in a dynamic complex of the intrinsically disordered Sic1 with the Cdc4 subunit of an SCF ubiquitin ligase. Mittag T, Marsh J, Grishaev A, Orlicky S, Lin H, Sicheri F, Tyers M, Forman-Kay JD. Structure, 2010pubmed:20399186

NMR Experiments: All NMR data were collected on Varian Inova 500 MHz, 600 MHz and 800 MHz spectrometers at 5 ?C. The NMR samples were prepared in PBS and spectra were processed and analysed using NMRPipe/NMR Draw. Assignments and relaxation experiments were reported previously. NMR data is deposited in the BMRB with accession codes 16657 and 16659 for Sic1 and pSic1, respectively. PRE experiments and NH R2 NMR experiments were performed . The paramagnetic contribution to the transverse relaxation rate, (i.e. the paramagnetic relaxation enhancement, PRE), is the difference between transverse relaxation rates in paramagnetic and diamagnetic states. 1DHN RDCs were measured on 0.3 mM and 0.2 mM Sic1 and pSic1 samples, respectively. Couplings were extracted using ?Fuda: A function and data fitting and analysis package?. Errors were calculated from at least duplicate data sets. SAXS data collection: Small angle x-ray scattering data were acquired at the Beam Line 12-IDC at the Advanced Light Source synchrotron (Argonne National Laboratory, Argonne, IL). A total of 20 sequential data frames with exposure times of 0.25 seconds were recorded. Samples and buffers were flowing during data collection to prevent radiation damage. Individual data frames were converted from 2D to 1D profiles and normalized by the corresponding incident beam intensities. The final 1D scattering profiles and their uncertainties were then calculated as means and standard deviations over the 20 frames and then buffer data were subtracted from the sample data.

Ensemble models of intrinsically disordered Sic1 and pSic1 were calculated using essentially the same approach as was described (Marsh and Forman-Kay, 2009). Distance restraints were calculated from PRE measurements. SAXS profiles of the experimentally restrained ensembles were calculated by predicting scattering curves for each individual member using the program CRYSOL (Svergun et al., 1995) and averaged over the members of the ensemble. Chemical shifts were calculated from individual conformers using SHIFTX (Neal et al., 2003). RDCs were calculated using a local alignment approach, in which local alignment tensors are calculated for 15 residue fragments of the sequence in a sliding window fashion (Marsh et al., 2008). 15N R2 relaxation rates were compared to the number of heavy atoms in an 8 A ? radius of each measured nucleus, as previously described (Marsh and Forman-Kay, 2009). + The Sic1 and pSic1 ensemble models comprised residues 1-90 of the full-length Sic1 amino acid sequence plus an N-terminal Gly-Ser sequence remaining after tag cleavage. Glutamate residues were used to represent the phosphorylated residues in pSic1 to facilitate use of TraDES (Feldman and Hogue, 2000). These glutamate residues were converted to the proper phosphorylated threonine or serine residues for electrostatic calculations. Three independent ensembles were calculated for each of free Sic1 and pSic1 and the pSic1 complex. Calculations were performed on a cluster of CPUs, with one main node performing the core conformational selection calculations and 8-12 nodes performing the iterative conformational sampling with CNS (Bru? nger et al., 1998), Unfoldtraj, and TraDES (Feldman and Hogue, 2000). The initial temperature for the ENSEMBLE calculations was set to 10,000 and decreased to 0.01 in 200,000 steps. 
The starting ensembles contained 200 structures and the number of conformers comprising the ensembles was decreased by one after each successful ENSEMBLE calculation in which full agreement with experimental restraints was achieved. Calculations were stopped when a smaller ensemble could not be successfully calculated within 72 hr.

Cross reference disprot:DP00631 link
102030405060708090P38634 GSMTPSTPPRSRGTRYLAQPSGNTSSSALMQGQKTPQKPSQNLVPVTPSTTKSFKNAPLLAPPNSNMGMTSPFNGLTSPQRSPFPKSSVKRTChain APTMsPED00001e001 Secondary structure entropyPED00001e001 Relative ASA PED00001e002 Secondary structure entropyPED00001e002 Relative ASA PED00001e003 Secondary structure entropyPED00001e003 Relative ASA

Deposited ensemble

Ensemble ID PED00001e001 Number of models 11

Chain: A
Secondary structure entropy 0.40Relative solvent accessibility 0.68 Radius of gyration 26.74
Parsing... [0/1481071]

Ensemble ID PED00001e002 Number of models 10

Chain: A
Secondary structure entropy 0.39Relative solvent accessibility 0.70 Radius of gyration 26.71
Parsing... [1485862/1485862]

Ensemble ID PED00001e003 Number of models 11

Chain: A
Secondary structure entropy 0.44Relative solvent accessibility 0.70 Radius of gyration 28.15
Model 1 / 10
// ===== dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/SparkRdfToDatacite.java =====

package eu.dnetlib.dhp.rdfconverter.bioschema;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Objects;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.rdfconverter.utils.RDFConverter;

/**
 * Spark job that reads a sequence file of (key, N-Quads) records, converts every N-Quads document with
 * {@link RDFConverter#nQuadsFile2DataciteJson} and stores the resulting JSON strings as gzip-compressed
 * text files.
 */
public class SparkRdfToDatacite {

	static Logger logger = LoggerFactory.getLogger(SparkRdfToDatacite.class);

	/** Classpath location of the JSON description of the accepted command-line options. */
	private static final String ARGUMENTS_SPEC = "/eu/dnetlib/dhp/rdfconverter/bioschema/generate_dataset.json";

	/**
	 * Entry point.
	 *
	 * @param args command-line options: workingPath, rdfInput, output, profile and the optional
	 *             isSparkSessionManaged (treated as {@code true} when absent)
	 * @throws Exception if the arguments cannot be parsed or the Spark job fails
	 */
	public static void main(String[] args) throws Exception {

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(readArgumentsSpec());
		parser.parseArgument(args);
		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		final String workingPath = parser.get("workingPath");
		final String rdfNquadsRecords = parser.get("rdfInput");
		final String output = parser.get("output");
		final String profile = parser.get("profile");

		SparkConf conf = new SparkConf();
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
				String rdfNquadsRecordsPath = workingPath.concat(rdfNquadsRecords);
				JavaPairRDD<Text, Text> rdfNquadsRecordsRDD = sc
					.sequenceFile(rdfNquadsRecordsPath, Text.class, Text.class);
				// NOTE(review): this count() and the one below are separate Spark actions in addition
				// to the final save; nothing is persisted, so the lineage is evaluated once per action
				logger.info("Rdf nquads records retrieved: {}", rdfNquadsRecordsRDD.count());

				JavaRDD<String> jsonRecords = rdfNquadsRecordsRDD.flatMap(nquads -> {
					RDFConverter converter = new RDFConverter();
					try {
						ArrayList<String> jsonlds = converter.nQuadsFile2DataciteJson(nquads._2().toString(), profile);
						return jsonlds.iterator();
					} catch (Exception e) {
						// a record that cannot be converted is logged with its key and contributes nothing
						logger.error(nquads._1().toString(), e);
						return Collections.<String> emptyIterator();
					}
				});
				JavaRDD<Text> proteins = jsonRecords
					.filter(Objects::nonNull)
					.filter(jsonld -> !jsonld.isEmpty())
					.map(jsonld -> new Text(jsonld));
				logger.info("json datacite generated: {}", proteins.count());
				proteins.saveAsTextFile(workingPath.concat(output), GzipCodec.class);
			});
	}

	/**
	 * Reads the JSON option description from the classpath as UTF-8 and closes the stream afterwards.
	 *
	 * @return the content of {@value #ARGUMENTS_SPEC}
	 * @throws IOException          if the resource cannot be read
	 * @throws NullPointerException if the resource is not on the classpath
	 */
	private static String readArgumentsSpec() throws IOException {
		try (InputStream is = SparkRdfToDatacite.class.getResourceAsStream(ARGUMENTS_SPEC)) {
			Objects.requireNonNull(is, "missing classpath resource " + ARGUMENTS_SPEC);
			return IOUtils.toString(is, StandardCharsets.UTF_8);
		}
	}
}
b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/BioSchemaProtein.java @@ -0,0 +1,373 @@

package eu.dnetlib.dhp.rdfconverter.bioschema.model;

import java.util.List;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;

/**
 * Jackson binding for a compacted JSON-LD document: the document {@code @id}, its {@code @graph}
 * entries and the {@code pav:retrievedOn} timestamp.
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public class BioSchemaProtein {
    @JsonProperty("@id")
    private String id;
    @JsonProperty("@graph")
    private List<Entry> entryList;
    @JsonProperty("http://purl.org/pav/retrievedOn")
    private DateTimeType retrievedOn;

    /**
     * One node of the {@code @graph} array.
     *
     * NOTE(review): identifier, name, citation and sameAs are bound by full https://schema.org/ IRI,
     * the remaining properties by short name. RDFConverter compacts with an empty context, so
     * please confirm that the short names ever match a key in the compacted document.
     */
    public static class Entry {
        @JsonProperty("@id")
        private String id;
        @JsonProperty("@type")
        private List<String> type;
        @JsonProperty("https://schema.org/identifier")
        private String identifier;
        @JsonProperty("https://schema.org/name")
        private String name;
        @JsonProperty("associatedDisease")
        private List<AssociatedDisease> associatedDisease;
        @JsonProperty("description")
        private String description;
        @JsonProperty("isEncodedByBioChemEntity")
        private String isEncodedByBioChemEntity;
        @JsonProperty("url")
        private String url;
        @JsonProperty("alternateName")
        private String alternateName;
        // Element type presumed to be Link: RDFConverter only calls getId() on the elements
        // of this list and the two below (bioChemSimilarity, sameAs) — TODO confirm.
        @JsonProperty("bioChemInteraction")
        private List<Link> bioChemInteraction;
        @JsonProperty("bioChemSimilarity")
        private List<Link> bioChemSimilarity;
        @JsonProperty("hasMolecularFunction")
        private String hasMolecularFunction;
        @JsonProperty("image")
        private String image;
        @JsonProperty("isInvolvedInBiologicalProcess")
        private String isInvolvedInBiologicalProcess;
        @JsonProperty("isPartOfBioChemEntity")
        private IsPartOfBioChemEntity isPartOfBioChemEntity;
        @JsonProperty("mainEntityOfPage")
        private Link mainEntityOfPage;
        @JsonProperty("https://schema.org/citation")
        private Citation citation;

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }

        public List<String> getType() {
            return type;
        }

        public void setType(List<String> type) {
            this.type = type;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        @JsonProperty("https://schema.org/sameAs")
        private List<Link> sameAs;

        public List<Link> getSameAs() {
            return sameAs;
        }

        public void setSameAs(List<Link> sameAs) {
            this.sameAs = sameAs;
        }

        public String getIdentifier() {
            return identifier;
        }

        public void setIdentifier(String identifier) {
            this.identifier = identifier;
        }

        public String getDescription() {
            return description;
        }

        public void setDescription(String description) {
            this.description = description;
        }

        public String getIsEncodedByBioChemEntity() {
            return isEncodedByBioChemEntity;
        }

        public void setIsEncodedByBioChemEntity(String isEncodedByBioChemEntity) {
            this.isEncodedByBioChemEntity = isEncodedByBioChemEntity;
        }

        public String getUrl() {
            return url;
        }

        public void setUrl(String url) {
            this.url = url;
        }

        public String getAlternateName() {
            return alternateName;
        }

        public void setAlternateName(String alternateName) {
            this.alternateName = alternateName;
        }

        public List<Link> getBioChemInteraction() {
            return bioChemInteraction;
        }

        public void setBioChemInteraction(List<Link> bioChemInteraction) {
            this.bioChemInteraction = bioChemInteraction;
        }

        public List<Link> getBioChemSimilarity() {
            return bioChemSimilarity;
        }

        public void setBioChemSimilarity(List<Link> bioChemSimilarity) {
            this.bioChemSimilarity = bioChemSimilarity;
        }

        public String getHasMolecularFunction() {
            return hasMolecularFunction;
        }

        public void setHasMolecularFunction(String hasMolecularFunction) {
            this.hasMolecularFunction = hasMolecularFunction;
        }

        public String getImage() {
            return image;
        }

        public void setImage(String image) {
            this.image = image;
        }

        public String getIsInvolvedInBiologicalProcess() {
            return isInvolvedInBiologicalProcess;
        }

        public void setIsInvolvedInBiologicalProcess(String isInvolvedInBiologicalProcess) {
            this.isInvolvedInBiologicalProcess = isInvolvedInBiologicalProcess;
        }

        public List<AssociatedDisease> getAssociatedDisease() {
            return associatedDisease;
        }

        public void setAssociatedDisease(List<AssociatedDisease> associatedDisease) {
            this.associatedDisease = associatedDisease;
        }

        public IsPartOfBioChemEntity getIsPartOfBioChemEntity() {
            return isPartOfBioChemEntity;
        }

        public void setIsPartOfBioChemEntity(IsPartOfBioChemEntity isPartOfBioChemEntity) {
            this.isPartOfBioChemEntity = isPartOfBioChemEntity;
        }

        public Link getMainEntityOfPage() {
            return mainEntityOfPage;
        }

        public void setMainEntityOfPage(Link mainEntityOfPage) {
            this.mainEntityOfPage = mainEntityOfPage;
        }

        public Citation getCitation() {
            return citation;
        }

        public void setCitation(Citation citation) {
            this.citation = citation;
        }
    }

    /** Value of {@code isPartOfBioChemEntity}: a typed reference carrying a URL. */
    public static class IsPartOfBioChemEntity {
        @JsonProperty("@type")
        private String type;
        @JsonProperty("url")
        private String url;

        public String getType() {
            return type;
        }

        public void setType(String type) {
            this.type = type;
        }

        public String getUrl() {
            return url;
        }

        public void setUrl(String url) {
            this.url = url;
        }
    }

    /** One element of {@code associatedDisease}. */
    public static class AssociatedDisease {
        @JsonProperty("@type")
        private String type;
        @JsonProperty("name")
        private String name;
        @JsonProperty("code")
        private DeseaseCode code;
        @JsonProperty("id")
        private String id;

        public String getType() {
            return type;
        }

        public void setType(String type) {
            this.type = type;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public DeseaseCode getCode() {
            return code;
        }

        public void setCode(DeseaseCode code) {
            this.code = code;
        }

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }
    }

    /** Code of an {@link AssociatedDisease}: value plus coding system. */
    public static class DeseaseCode {
        @JsonProperty("@type")
        private String type;
        @JsonProperty("codeValue")
        private String codeValue;
        @JsonProperty("codingSystem")
        private String codingSystem;

        public String getType() {
            return type;
        }

        public void setType(String type) {
            this.type = type;
        }

        public String getCodeValue() {
            return codeValue;
        }

        public void setCodeValue(String codeValue) {
            this.codeValue = codeValue;
        }

        public String getCodingSystem() {
            return codingSystem;
        }

        public void setCodingSystem(String codingSystem) {
            this.codingSystem = codingSystem;
        }
    }

    /** A JSON-LD node reference: an object holding only {@code @id}. */
    public static class Link {
        @JsonProperty("@id")
        private String id;

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }
    }

    /** A typed JSON-LD literal ({@code @type} / {@code @value}). */
    public static class DateTimeType {
        @JsonProperty("@type")
        private String type;
        @JsonProperty("@value")
        private String value;

        public String getType() {
            return type;
        }

        public void setType(String type) {
            this.type = type;
        }

        public String getValue() {
            return value;
        }

        public void setValue(String value) {
            this.value = value;
        }
    }

    /** Reference to a cited work, identified by {@code @id}. */
    public static class Citation {
        @JsonProperty("@id")
        private String id;

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public List<Entry> getEntryList() {
        return entryList;
    }

    public void setEntryList(List<Entry> entryList) {
        this.entryList = entryList;
    }

    public DateTimeType getRetrievedOn() {
        return retrievedOn;
    }

    public void setRetrievedOn(DateTimeType retrievedOn) {
        this.retrievedOn = retrievedOn;
    }
}
diff --git a/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/DataciteProtein.java b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/DataciteProtein.java new file mode 100644 index 000000000..6fe0963e2 --- /dev/null +++ 
b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/DataciteProtein.java @@ -0,0 +1,291 @@

package eu.dnetlib.dhp.rdfconverter.bioschema.model;

import java.util.ArrayList;
import java.util.List;

import com.fasterxml.jackson.annotation.JsonInclude;

/**
 * Datacite-shaped record serialized to JSON by RDFConverter; null properties are omitted.
 *
 * NOTE(review): {@link #getSchemaVersion()} is static, so it is presumably not picked up as a
 * bean property when the record is serialized — confirm whether schemaVersion should be emitted.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public class DataciteProtein {
    private String id;
    private String doi;
    private Types types;
    List<Creators> creators = new ArrayList<>();
    private String publisher;
    private String publicationYear;
    private static final String schemaVersion = "http://datacite.org/schema/kernel-4";
    List<Identifier> identifiers = new ArrayList<>();
    List<RelatedIdentifier> relatedIdentifiers = new ArrayList<>();
    List<AlternateIdentifier> alternateIdentifiers = new ArrayList<>();
    List<Description> descriptions = new ArrayList<>();
    List<Title> titles = new ArrayList<>();
    private List<DataciteDate> dates = new ArrayList<>();

    /** Resource type pair (specific and general). */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class Types {
        private String resourceType;
        private String resourceTypeGeneral;

        public String getResourceType() {
            return resourceType;
        }

        public void setResourceType(String resourceType) {
            this.resourceType = resourceType;
        }

        public String getResourceTypeGeneral() {
            return resourceTypeGeneral;
        }

        public void setResourceTypeGeneral(String resourceTypeGeneral) {
            this.resourceTypeGeneral = resourceTypeGeneral;
        }
    }

    /** Placeholder element type of the creators list; it carries no properties. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class Creators {
    }

    /** Primary identifier with its type. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class Identifier {
        private String identifier;
        private String identifierType;

        public String getIdentifier() {
            return identifier;
        }

        public void setIdentifier(String identifier) {
            this.identifier = identifier;
        }

        public String getIdentifierType() {
            return identifierType;
        }

        public void setIdentifierType(String identifierType) {
            this.identifierType = identifierType;
        }
    }

    /** Identifier of a related resource together with the relation to it. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class RelatedIdentifier {
        private String relationType;
        private String relatedIdentifier;
        private String relatedIdentifierType;

        public String getRelationType() {
            return relationType;
        }

        public void setRelationType(String relationType) {
            this.relationType = relationType;
        }

        public String getRelatedIdentifier() {
            return relatedIdentifier;
        }

        public void setRelatedIdentifier(String relatedIdentifier) {
            this.relatedIdentifier = relatedIdentifier;
        }

        public String getRelatedIdentifierType() {
            return relatedIdentifierType;
        }

        public void setRelatedIdentifierType(String relatedIdentifierType) {
            this.relatedIdentifierType = relatedIdentifierType;
        }
    }

    /** Additional identifier of the same resource. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class AlternateIdentifier {
        private String alternateIdentifier;
        private String alternateIdentifierType;

        public String getAlternateIdentifier() {
            return alternateIdentifier;
        }

        public void setAlternateIdentifier(String alternateIdentifier) {
            this.alternateIdentifier = alternateIdentifier;
        }

        public String getAlternateIdentifierType() {
            return alternateIdentifierType;
        }

        public void setAlternateIdentifierType(String alternateIdentifierType) {
            this.alternateIdentifierType = alternateIdentifierType;
        }
    }

    /** Free-text description with an optional type. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class Description {
        private String description;
        private String descriptionType;

        public String getDescription() {
            return description;
        }

        public void setDescription(String description) {
            this.description = description;
        }

        public String getDescriptionType() {
            return descriptionType;
        }

        public void setDescriptionType(String descriptionType) {
            this.descriptionType = descriptionType;
        }
    }

    /** Title with an optional type. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class Title {
        private String title;
        private String titleType;

        public String getTitle() {
            return title;
        }

        public void setTitle(String title) {
            this.title = title;
        }

        public String getTitleType() {
            return titleType;
        }

        public void setTitleType(String titleType) {
            this.titleType = titleType;
        }
    }

    /** Date with its type. */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class DataciteDate {
        private String date;
        private String dateType;

        public String getDate() {
            return date;
        }

        public void setDate(String date) {
            this.date = date;
        }

        public String getDateType() {
            return dateType;
        }

        public void setDateType(String dateType) {
            this.dateType = dateType;
        }
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getDoi() {
        return doi;
    }

    public void setDoi(String doi) {
        this.doi = doi;
    }

    public Types getTypes() {
        return types;
    }

    public void setTypes(Types types) {
        this.types = types;
    }

    public List<Creators> getCreators() {
        return creators;
    }

    public void setCreators(List<Creators> creators) {
        this.creators = creators;
    }

    public String getPublisher() {
        return publisher;
    }

    public void setPublisher(String publisher) {
        this.publisher = publisher;
    }

    public String getPublicationYear() {
        return publicationYear;
    }

    public void setPublicationYear(String publicationYear) {
        this.publicationYear = publicationYear;
    }

    public static String getSchemaVersion() {
        return schemaVersion;
    }

    public List<RelatedIdentifier> getRelatedIdentifiers() {
        return relatedIdentifiers;
    }

    public void setRelatedIdentifiers(List<RelatedIdentifier> relatedIdentifiers) {
        this.relatedIdentifiers = relatedIdentifiers;
    }

    public List<AlternateIdentifier> getAlternateIdentifiers() {
        return alternateIdentifiers;
    }

    public void setAlternateIdentifiers(List<AlternateIdentifier> alternateIdentifiers) {
        this.alternateIdentifiers = alternateIdentifiers;
    }

    public List<Description> getDescriptions() {
        return descriptions;
    }

    public void setDescriptions(List<Description> descriptions) {
        this.descriptions = descriptions;
    }

    public List<Title> getTitles() {
        return titles;
    }

    public void setTitles(List<Title> titles) {
        this.titles = titles;
    }

    public List<Identifier> getIdentifiers() {
        return identifiers;
    }

    public void setIdentifiers(List<Identifier> identifiers) {
        this.identifiers = identifiers;
    }

    public List<DataciteDate> getDates() {
        return dates;
    }

    public void setDates(List<DataciteDate> dates) {
        this.dates = dates;
    }
}
diff --git a/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/RDFConverter.java b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/RDFConverter.java new file mode 100644 index 000000000..d57a548bf --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/RDFConverter.java @@ -0,0 +1,214 @@

package eu.dnetlib.dhp.rdfconverter.utils;

import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFWriter;
import org.eclipse.rdf4j.rio.Rio;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.jsonldjava.core.JsonLdOptions;
import com.github.jsonldjava.core.JsonLdProcessor;
import com.github.jsonldjava.utils.JsonUtils;

import eu.dnetlib.dhp.rdfconverter.bioschema.model.BioSchemaProtein;
import eu.dnetlib.dhp.rdfconverter.bioschema.model.DataciteProtein;

/**
 * Converts an N-Quads document into Datacite JSON records, one per graph entry typed
 * {@code https://schema.org/Protein}.
 */
public class RDFConverter {

    private static final Logger log = LoggerFactory.getLogger(RDFConverter.class);

    private static final String PROTEIN_PROFILE = "Protein";
    private static final String PROTEIN_TYPE = "https://schema.org/Protein";

    /** Shared mapper: configured once and never reconfigured afterwards. */
    private static final ObjectMapper MAPPER = new ObjectMapper()
        .enable(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY)
        .enable(DeserializationFeature.ACCEPT_EMPTY_STRING_AS_NULL_OBJECT)
        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    /**
     * Converts an N-Quads document according to the given profile.
     *
     * @param nquads  the N-Quads document
     * @param profile the input profile; only {@code "Protein"} is supported
     * @return one Datacite JSON string per Protein entry, possibly empty, never {@code null}
     * @throws RuntimeException if the profile is not supported (including {@code null})
     * @throws Exception        if the document cannot be parsed, compacted, bound or serialized
     */
    public ArrayList<String> nQuadsFile2DataciteJson(String nquads, String profile) throws Exception {
        // Constant-first comparison: a null profile is reported as unsupported instead of an NPE.
        if (PROTEIN_PROFILE.equals(profile)) {
            return nQuadsFile2DataciteJson(nquads);
        }
        throw new RuntimeException("Profile not supported: " + profile);
    }

    /** Protein profile: N-Quads to compacted JSON-LD to model to one Datacite JSON per Protein entry. */
    private ArrayList<String> nQuadsFile2DataciteJson(String nquads) throws Exception {
        String compactContent = toCompactJsonLd(nquads);
        log.debug("jsonld: {}", compactContent);

        BioSchemaProtein bioSchemaProtein = MAPPER.readValue(compactContent, BioSchemaProtein.class);
        log.debug("BioSchema id: {}", bioSchemaProtein.getId());
        BioSchemaProtein.DateTimeType retrievedOn = bioSchemaProtein.getRetrievedOn();

        ArrayList<String> results = new ArrayList<>();
        // A document without @graph yields no entries rather than an NPE.
        List<BioSchemaProtein.Entry> graph = bioSchemaProtein.getEntryList();
        List<BioSchemaProtein.Entry> entries = graph == null
            ? Collections.<BioSchemaProtein.Entry> emptyList()
            : graph.stream().filter(Objects::nonNull).collect(Collectors.toList());

        // Citations found on any entry of the document are attached to every Protein entry.
        List<String> citations = entries
            .stream()
            .map(BioSchemaProtein.Entry::getCitation)
            .filter(Objects::nonNull)
            .map(BioSchemaProtein.Citation::getId)
            .filter(Objects::nonNull)
            .collect(Collectors.toList());

        for (BioSchemaProtein.Entry entry : entries) {
            if (entry.getType() != null && entry.getType().contains(PROTEIN_TYPE)) {
                results.add(MAPPER.writeValueAsString(toDatacite(entry, citations, retrievedOn)));
            }
        }
        return results;
    }

    /** Parses the N-Quads, writes them as JSON-LD and compacts the result with an empty context. */
    private String toCompactJsonLd(String nquads) throws Exception {
        Model model = Rio.parse(new StringReader(nquads), "", RDFFormat.NQUADS);
        StringWriter jsonLDWriter = new StringWriter();
        RDFWriter rdfRecordWriter = Rio.createWriter(RDFFormat.JSONLD, jsonLDWriter);
        Rio.write(model, rdfRecordWriter);
        Object jsonObject = JsonUtils.fromString(jsonLDWriter.toString());
        Object compact = JsonLdProcessor.compact(jsonObject, new HashMap<>(), new JsonLdOptions());
        return JsonUtils.toString(compact);
    }

    /** Maps one Protein entry to its Datacite representation. */
    private DataciteProtein toDatacite(BioSchemaProtein.Entry entry, List<String> citations,
        BioSchemaProtein.DateTimeType retrievedOn) {

        DataciteProtein dataciteProtein = new DataciteProtein();

        // NOTE(review): the DataCite relationType vocabulary spells this IsCitedBy / Cites;
        // "CitedBy" is kept as is — confirm what the downstream consumer expects.
        citations.forEach(citation -> addRelatedIdentifier(dataciteProtein, citation, "CitedBy"));

        DataciteProtein.Types types = new DataciteProtein.Types();
        types.setResourceType("Protein");
        types.setResourceTypeGeneral("Dataset");
        dataciteProtein.setTypes(types);

        // retrievedOn is optional in the input: without it no "Collected" date is emitted.
        if (retrievedOn != null && retrievedOn.getValue() != null) {
            DataciteProtein.DataciteDate dataciteDate = new DataciteProtein.DataciteDate();
            dataciteDate.setDate(retrievedOn.getValue());
            dataciteDate.setDateType("Collected");
            dataciteProtein.getDates().add(dataciteDate);
        }

        if (entry.getName() != null) {
            log.debug("Name: {}", entry.getName());
            DataciteProtein.Title title = new DataciteProtein.Title();
            title.setTitle(entry.getName());
            dataciteProtein.getTitles().add(title);
        }

        log.debug("Id: {}", entry.getId());
        DataciteProtein.Identifier identifier = new DataciteProtein.Identifier();
        identifier.setIdentifier(entry.getId());
        identifier.setIdentifierType("URL");
        dataciteProtein.getIdentifiers().add(identifier);

        if (entry.getIdentifier() != null) {
            log.debug("Identifier: {}", entry.getIdentifier());
            addAlternateIdentifier(dataciteProtein, entry.getIdentifier());
        }

        if (entry.getDescription() != null) {
            log.debug("description: {}", entry.getDescription());
            DataciteProtein.Description description = new DataciteProtein.Description();
            description.setDescription(entry.getDescription());
            dataciteProtein.getDescriptions().add(description);
        }

        // Emitted once; it used to be added a second time further down, duplicating the entry.
        if (entry.getIsEncodedByBioChemEntity() != null) {
            log.debug("isEncodedByBioChemEntity: {}", entry.getIsEncodedByBioChemEntity());
            addRelatedIdentifier(dataciteProtein, entry.getIsEncodedByBioChemEntity(), "");
        }

        if (entry.getUrl() != null) {
            log.debug("url: {}", entry.getUrl());
            addAlternateIdentifier(dataciteProtein, entry.getUrl());
        }

        if (entry.getAlternateName() != null) {
            log.debug("alternateName: {}", entry.getAlternateName());
            DataciteProtein.Title title = new DataciteProtein.Title();
            title.setTitle(entry.getAlternateName());
            title.setTitleType("AlternativeTitle");
            dataciteProtein.getTitles().add(title);
        }

        if (entry.getBioChemInteraction() != null) {
            entry.getBioChemInteraction().stream().filter(Objects::nonNull).forEach(bc -> {
                log.debug("bioChemInteraction: {}", bc.getId());
                addRelatedIdentifier(dataciteProtein, bc.getId(), "");
            });
        }

        if (entry.getBioChemSimilarity() != null) {
            entry.getBioChemSimilarity().stream().filter(Objects::nonNull).forEach(bc -> {
                log.debug("bioChemSimilarity: {}", bc.getId());
                addRelatedIdentifier(dataciteProtein, bc.getId(), "");
            });
        }

        if (entry.getHasMolecularFunction() != null) {
            log.debug("hasMolecularFunction: {}", entry.getHasMolecularFunction());
            addRelatedIdentifier(dataciteProtein, entry.getHasMolecularFunction(), "");
        }

        if (entry.getIsInvolvedInBiologicalProcess() != null) {
            log.debug("isInvolvedInBiologicalProcess: {}", entry.getIsInvolvedInBiologicalProcess());
            addRelatedIdentifier(dataciteProtein, entry.getIsInvolvedInBiologicalProcess(), "");
        }

        if (entry.getIsPartOfBioChemEntity() != null) {
            log.debug("isPartOfBioChemEntity: {}", entry.getIsPartOfBioChemEntity().getUrl());
            addRelatedIdentifier(dataciteProtein, entry.getIsPartOfBioChemEntity().getUrl(), "");
        }

        if (entry.getSameAs() != null) {
            entry.getSameAs().stream().filter(Objects::nonNull).forEach(sameAs -> {
                log.debug("sameAs: {}", sameAs.getId());
                addRelatedIdentifier(dataciteProtein, sameAs.getId(), "IsIdenticalTo");
            });
        }

        if (entry.getAssociatedDisease() != null) {
            entry.getAssociatedDisease().stream().filter(Objects::nonNull).forEach(ad -> {
                log.debug("associated disease: {}", ad.getName());
                addRelatedIdentifier(dataciteProtein, ad.getName(), "IsIdenticalTo");
            });
        }

        dataciteProtein.setId(lastPathSegment(entry.getId()));
        return dataciteProtein;
    }

    /**
     * Returns the text after the last {@code /} of {@code url}, or an empty string when there is
     * no usable segment (null input, or nothing but separators).
     */
    private static String lastPathSegment(String url) {
        if (url == null) {
            log.error("Identifier not found");
            return "";
        }
        String[] parts = url.split("/");
        if (parts.length == 0) {
            log.error("Identifier not found in {}", url);
            return "";
        }
        return parts[parts.length - 1];
    }

    /**
     * Appends a related identifier; an empty {@code relationType} leaves the relation unset.
     * Null values are skipped so that no empty object ends up in the output.
     */
    private void addRelatedIdentifier(DataciteProtein protein, String relatedIdentifierValue,
        String relationType) {
        if (relatedIdentifierValue == null) {
            return;
        }
        DataciteProtein.RelatedIdentifier relatedIdentifier = new DataciteProtein.RelatedIdentifier();
        relatedIdentifier.setRelatedIdentifier(relatedIdentifierValue);
        if (!relationType.isEmpty()) {
            relatedIdentifier.setRelationType(relationType);
        }
        protein.getRelatedIdentifiers().add(relatedIdentifier);
    }

    /** Appends an alternate identifier carrying only its value. */
    private void addAlternateIdentifier(DataciteProtein protein, String alternateIdentifierValue) {
        DataciteProtein.AlternateIdentifier alternateIdentifier = new DataciteProtein.AlternateIdentifier();
        alternateIdentifier.setAlternateIdentifier(alternateIdentifierValue);
        protein.getAlternateIdentifiers().add(alternateIdentifier);
    }
}
diff --git a/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/generate_dataset.json b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/generate_dataset.json new file mode 100644 index 000000000..4a4869ef0 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/generate_dataset.json @@ -0,0 +1,32 @@ +[ + { + "paramName": "n", + "paramLongName": "nameNode", + "paramDescription": "the Name Node URI", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingPath", + 
"paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "rdfInput", + "paramDescription": "sequence file inside working path that contains rdf records", + "paramRequired": true + }, + { + "paramName": "o", + "paramLongName": "output", + "paramDescription": "relative path inside workingpath where bioschema dataset in datacite format will be stored", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "profile", + "paramDescription": "the input data profile that has to be used for conversion (https://bioschemas.org/profiles/)", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/config-default.xml b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/config-default.xml new file mode 100644 index 000000000..95d48af07 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/config-default.xml @@ -0,0 +1,65 @@ +<configuration> + + <!-- OCEAN --> + +<!-- <property>--> +<!-- <name>jobTracker</name>--> +<!-- <value>yarnRM</value>--> +<!-- </property>--> +<!-- <property>--> +<!-- <name>nameNode</name>--> +<!-- <value>hdfs://nameservice1</value>--> +<!-- </property>--> +<!-- <property>--> +<!-- <name>hive_metastore_uris</name>--> +<!-- <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>--> +<!-- </property>--> +<!-- <property>--> +<!-- <name>spark2YarnHistoryServerAddress</name>--> +<!-- <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>--> +<!-- </property>--> + + + <!-- GARR --> + + <property> + <name>jobTracker</name> + <value>hadoop-rm3.garr-pa1.d4science.org:8032</value> + </property> + <property> + <name>nameNode</name> + <value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value> + </property> + <property> + <name>hive_metastore_uris</name> + 
<value>thrift://hadoop-edge3.garr-pa1.d4science.org:9083</value> + </property> + <property> + <name>spark2YarnHistoryServerAddress</name> + <value>http://hadoop-rm2.garr-pa1.d4science.org:19888</value> + </property> + <property> + <name>oozie.launcher.mapreduce.user.classpath.first</name> + <value>true</value> + </property> + <property> + <name>oozie.use.system.libpath</name> + <value>true</value> + </property> + <property> + <name>oozie.action.sharelib.for.spark</name> + <value>spark2</value> + </property> + <property> + <name>spark2EventLogDir</name> + <value>/user/spark/spark2ApplicationHistory</value> + </property> + <property> + <name>spark2ExtraListeners</name> + <value>"com.cloudera.spark.lineage.NavigatorAppListener"</value> + </property> + <property> + <name>spark2SqlQueryExecutionListeners</name> + <value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value> + </property> +</configuration> \ No newline at end of file diff --git a/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml new file mode 100644 index 000000000..9ab3dd603 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml @@ -0,0 +1,95 @@ +<workflow-app name="RdfConverter" xmlns="uri:oozie:workflow:0.5"> + <parameters> + <property> + <name>workingPath</name> + <value>/data/bioschema/ped/</value> + <description>the working path</description> + </property> + <property> + <name>rdfInput</name> + <value>nquads.seq</value> + <description>rdf output of scraping workflow</description> + </property> + <property> + <name>output</name> + <value>json-datacite/</value> + </property> + <property> + <name>profile</name> + <value>Protein</value> + <description>the input data profile that has to be used for conversion 
(https://bioschemas.org/profiles/)</description> + </property> + <property> + <name>oozie.launcher.mapreduce.map.java.opts</name> + <value>-Xmx4g</value> + </property> + <property> + <name>spark2RdfConversionMaxExecutors</name> + <value>50</value> + </property> + <property> + <name>sparkDriverMemory</name> + <value>7G</value> + <description>memory for driver process</description> + </property> + <property> + <name>sparkExecutorMemory</name> + <value>2G</value> + <description>memory for individual executor</description> + </property> + <property> + <name>spark2ExtraListeners</name> + <value>com.cloudera.spark.lineage.NavigatorAppListener</value> + <description>spark 2.* extra listeners classname</description> + </property> + <property> + <name>spark2YarnHistoryServerAddress</name> + <description>spark 2.* yarn history server address</description> + </property> + <property> + <name>spark2EventLogDir</name> + <description>spark 2.* event log dir location</description> + </property> + </parameters> + + <start to="ResetWorkingPath"/> + <kill name="Kill"> + <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> + </kill> + + <action name="ResetWorkingPath"> + <fs> + <delete path='${workingPath}${output}'/> + </fs> + <ok to="NquadsToDataciteJson"/> + <error to="Kill"/> + </action> + + <action name="NquadsToDataciteJson"> + <spark xmlns="uri:oozie:spark-action:0.2"> + <master>yarn-cluster</master> + <mode>cluster</mode> + <name>NquadsToDataciteJson</name> + <class>eu.dnetlib.dhp.rdfconverter.bioschema.SparkRdfToDatacite</class> + <jar>dhp-rdfconverter-${projectVersion}.jar</jar> + <spark-opts> + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2RdfConversionMaxExecutors} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} + </spark-opts> + <arg>--nameNode</arg><arg>${nameNode}</arg> + <arg>--workingPath</arg><arg>${workingPath}</arg> + <arg>--rdfInput</arg><arg>${rdfInput}</arg> + <arg>--output</arg><arg>${output}</arg> + <arg>--profile</arg><arg>${profile}</arg> + </spark> + <ok to="End"/> + <error to="Kill"/> + </action> + + <end name="End"/> +</workflow-app> \ No newline at end of file diff --git a/dhp-workflows/dhp-rdfconverter/src/main/resources/log4j.properties b/dhp-workflows/dhp-rdfconverter/src/main/resources/log4j.properties new file mode 100644 index 000000000..63cba917e --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/main/resources/log4j.properties @@ -0,0 +1,9 @@ +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=INFO, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n diff --git a/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java b/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java new file mode 100644 index 000000000..a6945ced8 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java @@ -0,0 +1,42 @@ + +package eu.dnetlib.dhp.rdfconverter.bioschema; + +import java.io.InputStream; +import java.util.ArrayList; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.rdfconverter.utils.RDFConverter; + +public class ConverterTest { + + static Logger logger = LoggerFactory.getLogger(ConverterTest.class); + + @Test + public void nqToDataciteTest() throws Exception { + InputStream is = 
ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq"); + String nq = IOUtils.toString(is); + logger.debug("NQ: " + nq); + RDFConverter converter = new RDFConverter(); + ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein"); + if (results != null && !results.isEmpty()) { + logger.info("JSON DATACITE: " + results.get(0)); + } + } + + @Test + public void pedCitationTest() throws Exception { + InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/ped.nq"); + String nq = IOUtils.toString(is); + logger.debug("NQ: " + nq); + RDFConverter converter = new RDFConverter(); + ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein"); + if (results != null && !results.isEmpty()) { + logger.info("JSON DATACITE: " + results.get(0)); + } + } +} diff --git a/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq b/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq new file mode 100644 index 000000000..f26a4b1d9 --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq @@ -0,0 +1,52 @@ +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> <http://purl.org/pav/retrievedFrom> <https://disprot.org/DP01454> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> <http://purl.org/pav/retrievedOn> "2021-11-25T12:23:57"^^<http://www.w3.org/2001/XMLSchema#dateTime> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> <http://purl.org/pav/createdWith> <https://github.com/HW-SWeL/BMUSE/releases/tag/0.5.2> . +<https://disprot.org/DP01454> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Protein> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . 
+<https://disprot.org/DP01454> <http://purl.org/dc/terms/conformsTo> <https://bioschemas.org/profiles/Protein/0.11-RELEASE> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/hasBioPolymerSequence> "MSTLFPSLFPRVTETLWFNLDRPCVEETELQQQEQQHQAWLQSIAEKDNNLVPIGKPASEHYDDEEEEDDEDDEDSEEDSEDDEDMQDMDEMNDYNESPDDGEVNEVDMEGNEQDQDQWMI" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/hasSequenceAnnotation> <https://disprot.org/DP01454#disorder-content> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/hasSequenceAnnotation> <https://disprot.org/DP01454r001> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/identifier> "https://identifiers.org/disprot:DP01454" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/includedInDataset> "https://disprot.org/#2021-08" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/name> "Anaphase-promoting complex subunit 15" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/sameAs> <http://purl.uniprot.org/uniprot/P60006> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <https://schema.org/taxonomicRange> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/profiles/Protein/0.11-RELEASE> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/CreativeWork> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . 
+<https://disprot.org/DP01454#disorder-content> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454#disorder-content> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454#disorder-content> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://schema.org/name> "Protein disorder content" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://schema.org/propertyID> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00499> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/159543474> <https://schema.org/value> "5.371900826446281E-1" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . 
+<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <https://schema.org/rangeEnd> "121" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/77094838> <https://schema.org/rangeStart> "1" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454r001> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454r001> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454r001> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454r001> <https://schema.org/subjectOf> <https://identifiers.org/pubmed:26083744> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1595402293> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . 
+<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://schema.org/name> "disorder" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076> <https://schema.org/termCode> "IDPO:00076" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/assets/data/IDPO_v0.2.owl> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTermSet> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/assets/data/IDPO_v0.2.owl> <https://schema.org/name> "IDP ontology" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <https://schema.org/rangeEnd> "121" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/1951282934> <https://schema.org/rangeStart> "57" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://identifiers.org/pubmed:26083744> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/ScholarlyArticle> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . 
+<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/inDefinedTermSet> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/sameAs> <http://purl.uniprot.org/taxonomy/9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/sameAs> <https://identifiers.org/taxonomy:9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/sameAs> <http://purl.obolibrary.org/obo/NCBITaxon_9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/termCode> "9606" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/200140588> <https://schema.org/url> <http://purl.bioontology.org/ontology/NCBITAXON/9606> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTermSet> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . 
+<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <https://schema.org/name> "NCBI taxon" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0/disprot.org/DP01454/827138196> <https://schema.org/url> <https://bioportal.bioontology.org/ontologies/NCBITAXON> <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . +<https://disprot.org/DP01454> <http://purl.org/dc/terms/title> "DisProt" <https://bioschemas.org/crawl/v1/disprot/DP01454/20211125/0> . \ No newline at end of file diff --git a/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/ped.nq b/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/ped.nq new file mode 100644 index 000000000..babe3aeef --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/ped.nq @@ -0,0 +1,229 @@ +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> <http://purl.org/pav/retrievedFrom> <https://proteinensemble.org/PED00014> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> <http://purl.org/pav/retrievedOn> "2021-12-06T11:52:22"^^<http://www.w3.org/2001/XMLSchema#dateTime> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> <http://purl.org/pav/createdWith> <https://github.com/HW-SWeL/BMUSE/releases/tag/0.5.2> . +<https://proteinensemble.org/PED00014> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/CollectionPage> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014> <https://schema.org/citation> <https://identifiers.org/pubmed:20399186> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://proteinensemble.org/PED00014> <https://schema.org/identifier> "https://identifiers.org/ped:PED00014" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014> <https://schema.org/includedInDataset> "https://proteinensemble.org/#2021-02-12" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014> <https://schema.org/mainEntity> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014> <https://schema.org/name> "Structural ensemble of pSic1 (1-90) phosphorylated at Thr5, Thr33, Thr45, Ser69, Ser76 and Ser80, in complex with SKP1 (4-186) and CDC4 (270-744)" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://identifiers.org/pubmed:20399186> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/ScholarlyArticle> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/ItemList> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <https://schema.org/itemListElement> <https://proteinensemble.org/PED00014#P07834_D_0> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <https://schema.org/itemListElement> <https://proteinensemble.org/PED00014#P52286_C_0> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <https://schema.org/itemListElement> <https://proteinensemble.org/PED00014#P38634_A_1> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/23786852> <https://schema.org/numberOfItems> "3" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Protein> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0> <http://purl.org/dc/terms/conformsTo> <https://bioschemas.org/profiles/Protein/0.11-RELEASE> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0> <https://schema.org/hasBioPolymerSequence> "LKRDLITSLPFEISLKIFNYLQFEDIINSLGVSQNWNKIIRKSTSLWKKLLISENFVSPKGFNSLNLKLSQKYPKLSQQDRLRLSFLENIFILKNWYNPKFVPQRTTLRGHMTSVITCLQFEDNYVITGADDKMIRVYDSINKKFLLQLSGHDGGVWALKYAHGGILVSGSTDRTVRVWDIKKGCCTHVFKGHNSTVRCLDIVEYKNIKYIVTGSRDNTLHVWKLPKESSVPDHGEEHDYPLVFHTPEENPYFVGVLRGHMASVRTVSGHGNIVVSGSYDNTLIVWDVAQMKCLYILSGHTDRIYSTIYDHERKRCISASMDTTIRIWDLENIWNNGECSYATNSASPCAKILGAMYTLQGHTALVGLLRLSDKFLVSAAADGSIRGWDANDYSRKFSYHHTNLSAITTFYVSDNILVSGSENQFNIYNLRSGKLVHANILKDADQIWSVNFKGKTLVAAVEKDGQSFLEILDFS" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0> <https://schema.org/hasSequenceAnnotation> <https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0> <https://schema.org/identifier> "https://identifiers.org/uniprot:P07834" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://proteinensemble.org/PED00014#P07834_D_0> <https://schema.org/name> "Cell division control protein 4" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0> <https://schema.org/sameAs> <http://purl.uniprot.org/uniprot/P07834> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/profiles/Protein/0.11-RELEASE> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/CreativeWork> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/368314152> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2096895906> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1073956941> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/367565188> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1109163979> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/639913980> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/107821945> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2059079389> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1901071107> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/274002240> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/502163975> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://proteinensemble.org/PED00014#P07834_D_0_270_744> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1565361539> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/368314152> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/368314152> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/368314152> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://schema.org/name> "NMR" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://schema.org/termCode> "IDPO:00120" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://disprot.org/assets/data/IDPO_v0.2.owl> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTermSet> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl> <https://schema.org/name> "IDP ontology" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2096895906> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2096895906> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2096895906> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://schema.org/name> "RDC" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://schema.org/termCode> "IDPO:00166" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1073956941> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1073956941> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1073956941> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://schema.org/name> "chemical shift" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://schema.org/termCode> "IDPO:00167" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/367565188> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/367565188> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/367565188> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://schema.org/name> "relaxation" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://schema.org/termCode> "IDPO:00168" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1109163979> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1109163979> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1109163979> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://schema.org/name> "T2 relaxation" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://schema.org/termCode> "IDPO:00169" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/639913980> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/639913980> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/639913980> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://schema.org/name> "SAXS" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://schema.org/termCode> "IDPO:00125" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/107821945> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/107821945> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/107821945> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://schema.org/name> "TraDES" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://schema.org/termCode> "IDPO:00186" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2059079389> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2059079389> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2059079389> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://schema.org/name> "CNS" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://schema.org/termCode> "IDPO:00192" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1901071107> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1901071107> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1901071107> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://schema.org/name> "CRYSOL" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://schema.org/termCode> "IDPO:00208" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/274002240> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/274002240> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/274002240> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://schema.org/name> "ShiftX" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://schema.org/termCode> "IDPO:00210" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/502163975> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/502163975> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/502163975> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/DefinedTerm> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://schema.org/inDefinedTermSet> <https://disprot.org/assets/data/IDPO_v0.2.owl> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://schema.org/name> "ENSEMBLE" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://schema.org/termCode> "IDPO:00216" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1565361539> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1565361539> <https://schema.org/rangeEnd> "744" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1565361539> <https://schema.org/rangeStart> "270" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Protein> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0> <http://purl.org/dc/terms/conformsTo> <https://bioschemas.org/profiles/Protein/0.11-RELEASE> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://proteinensemble.org/PED00014#P52286_C_0> <https://schema.org/hasBioPolymerSequence> "SNVVLVSGEGERFTVDKKIAERSLLLKNYLNDMHDSNLQNNSDSESDSDSETNHKSKDNNNGDDDDEDDDEIVMPVPNVRSSVLQKVIEWAEHHRDSNFPDEDDDDSRKSAPVDSWDREFLKVDQEMLYEIILAANYLNIKPLLDAGCKVVAEMIRGRSPEEIRRTFNIVNDFTPEEEAAIRR" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0> <https://schema.org/hasSequenceAnnotation> <https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0> <https://schema.org/identifier> "https://identifiers.org/uniprot:P52286" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0> <https://schema.org/sameAs> <http://purl.uniprot.org/uniprot/P52286> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/400036188> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1958752721> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1896099201> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1858414809> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/569393342> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/946591913> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/892712429> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/561273240> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/649186691> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1894898714> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1015020144> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P52286_C_0_4_186> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/240836288> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/400036188> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/400036188> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/400036188> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1958752721> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1958752721> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1958752721> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1896099201> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1896099201> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1896099201> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1858414809> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1858414809> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1858414809> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/569393342> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/569393342> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/569393342> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/946591913> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/946591913> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/946591913> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/892712429> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/892712429> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/892712429> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/561273240> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/561273240> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/561273240> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/649186691> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/649186691> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/649186691> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1894898714> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1894898714> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1894898714> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1015020144> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1015020144> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1015020144> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/240836288> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/240836288> <https://schema.org/rangeEnd> "186" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/240836288> <https://schema.org/rangeStart> "4" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/Protein> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1> <http://purl.org/dc/terms/conformsTo> <https://bioschemas.org/profiles/Protein/0.11-RELEASE> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1> <https://schema.org/hasBioPolymerSequence> "MTPSTPPRSRGTRYLAQPSGNTSSSALMQGQKTPQKPSQNLVPVTPSTTKSFKNAPLLAPPNSNMGMTSPFNGLTSPQRSPFPKSSVKRT" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1> <https://schema.org/hasSequenceAnnotation> <https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1> <https://schema.org/identifier> "https://identifiers.org/uniprot:P38634" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1> <https://schema.org/sameAs> <http://purl.uniprot.org/uniprot/P38634> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceAnnotation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/420904342> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1005499560> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/803096160> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1785075011> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/943606610> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/696555349> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1412437827> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/837490728> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1021447597> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/725322807> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/additionalProperty> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2067319117> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014#P38634_A_1_1_90> <https://schema.org/sequenceLocation> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1503093711> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/420904342> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/420904342> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/420904342> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1005499560> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1005499560> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1005499560> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00166> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/803096160> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/803096160> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/803096160> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00167> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1785075011> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1785075011> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1785075011> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00168> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/943606610> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/943606610> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/943606610> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00169> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/696555349> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/696555349> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/696555349> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1412437827> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1412437827> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1412437827> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00186> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/837490728> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/837490728> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/837490728> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00192> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1021447597> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1021447597> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1021447597> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00208> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/725322807> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/725322807> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/725322807> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00210> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2067319117> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/PropertyValue> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2067319117> <https://schema.org/name> "Term" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/2067319117> <https://schema.org/value> <https://disprot.org/assets/data/IDPO_v0.2.owl:00216> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . 
+<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1503093711> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://schema.org/SequenceRange> <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1503093711> <https://schema.org/rangeEnd> "90" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1/proteinensemble.org/PED00014/1503093711> <https://schema.org/rangeStart> "1" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . +<https://proteinensemble.org/PED00014> <http://purl.org/dc/terms/title> "PED" <https://bioschemas.org/crawl/v1/proteinensemble/PED00014/20211206/1> . diff --git a/dhp-workflows/dhp-rdfconverter/src/test/resources/log4j.properties b/dhp-workflows/dhp-rdfconverter/src/test/resources/log4j.properties new file mode 100644 index 000000000..20f56e38d --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/test/resources/log4j.properties @@ -0,0 +1,11 @@ +# Set root logger level to INFO and its only appender to A1. +log4j.rootLogger=INFO, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.logger.org = ERROR +log4j.logger.eu.dnetlib = DEBUG +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n \ No newline at end of file