forked from D-Net/dnet-hadoop
added subjects
This commit is contained in:
parent
3ef5eec3a6
commit
f43bfdb594
|
@ -19,7 +19,8 @@ public class BioSchemaProtein {
|
||||||
@JsonProperty("@id")
|
@JsonProperty("@id")
|
||||||
private String id;
|
private String id;
|
||||||
@JsonProperty("@type")
|
@JsonProperty("@type")
|
||||||
private List<String> type;
|
// private List<String> type;
|
||||||
|
private String type;
|
||||||
@JsonProperty("https://schema.org/identifier")
|
@JsonProperty("https://schema.org/identifier")
|
||||||
private String identifier;
|
private String identifier;
|
||||||
@JsonProperty("https://schema.org/name")
|
@JsonProperty("https://schema.org/name")
|
||||||
|
@ -50,8 +51,16 @@ public class BioSchemaProtein {
|
||||||
private Link mainEntityOfPage;
|
private Link mainEntityOfPage;
|
||||||
@JsonProperty("https://schema.org/citation")
|
@JsonProperty("https://schema.org/citation")
|
||||||
private Citation citation;
|
private Citation citation;
|
||||||
|
@JsonProperty("https://schema.org/sameAs")
|
||||||
|
private List<Link> sameAs;
|
||||||
@JsonProperty("https://schema.org/hasSequenceAnnotation")
|
@JsonProperty("https://schema.org/hasSequenceAnnotation")
|
||||||
private SequenceAnnotation sequenceAnnotation;
|
private Link hasSequenceAnnotation;
|
||||||
|
@JsonProperty("https://schema.org/additionalProperty")
|
||||||
|
private List<Link> sequenceAnnotation;
|
||||||
|
@JsonProperty("https://schema.org/value")
|
||||||
|
private Link propertyValue;
|
||||||
|
@JsonProperty("https://schema.org/termCode")
|
||||||
|
private String termCode;
|
||||||
|
|
||||||
public String getId() {
|
public String getId() {
|
||||||
return id;
|
return id;
|
||||||
|
@ -61,11 +70,11 @@ public class BioSchemaProtein {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getType() {
|
public String getType() {
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setType(List<String> type) {
|
public void setType(String type) {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,9 +86,6 @@ public class BioSchemaProtein {
|
||||||
this.name = name;
|
this.name = name;
|
||||||
}
|
}
|
||||||
|
|
||||||
@JsonProperty("https://schema.org/sameAs")
|
|
||||||
private List<Link> sameAs;
|
|
||||||
|
|
||||||
public List<Link> getSameAs() {
|
public List<Link> getSameAs() {
|
||||||
return sameAs;
|
return sameAs;
|
||||||
}
|
}
|
||||||
|
@ -192,11 +198,19 @@ public class BioSchemaProtein {
|
||||||
this.mainEntityOfPage = mainEntityOfPage;
|
this.mainEntityOfPage = mainEntityOfPage;
|
||||||
}
|
}
|
||||||
|
|
||||||
public SequenceAnnotation getSequenceAnnotation() {
|
public Link getHasSequenceAnnotation() {
|
||||||
|
return hasSequenceAnnotation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHasSequenceAnnotation(Link hasSequenceAnnotation) {
|
||||||
|
this.hasSequenceAnnotation = hasSequenceAnnotation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Link> getSequenceAnnotation() {
|
||||||
return sequenceAnnotation;
|
return sequenceAnnotation;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSequenceAnnotation(SequenceAnnotation sequenceAnnotation) {
|
public void setSequenceAnnotation(List<Link> sequenceAnnotation) {
|
||||||
this.sequenceAnnotation = sequenceAnnotation;
|
this.sequenceAnnotation = sequenceAnnotation;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -207,6 +221,22 @@ public class BioSchemaProtein {
|
||||||
public void setCitation(Citation citation) {
|
public void setCitation(Citation citation) {
|
||||||
this.citation = citation;
|
this.citation = citation;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Link getPropertyValue() {
|
||||||
|
return propertyValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPropertyValue(Link propertyValue) {
|
||||||
|
this.propertyValue = propertyValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTermCode() {
|
||||||
|
return termCode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTermCode(String termCode) {
|
||||||
|
this.termCode = termCode;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class IsPartOfBioChemEntity {
|
public static class IsPartOfBioChemEntity {
|
||||||
|
@ -357,33 +387,46 @@ public class BioSchemaProtein {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class SequenceAnnotationId {
|
||||||
|
@JsonProperty("@id")
|
||||||
|
private String id;
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static class SequenceAnnotation {
|
public static class SequenceAnnotation {
|
||||||
@JsonProperty("https://schema.org/additionalProperty")
|
@JsonProperty("https://schema.org/additionalProperty")
|
||||||
private List<AdditionalProperty> additionalProperty;
|
private List<PropertyValue> additionalProperty;
|
||||||
|
|
||||||
public List<AdditionalProperty> getAdditionalProperty() {
|
public List<PropertyValue> getAdditionalProperty() {
|
||||||
return additionalProperty;
|
return additionalProperty;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setAdditionalProperty(List<AdditionalProperty> additionalProperty) {
|
public void setAdditionalProperty(List<PropertyValue> additionalProperty) {
|
||||||
this.additionalProperty = additionalProperty;
|
this.additionalProperty = additionalProperty;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class AdditionalProperty {
|
public static class PropertyValue {
|
||||||
@JsonProperty("https://schema.org/value")
|
@JsonProperty("https://schema.org/value")
|
||||||
private List<PropertyValue> propertyValue;
|
private DefinedTerm definedTerm;
|
||||||
|
|
||||||
public List<PropertyValue> getPropertyValue() {
|
public DefinedTerm getDefinedTerm() {
|
||||||
return propertyValue;
|
return definedTerm;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setPropertyValue(List<PropertyValue> propertyValue) {
|
public void setDefinedTerm(DefinedTerm definedTerm) {
|
||||||
this.propertyValue = propertyValue;
|
this.definedTerm = definedTerm;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class PropertyValue {
|
public static class DefinedTerm {
|
||||||
@JsonProperty("@id")
|
@JsonProperty("@id")
|
||||||
private String id;
|
private String id;
|
||||||
@JsonProperty("https://schema.org/termCode")
|
@JsonProperty("https://schema.org/termCode")
|
||||||
|
|
|
@ -21,6 +21,7 @@ public class DataciteProtein {
|
||||||
List<Description> descriptions = new ArrayList<Description>();
|
List<Description> descriptions = new ArrayList<Description>();
|
||||||
List<Title> titles = new ArrayList<Title>();
|
List<Title> titles = new ArrayList<Title>();
|
||||||
private List<DataciteDate> dates = new ArrayList<DataciteDate>();
|
private List<DataciteDate> dates = new ArrayList<DataciteDate>();
|
||||||
|
private List<Subject> subjects = new ArrayList<Subject>();
|
||||||
|
|
||||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public static class Types {
|
public static class Types {
|
||||||
|
@ -189,6 +190,37 @@ public class DataciteProtein {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
|
public static class Subject {
|
||||||
|
private String schemeURI;
|
||||||
|
private String value;
|
||||||
|
private String subjectScheme;
|
||||||
|
|
||||||
|
public String getSchemeURI() {
|
||||||
|
return schemeURI;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSchemeURI(String schemeURI) {
|
||||||
|
this.schemeURI = schemeURI;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setValue(String value) {
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSubjectScheme() {
|
||||||
|
return subjectScheme;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubjectScheme(String subjectScheme) {
|
||||||
|
this.subjectScheme = subjectScheme;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public String getId() {
|
public String getId() {
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
@ -288,4 +320,12 @@ public class DataciteProtein {
|
||||||
public void setDates(List<DataciteDate> dates) {
|
public void setDates(List<DataciteDate> dates) {
|
||||||
this.dates = dates;
|
this.dates = dates;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<Subject> getSubjects() {
|
||||||
|
return subjects;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubjects(List<Subject> subjects) {
|
||||||
|
this.subjects = subjects;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.rdfconverter.utils;
|
package eu.dnetlib.dhp.rdfconverter.utils;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.eclipse.rdf4j.model.Model;
|
import org.eclipse.rdf4j.model.Model;
|
||||||
import org.eclipse.rdf4j.rio.RDFFormat;
|
import org.eclipse.rdf4j.rio.RDFFormat;
|
||||||
|
@ -19,6 +21,7 @@ import com.github.jsonldjava.core.JsonLdOptions;
|
||||||
import com.github.jsonldjava.core.JsonLdProcessor;
|
import com.github.jsonldjava.core.JsonLdProcessor;
|
||||||
import com.github.jsonldjava.utils.JsonUtils;
|
import com.github.jsonldjava.utils.JsonUtils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
|
||||||
import eu.dnetlib.dhp.rdfconverter.bioschema.model.BioSchemaProtein;
|
import eu.dnetlib.dhp.rdfconverter.bioschema.model.BioSchemaProtein;
|
||||||
import eu.dnetlib.dhp.rdfconverter.bioschema.model.DataciteProtein;
|
import eu.dnetlib.dhp.rdfconverter.bioschema.model.DataciteProtein;
|
||||||
|
|
||||||
|
@ -62,10 +65,18 @@ public class RDFConverter {
|
||||||
}).filter(id -> id != null).collect(Collectors.toList());
|
}).filter(id -> id != null).collect(Collectors.toList());
|
||||||
|
|
||||||
ArrayList<String> results = new ArrayList<String>();
|
ArrayList<String> results = new ArrayList<String>();
|
||||||
|
final List<DataciteProtein> dataciteProteins = new ArrayList<>();
|
||||||
|
final Map<String, List<BioSchemaProtein.Link>> sequenceAnnotations = new HashMap<>();
|
||||||
|
final Map<String, String> propertyValues = new HashMap<>();
|
||||||
|
final Map<String, BioSchemaProtein.DefinedTerm> definedTerms = new HashMap<>();
|
||||||
|
final Map<String, BioSchemaProtein.Entry> proteins = new HashMap<>();
|
||||||
|
|
||||||
bioSchemaProtein.getEntryList().stream().forEach(entry -> {
|
bioSchemaProtein.getEntryList().stream().forEach(entry -> {
|
||||||
|
|
||||||
if (entry.getType() != null
|
if (entry.getType() != null
|
||||||
&& entry.getType().stream().filter(type -> type.equals("https://schema.org/Protein")).count() == 1) {
|
&& entry
|
||||||
|
.getType()
|
||||||
|
.equals("https://schema.org/Protein")) {
|
||||||
|
|
||||||
DataciteProtein dataciteProtein = new DataciteProtein();
|
DataciteProtein dataciteProtein = new DataciteProtein();
|
||||||
|
|
||||||
|
@ -173,8 +184,8 @@ public class RDFConverter {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (entry.getSequenceAnnotation() !=null) {
|
if (entry.getHasSequenceAnnotation() != null) {
|
||||||
log.debug("Sequence Annotation found " );
|
log.debug("Sequence Annotation id found: " + entry.getHasSequenceAnnotation().getId());
|
||||||
}
|
}
|
||||||
|
|
||||||
String proteinId = "";
|
String proteinId = "";
|
||||||
|
@ -186,17 +197,80 @@ public class RDFConverter {
|
||||||
}
|
}
|
||||||
|
|
||||||
dataciteProtein.setId(proteinId);
|
dataciteProtein.setId(proteinId);
|
||||||
|
proteins.put(entry.getId(), entry);
|
||||||
ObjectMapper mapper = new ObjectMapper();
|
dataciteProteins.add(dataciteProtein);
|
||||||
try {
|
}
|
||||||
StringWriter writer = new StringWriter();
|
if (entry.getType() != null
|
||||||
mapper.writeValue(writer, dataciteProtein);
|
&& entry
|
||||||
results.add(writer.toString());
|
.getType()
|
||||||
} catch (Exception e) {
|
.equals("https://schema.org/SequenceAnnotation")) {
|
||||||
throw new RuntimeException(e);
|
log.debug("Sequence Annotation found ");
|
||||||
}
|
log.debug("sequence id > " + entry.getId());
|
||||||
|
entry.getSequenceAnnotation().forEach(l -> {
|
||||||
|
log.debug(l.getId());
|
||||||
|
});
|
||||||
|
sequenceAnnotations.put(entry.getId(), entry.getSequenceAnnotation());
|
||||||
|
}
|
||||||
|
if (entry.getType() != null
|
||||||
|
&& entry
|
||||||
|
.getType()
|
||||||
|
.equals("https://schema.org/PropertyValue")) {
|
||||||
|
log.debug("Property found ");
|
||||||
|
log.debug(entry.getPropertyValue().getId());
|
||||||
|
propertyValues.put(entry.getId(), entry.getPropertyValue().getId());
|
||||||
|
}
|
||||||
|
if (entry.getType() != null
|
||||||
|
&& entry
|
||||||
|
.getType()
|
||||||
|
.equals("https://schema.org/DefinedTerm")) {
|
||||||
|
log.debug("Term found ");
|
||||||
|
log.debug(entry.getTermCode());
|
||||||
|
BioSchemaProtein.DefinedTerm term = new BioSchemaProtein.DefinedTerm();
|
||||||
|
term.setId(entry.getId());
|
||||||
|
term.setTermCode(entry.getTermCode());
|
||||||
|
term.setName(entry.getName());
|
||||||
|
definedTerms.put(term.getId(), term);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
proteins.entrySet().stream().forEach(p -> {
|
||||||
|
String sequenceAnnotationId = p.getValue().getHasSequenceAnnotation().getId();
|
||||||
|
List<BioSchemaProtein.Link> propertyIds = sequenceAnnotations.get(sequenceAnnotationId);
|
||||||
|
List<String> termIds = propertyIds
|
||||||
|
.stream()
|
||||||
|
.map(propertyId -> propertyValues.get(propertyId.getId()))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
List<BioSchemaProtein.DefinedTerm> terms = termIds
|
||||||
|
.stream()
|
||||||
|
.map(term -> definedTerms.get(term))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
terms.forEach(t -> {
|
||||||
|
log.debug("protein id: " + p.getKey() + " >>> term: " + t.getId());
|
||||||
|
dataciteProteins.stream().filter(d -> {
|
||||||
|
return p.getKey().contains(d.getId());
|
||||||
|
}).forEach(d -> {
|
||||||
|
DataciteProtein.Subject subject = new DataciteProtein.Subject();
|
||||||
|
subject.setSchemeURI(t.getId());
|
||||||
|
subject.setValue(t.getName());
|
||||||
|
subject.setSubjectScheme(t.getTermCode());
|
||||||
|
d.getSubjects().add(subject);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
|
// try {
|
||||||
|
dataciteProteins.forEach(d -> {
|
||||||
|
StringWriter writer = new StringWriter();
|
||||||
|
try {
|
||||||
|
mapper.writeValue(writer, d);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
results.add(writer.toString());
|
||||||
|
});
|
||||||
|
// } catch (Exception e) {
|
||||||
|
// throw new RuntimeException(e);
|
||||||
|
// }
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,6 @@ import java.io.InputStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.junit.jupiter.api.Disabled;
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -16,23 +15,22 @@ public class ConverterTest {
|
||||||
|
|
||||||
static Logger logger = LoggerFactory.getLogger(ConverterTest.class);
|
static Logger logger = LoggerFactory.getLogger(ConverterTest.class);
|
||||||
|
|
||||||
@Test
|
// @Test
|
||||||
public void nqToDataciteTest() throws Exception {
|
// private void nqToDataciteTest() throws Exception {
|
||||||
InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq");
|
// InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq");
|
||||||
String nq = IOUtils.toString(is);
|
// String nq = IOUtils.toString(is);
|
||||||
logger.debug("NQ: " + nq);
|
// logger.debug("NQ: " + nq);
|
||||||
RDFConverter converter = new RDFConverter();
|
// RDFConverter converter = new RDFConverter();
|
||||||
ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
|
// ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
|
||||||
if (results != null && !results.isEmpty()) {
|
// if (results != null && !results.isEmpty()) {
|
||||||
logger.info("JSON DATACITE: " + results.get(0));
|
// logger.info("JSON DATACITE: " + results.get(0));
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pedCitationTest() throws Exception {
|
public void pedCitationTest() throws Exception {
|
||||||
InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/ped.nq");
|
InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/ped.nq");
|
||||||
String nq = IOUtils.toString(is);
|
String nq = IOUtils.toString(is);
|
||||||
logger.debug("NQ: " + nq);
|
|
||||||
RDFConverter converter = new RDFConverter();
|
RDFConverter converter = new RDFConverter();
|
||||||
ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
|
ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
|
||||||
if (results != null && !results.isEmpty()) {
|
if (results != null && !results.isEmpty()) {
|
||||||
|
|
Loading…
Reference in New Issue