added subjects

This commit is contained in:
Enrico Ottonello 2022-03-17 19:24:07 +01:00
parent 3ef5eec3a6
commit f43bfdb594
4 changed files with 199 additions and 44 deletions

View File

@ -19,7 +19,8 @@ public class BioSchemaProtein {
@JsonProperty("@id") @JsonProperty("@id")
private String id; private String id;
@JsonProperty("@type") @JsonProperty("@type")
private List<String> type; // private List<String> type;
private String type;
@JsonProperty("https://schema.org/identifier") @JsonProperty("https://schema.org/identifier")
private String identifier; private String identifier;
@JsonProperty("https://schema.org/name") @JsonProperty("https://schema.org/name")
@ -50,8 +51,16 @@ public class BioSchemaProtein {
private Link mainEntityOfPage; private Link mainEntityOfPage;
@JsonProperty("https://schema.org/citation") @JsonProperty("https://schema.org/citation")
private Citation citation; private Citation citation;
@JsonProperty("https://schema.org/sameAs")
private List<Link> sameAs;
@JsonProperty("https://schema.org/hasSequenceAnnotation") @JsonProperty("https://schema.org/hasSequenceAnnotation")
private SequenceAnnotation sequenceAnnotation; private Link hasSequenceAnnotation;
@JsonProperty("https://schema.org/additionalProperty")
private List<Link> sequenceAnnotation;
@JsonProperty("https://schema.org/value")
private Link propertyValue;
@JsonProperty("https://schema.org/termCode")
private String termCode;
public String getId() { public String getId() {
return id; return id;
@ -61,11 +70,11 @@ public class BioSchemaProtein {
this.id = id; this.id = id;
} }
public List<String> getType() { public String getType() {
return type; return type;
} }
public void setType(List<String> type) { public void setType(String type) {
this.type = type; this.type = type;
} }
@ -77,9 +86,6 @@ public class BioSchemaProtein {
this.name = name; this.name = name;
} }
@JsonProperty("https://schema.org/sameAs")
private List<Link> sameAs;
public List<Link> getSameAs() { public List<Link> getSameAs() {
return sameAs; return sameAs;
} }
@ -192,11 +198,19 @@ public class BioSchemaProtein {
this.mainEntityOfPage = mainEntityOfPage; this.mainEntityOfPage = mainEntityOfPage;
} }
public SequenceAnnotation getSequenceAnnotation() { public Link getHasSequenceAnnotation() {
return hasSequenceAnnotation;
}
/**
 * Replaces the link held in {@code hasSequenceAnnotation}.
 *
 * @param hasSequenceAnnotation the new link; stored as-is, no null check is performed
 */
public void setHasSequenceAnnotation(Link hasSequenceAnnotation) {
this.hasSequenceAnnotation = hasSequenceAnnotation;
}
public List<Link> getSequenceAnnotation() {
return sequenceAnnotation; return sequenceAnnotation;
} }
public void setSequenceAnnotation(SequenceAnnotation sequenceAnnotation) { public void setSequenceAnnotation(List<Link> sequenceAnnotation) {
this.sequenceAnnotation = sequenceAnnotation; this.sequenceAnnotation = sequenceAnnotation;
} }
@ -207,6 +221,22 @@ public class BioSchemaProtein {
public void setCitation(Citation citation) { public void setCitation(Citation citation) {
this.citation = citation; this.citation = citation;
} }
/**
 * Returns the link held in {@code propertyValue}.
 *
 * @return the stored link, or {@code null} if none was set (the field has no initializer)
 */
public Link getPropertyValue() {
return propertyValue;
}
/**
 * Replaces the link held in {@code propertyValue}.
 *
 * @param propertyValue the new link; stored as-is, no null check is performed
 */
public void setPropertyValue(Link propertyValue) {
this.propertyValue = propertyValue;
}
/**
 * Returns the string held in {@code termCode}.
 *
 * @return the stored term code, or {@code null} if none was set (the field has no initializer)
 */
public String getTermCode() {
return termCode;
}
/**
 * Replaces the string held in {@code termCode}.
 *
 * @param termCode the new term code; stored as-is
 */
public void setTermCode(String termCode) {
this.termCode = termCode;
}
} }
public static class IsPartOfBioChemEntity { public static class IsPartOfBioChemEntity {
@ -357,33 +387,46 @@ public class BioSchemaProtein {
} }
} }
/**
 * JSON binding for a node that carries nothing but a JSON-LD {@code @id}.
 * <p>
 * NOTE(review): none of the visible code references this class (the
 * {@code hasSequenceAnnotation} field is typed as {@code Link}) - confirm it is
 * still needed before keeping it.
 */
public static class SequenceAnnotationId {
// Bound to the "@id" key of the JSON object.
@JsonProperty("@id")
private String id;
/**
 * @return the value bound to {@code @id}, or {@code null} if none was set
 */
public String getId() {
return id;
}
/**
 * @param id the value to store as this node's {@code @id}
 */
public void setId(String id) {
this.id = id;
}
}
public static class SequenceAnnotation { public static class SequenceAnnotation {
@JsonProperty("https://schema.org/additionalProperty") @JsonProperty("https://schema.org/additionalProperty")
private List<AdditionalProperty> additionalProperty; private List<PropertyValue> additionalProperty;
public List<AdditionalProperty> getAdditionalProperty() { public List<PropertyValue> getAdditionalProperty() {
return additionalProperty; return additionalProperty;
} }
public void setAdditionalProperty(List<AdditionalProperty> additionalProperty) { public void setAdditionalProperty(List<PropertyValue> additionalProperty) {
this.additionalProperty = additionalProperty; this.additionalProperty = additionalProperty;
} }
} }
/**
 * JSON binding that wraps the list found under the
 * {@code https://schema.org/value} key.
 */
public static class AdditionalProperty {
// Bound to the "https://schema.org/value" key; left null when the key is absent.
@JsonProperty("https://schema.org/value")
private List<PropertyValue> propertyValue;
/**
 * @return the bound list itself (not a copy), or {@code null} if none was set
 */
public List<PropertyValue> getPropertyValue() {
return propertyValue;
}
/**
 * @param propertyValue the list to store; kept by reference
 */
public void setPropertyValue(List<PropertyValue> propertyValue) {
this.propertyValue = propertyValue;
}
}
public static class PropertyValue { public static class PropertyValue {
@JsonProperty("https://schema.org/value")
private DefinedTerm definedTerm;
public DefinedTerm getDefinedTerm() {
return definedTerm;
}
public void setDefinedTerm(DefinedTerm definedTerm) {
this.definedTerm = definedTerm;
}
}
public static class DefinedTerm {
@JsonProperty("@id") @JsonProperty("@id")
private String id; private String id;
@JsonProperty("https://schema.org/termCode") @JsonProperty("https://schema.org/termCode")

View File

@ -21,6 +21,7 @@ public class DataciteProtein {
List<Description> descriptions = new ArrayList<Description>(); List<Description> descriptions = new ArrayList<Description>();
List<Title> titles = new ArrayList<Title>(); List<Title> titles = new ArrayList<Title>();
private List<DataciteDate> dates = new ArrayList<DataciteDate>(); private List<DataciteDate> dates = new ArrayList<DataciteDate>();
private List<Subject> subjects = new ArrayList<Subject>();
@JsonInclude(JsonInclude.Include.NON_NULL) @JsonInclude(JsonInclude.Include.NON_NULL)
public static class Types { public static class Types {
@ -189,6 +190,37 @@ public class DataciteProtein {
} }
} }
/**
 * Bean for one element of the {@code subjects} list, made of three optional
 * strings: {@code schemeURI}, {@code value} and {@code subjectScheme}.
 * <p>
 * Because of {@code JsonInclude.Include.NON_NULL}, a property that is still
 * {@code null} is left out when Jackson serializes the bean.
 * <p>
 * NOTE(review): RDFConverter fills {@code schemeURI} with the defined term's
 * {@code @id}, {@code value} with its name and {@code subjectScheme} with its
 * term code. In the DataCite schema {@code schemeURI} identifies the scheme,
 * while the term itself goes into {@code valueURI} and the text into
 * {@code subject} - confirm this mapping with the consumer of the JSON.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public static class Subject {
// All three fields start as null and are therefore omitted from the JSON until set.
private String schemeURI;
private String value;
private String subjectScheme;
/**
 * @return the stored scheme URI, or {@code null} if none was set
 */
public String getSchemeURI() {
return schemeURI;
}
/**
 * @param schemeURI the scheme URI to store
 */
public void setSchemeURI(String schemeURI) {
this.schemeURI = schemeURI;
}
/**
 * @return the stored subject text, or {@code null} if none was set
 */
public String getValue() {
return value;
}
/**
 * @param value the subject text to store
 */
public void setValue(String value) {
this.value = value;
}
/**
 * @return the stored subject scheme, or {@code null} if none was set
 */
public String getSubjectScheme() {
return subjectScheme;
}
/**
 * @param subjectScheme the subject scheme to store
 */
public void setSubjectScheme(String subjectScheme) {
this.subjectScheme = subjectScheme;
}
}
public String getId() { public String getId() {
return id; return id;
} }
@ -288,4 +320,12 @@ public class DataciteProtein {
public void setDates(List<DataciteDate> dates) { public void setDates(List<DataciteDate> dates) {
this.dates = dates; this.dates = dates;
} }
/**
 * Returns the subjects attached to this record.
 *
 * @return the backing list itself, not a copy, so elements a caller adds to it
 *         become part of this record
 */
public List<Subject> getSubjects() {
return subjects;
}
/**
 * Replaces the subjects list.
 *
 * @param subjects the new list; kept by reference, no copy and no null check
 */
public void setSubjects(List<Subject> subjects) {
this.subjects = subjects;
}
} }

View File

@ -1,10 +1,12 @@
package eu.dnetlib.dhp.rdfconverter.utils; package eu.dnetlib.dhp.rdfconverter.utils;
import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
import java.io.StringWriter; import java.io.StringWriter;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.rio.RDFFormat; import org.eclipse.rdf4j.rio.RDFFormat;
@ -19,6 +21,7 @@ import com.github.jsonldjava.core.JsonLdOptions;
import com.github.jsonldjava.core.JsonLdProcessor; import com.github.jsonldjava.core.JsonLdProcessor;
import com.github.jsonldjava.utils.JsonUtils; import com.github.jsonldjava.utils.JsonUtils;
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
import eu.dnetlib.dhp.rdfconverter.bioschema.model.BioSchemaProtein; import eu.dnetlib.dhp.rdfconverter.bioschema.model.BioSchemaProtein;
import eu.dnetlib.dhp.rdfconverter.bioschema.model.DataciteProtein; import eu.dnetlib.dhp.rdfconverter.bioschema.model.DataciteProtein;
@ -62,10 +65,18 @@ public class RDFConverter {
}).filter(id -> id != null).collect(Collectors.toList()); }).filter(id -> id != null).collect(Collectors.toList());
ArrayList<String> results = new ArrayList<String>(); ArrayList<String> results = new ArrayList<String>();
final List<DataciteProtein> dataciteProteins = new ArrayList<>();
final Map<String, List<BioSchemaProtein.Link>> sequenceAnnotations = new HashMap<>();
final Map<String, String> propertyValues = new HashMap<>();
final Map<String, BioSchemaProtein.DefinedTerm> definedTerms = new HashMap<>();
final Map<String, BioSchemaProtein.Entry> proteins = new HashMap<>();
bioSchemaProtein.getEntryList().stream().forEach(entry -> { bioSchemaProtein.getEntryList().stream().forEach(entry -> {
if (entry.getType() != null if (entry.getType() != null
&& entry.getType().stream().filter(type -> type.equals("https://schema.org/Protein")).count() == 1) { && entry
.getType()
.equals("https://schema.org/Protein")) {
DataciteProtein dataciteProtein = new DataciteProtein(); DataciteProtein dataciteProtein = new DataciteProtein();
@ -173,8 +184,8 @@ public class RDFConverter {
}); });
} }
if (entry.getSequenceAnnotation() !=null) { if (entry.getHasSequenceAnnotation() != null) {
log.debug("Sequence Annotation found " ); log.debug("Sequence Annotation id found: " + entry.getHasSequenceAnnotation().getId());
} }
String proteinId = ""; String proteinId = "";
@ -186,17 +197,80 @@ public class RDFConverter {
} }
dataciteProtein.setId(proteinId); dataciteProtein.setId(proteinId);
proteins.put(entry.getId(), entry);
ObjectMapper mapper = new ObjectMapper(); dataciteProteins.add(dataciteProtein);
try {
StringWriter writer = new StringWriter();
mapper.writeValue(writer, dataciteProtein);
results.add(writer.toString());
} catch (Exception e) {
throw new RuntimeException(e);
} }
if (entry.getType() != null
&& entry
.getType()
.equals("https://schema.org/SequenceAnnotation")) {
log.debug("Sequence Annotation found ");
log.debug("sequence id > " + entry.getId());
entry.getSequenceAnnotation().forEach(l -> {
log.debug(l.getId());
});
sequenceAnnotations.put(entry.getId(), entry.getSequenceAnnotation());
}
if (entry.getType() != null
&& entry
.getType()
.equals("https://schema.org/PropertyValue")) {
log.debug("Property found ");
log.debug(entry.getPropertyValue().getId());
propertyValues.put(entry.getId(), entry.getPropertyValue().getId());
}
if (entry.getType() != null
&& entry
.getType()
.equals("https://schema.org/DefinedTerm")) {
log.debug("Term found ");
log.debug(entry.getTermCode());
BioSchemaProtein.DefinedTerm term = new BioSchemaProtein.DefinedTerm();
term.setId(entry.getId());
term.setTermCode(entry.getTermCode());
term.setName(entry.getName());
definedTerms.put(term.getId(), term);
} }
}); });
// Join step. For every protein follow
//   hasSequenceAnnotation -> SequenceAnnotation links -> PropertyValue -> DefinedTerm
// through the lookup maps filled while reading the entries, and attach every
// resolved term as a subject of the matching Datacite record(s).
// Each hop may be absent from the input, so unresolved references are skipped
// rather than aborting the whole conversion with a NullPointerException.
proteins.forEach((proteinKey, protein) -> {
	final BioSchemaProtein.Link annotationLink = protein.getHasSequenceAnnotation();
	if (proteinKey == null || annotationLink == null) {
		// Protein without an id or without a sequence annotation: nothing to resolve.
		return;
	}
	final List<BioSchemaProtein.Link> propertyLinks = sequenceAnnotations.get(annotationLink.getId());
	if (propertyLinks == null) {
		// The referenced SequenceAnnotation node was never seen.
		return;
	}
	final List<BioSchemaProtein.DefinedTerm> terms = propertyLinks
		.stream()
		.filter(Objects::nonNull)
		.map(link -> propertyValues.get(link.getId()))
		.filter(Objects::nonNull)
		.map(definedTerms::get)
		.filter(Objects::nonNull)
		.collect(Collectors.toList());
	if (terms.isEmpty()) {
		return;
	}
	// The matching records do not depend on the term, so look them up once per protein.
	// Null ids are skipped (String.contains(null) throws) and so are empty ids
	// (String.contains("") is true for every key, which would attach the terms to unrelated records).
	final List<DataciteProtein> targets = dataciteProteins
		.stream()
		.filter(d -> d.getId() != null && !d.getId().isEmpty() && proteinKey.contains(d.getId()))
		.collect(Collectors.toList());
	terms.forEach(t -> {
		log.debug("protein id: " + proteinKey + " >>> term: " + t.getId());
		targets.forEach(d -> {
			// One Subject instance per record: the bean is mutable and must not be shared.
			DataciteProtein.Subject subject = new DataciteProtein.Subject();
			subject.setSchemeURI(t.getId());
			subject.setValue(t.getName());
			subject.setSubjectScheme(t.getTermCode());
			d.getSubjects().add(subject);
		});
	});
});
// Serialize every Datacite record to a JSON string, in list order, and collect the strings in results.
// An I/O failure while writing is rethrown unchecked with the original exception as cause.
final ObjectMapper jsonMapper = new ObjectMapper();
for (DataciteProtein datacite : dataciteProteins) {
	final StringWriter json = new StringWriter();
	try {
		jsonMapper.writeValue(json, datacite);
	} catch (IOException e) {
		throw new RuntimeException(e);
	}
	results.add(json.toString());
}
return results; return results;
} }

View File

@ -5,7 +5,6 @@ import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -16,23 +15,22 @@ public class ConverterTest {
static Logger logger = LoggerFactory.getLogger(ConverterTest.class); static Logger logger = LoggerFactory.getLogger(ConverterTest.class);
@Test // @Test
public void nqToDataciteTest() throws Exception { // private void nqToDataciteTest() throws Exception {
InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq"); // InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq");
String nq = IOUtils.toString(is); // String nq = IOUtils.toString(is);
logger.debug("NQ: " + nq); // logger.debug("NQ: " + nq);
RDFConverter converter = new RDFConverter(); // RDFConverter converter = new RDFConverter();
ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein"); // ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
if (results != null && !results.isEmpty()) { // if (results != null && !results.isEmpty()) {
logger.info("JSON DATACITE: " + results.get(0)); // logger.info("JSON DATACITE: " + results.get(0));
} // }
} // }
@Test @Test
public void pedCitationTest() throws Exception { public void pedCitationTest() throws Exception {
InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/ped.nq"); InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/ped.nq");
String nq = IOUtils.toString(is); String nq = IOUtils.toString(is);
logger.debug("NQ: " + nq);
RDFConverter converter = new RDFConverter(); RDFConverter converter = new RDFConverter();
ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein"); ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
if (results != null && !results.isEmpty()) { if (results != null && !results.isEmpty()) {