added subjects

This commit is contained in:
Enrico Ottonello 2022-03-17 19:24:07 +01:00
parent 3ef5eec3a6
commit f43bfdb594
4 changed files with 199 additions and 44 deletions

View File

@ -19,7 +19,8 @@ public class BioSchemaProtein {
@JsonProperty("@id")
private String id;
@JsonProperty("@type")
private List<String> type;
// private List<String> type;
private String type;
@JsonProperty("https://schema.org/identifier")
private String identifier;
@JsonProperty("https://schema.org/name")
@ -50,8 +51,16 @@ public class BioSchemaProtein {
private Link mainEntityOfPage;
@JsonProperty("https://schema.org/citation")
private Citation citation;
@JsonProperty("https://schema.org/sameAs")
private List<Link> sameAs;
@JsonProperty("https://schema.org/hasSequenceAnnotation")
private SequenceAnnotation sequenceAnnotation;
private Link hasSequenceAnnotation;
@JsonProperty("https://schema.org/additionalProperty")
private List<Link> sequenceAnnotation;
@JsonProperty("https://schema.org/value")
private Link propertyValue;
@JsonProperty("https://schema.org/termCode")
private String termCode;
public String getId() {
return id;
@ -61,11 +70,11 @@ public class BioSchemaProtein {
this.id = id;
}
public List<String> getType() {
public String getType() {
return type;
}
public void setType(List<String> type) {
public void setType(String type) {
this.type = type;
}
@ -77,9 +86,6 @@ public class BioSchemaProtein {
this.name = name;
}
@JsonProperty("https://schema.org/sameAs")
private List<Link> sameAs;
public List<Link> getSameAs() {
return sameAs;
}
@ -192,11 +198,19 @@ public class BioSchemaProtein {
this.mainEntityOfPage = mainEntityOfPage;
}
public SequenceAnnotation getSequenceAnnotation() {
/**
 * Returns the link stored in the {@code hasSequenceAnnotation} field.
 *
 * @return the stored link, or {@code null} when none has been set
 */
public Link getHasSequenceAnnotation() {
    return this.hasSequenceAnnotation;
}
/**
 * Replaces the link stored in the {@code hasSequenceAnnotation} field.
 *
 * @param hasSequenceAnnotation the new link; stored as-is, no validation is performed
 */
public void setHasSequenceAnnotation(final Link hasSequenceAnnotation) {
    this.hasSequenceAnnotation = hasSequenceAnnotation;
}
public List<Link> getSequenceAnnotation() {
return sequenceAnnotation;
}
public void setSequenceAnnotation(SequenceAnnotation sequenceAnnotation) {
public void setSequenceAnnotation(List<Link> sequenceAnnotation) {
this.sequenceAnnotation = sequenceAnnotation;
}
@ -207,6 +221,22 @@ public class BioSchemaProtein {
public void setCitation(Citation citation) {
this.citation = citation;
}
/**
 * Returns the link stored in the {@code propertyValue} field.
 *
 * @return the stored link, or {@code null} when none has been set
 */
public Link getPropertyValue() {
    return this.propertyValue;
}
/**
 * Replaces the link stored in the {@code propertyValue} field.
 *
 * @param propertyValue the new link; stored as-is, no validation is performed
 */
public void setPropertyValue(final Link propertyValue) {
    this.propertyValue = propertyValue;
}
/**
 * Returns the value stored in the {@code termCode} field.
 *
 * @return the stored term code, or {@code null} when none has been set
 */
public String getTermCode() {
    return this.termCode;
}
/**
 * Replaces the value stored in the {@code termCode} field.
 *
 * @param termCode the new term code; stored as-is, no validation is performed
 */
public void setTermCode(final String termCode) {
    this.termCode = termCode;
}
}
public static class IsPartOfBioChemEntity {
@ -357,33 +387,46 @@ public class BioSchemaProtein {
}
}
/**
 * Bean holding a single identifier bound to the JSON property {@code @id}.
 * NOTE(review): presumably used as a reference to a sequence annotation node - confirm against callers.
 */
public static class SequenceAnnotationId {

    /** Identifier read from / written to the JSON property {@code @id}. */
    @JsonProperty("@id")
    private String id;

    /**
     * @return the stored identifier, or {@code null} when none has been set
     */
    public String getId() {
        return this.id;
    }

    /**
     * @param id the identifier to store; kept as-is, no validation is performed
     */
    public void setId(final String id) {
        this.id = id;
    }
}
public static class SequenceAnnotation {
@JsonProperty("https://schema.org/additionalProperty")
private List<AdditionalProperty> additionalProperty;
private List<PropertyValue> additionalProperty;
public List<AdditionalProperty> getAdditionalProperty() {
public List<PropertyValue> getAdditionalProperty() {
return additionalProperty;
}
public void setAdditionalProperty(List<AdditionalProperty> additionalProperty) {
public void setAdditionalProperty(List<PropertyValue> additionalProperty) {
this.additionalProperty = additionalProperty;
}
}
public static class AdditionalProperty {
public static class PropertyValue {
@JsonProperty("https://schema.org/value")
private List<PropertyValue> propertyValue;
private DefinedTerm definedTerm;
public List<PropertyValue> getPropertyValue() {
return propertyValue;
public DefinedTerm getDefinedTerm() {
return definedTerm;
}
public void setPropertyValue(List<PropertyValue> propertyValue) {
this.propertyValue = propertyValue;
public void setDefinedTerm(DefinedTerm definedTerm) {
this.definedTerm = definedTerm;
}
}
public static class PropertyValue {
public static class DefinedTerm {
@JsonProperty("@id")
private String id;
@JsonProperty("https://schema.org/termCode")

View File

@ -21,6 +21,7 @@ public class DataciteProtein {
List<Description> descriptions = new ArrayList<Description>();
List<Title> titles = new ArrayList<Title>();
private List<DataciteDate> dates = new ArrayList<DataciteDate>();
private List<Subject> subjects = new ArrayList<Subject>();
@JsonInclude(JsonInclude.Include.NON_NULL)
public static class Types {
@ -189,6 +190,37 @@ public class DataciteProtein {
}
}
/**
 * Bean with three string properties: {@code schemeURI}, {@code value} and {@code subjectScheme}.
 * Properties that are {@code null} are left out of the serialized JSON
 * ({@code JsonInclude.Include.NON_NULL}).
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public static class Subject {

    private String schemeURI;
    private String value;
    private String subjectScheme;

    /**
     * @return the stored scheme URI, or {@code null} when unset
     */
    public String getSchemeURI() {
        return this.schemeURI;
    }

    /**
     * @param schemeURI the scheme URI to store; kept as-is
     */
    public void setSchemeURI(final String schemeURI) {
        this.schemeURI = schemeURI;
    }

    /**
     * @return the stored subject value, or {@code null} when unset
     */
    public String getValue() {
        return this.value;
    }

    /**
     * @param value the subject value to store; kept as-is
     */
    public void setValue(final String value) {
        this.value = value;
    }

    /**
     * @return the stored subject scheme, or {@code null} when unset
     */
    public String getSubjectScheme() {
        return this.subjectScheme;
    }

    /**
     * @param subjectScheme the subject scheme to store; kept as-is
     */
    public void setSubjectScheme(final String subjectScheme) {
        this.subjectScheme = subjectScheme;
    }
}
public String getId() {
return id;
}
@ -288,4 +320,12 @@ public class DataciteProtein {
public void setDates(List<DataciteDate> dates) {
this.dates = dates;
}
/**
 * Returns the subjects of this record.
 *
 * @return the internal list itself (not a copy), so additions made by the caller are kept
 */
public List<Subject> getSubjects() {
    return this.subjects;
}
/**
 * Replaces the subjects of this record.
 *
 * @param subjects the new list; the reference is stored directly, without copying
 */
public void setSubjects(final List<Subject> subjects) {
    this.subjects = subjects;
}
}

View File

@ -1,10 +1,12 @@
package eu.dnetlib.dhp.rdfconverter.utils;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.rio.RDFFormat;
@ -19,6 +21,7 @@ import com.github.jsonldjava.core.JsonLdOptions;
import com.github.jsonldjava.core.JsonLdProcessor;
import com.github.jsonldjava.utils.JsonUtils;
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
import eu.dnetlib.dhp.rdfconverter.bioschema.model.BioSchemaProtein;
import eu.dnetlib.dhp.rdfconverter.bioschema.model.DataciteProtein;
@ -62,10 +65,18 @@ public class RDFConverter {
}).filter(id -> id != null).collect(Collectors.toList());
ArrayList<String> results = new ArrayList<String>();
final List<DataciteProtein> dataciteProteins = new ArrayList<>();
final Map<String, List<BioSchemaProtein.Link>> sequenceAnnotations = new HashMap<>();
final Map<String, String> propertyValues = new HashMap<>();
final Map<String, BioSchemaProtein.DefinedTerm> definedTerms = new HashMap<>();
final Map<String, BioSchemaProtein.Entry> proteins = new HashMap<>();
bioSchemaProtein.getEntryList().stream().forEach(entry -> {
if (entry.getType() != null
&& entry.getType().stream().filter(type -> type.equals("https://schema.org/Protein")).count() == 1) {
&& entry
.getType()
.equals("https://schema.org/Protein")) {
DataciteProtein dataciteProtein = new DataciteProtein();
@ -173,8 +184,8 @@ public class RDFConverter {
});
}
if (entry.getSequenceAnnotation() !=null) {
log.debug("Sequence Annotation found " );
if (entry.getHasSequenceAnnotation() != null) {
log.debug("Sequence Annotation id found: " + entry.getHasSequenceAnnotation().getId());
}
String proteinId = "";
@ -186,17 +197,80 @@ public class RDFConverter {
}
dataciteProtein.setId(proteinId);
ObjectMapper mapper = new ObjectMapper();
try {
StringWriter writer = new StringWriter();
mapper.writeValue(writer, dataciteProtein);
results.add(writer.toString());
} catch (Exception e) {
throw new RuntimeException(e);
}
proteins.put(entry.getId(), entry);
dataciteProteins.add(dataciteProtein);
}
if (entry.getType() != null
&& entry
.getType()
.equals("https://schema.org/SequenceAnnotation")) {
log.debug("Sequence Annotation found ");
log.debug("sequence id > " + entry.getId());
entry.getSequenceAnnotation().forEach(l -> {
log.debug(l.getId());
});
sequenceAnnotations.put(entry.getId(), entry.getSequenceAnnotation());
}
if (entry.getType() != null
&& entry
.getType()
.equals("https://schema.org/PropertyValue")) {
log.debug("Property found ");
log.debug(entry.getPropertyValue().getId());
propertyValues.put(entry.getId(), entry.getPropertyValue().getId());
}
if (entry.getType() != null
&& entry
.getType()
.equals("https://schema.org/DefinedTerm")) {
log.debug("Term found ");
log.debug(entry.getTermCode());
BioSchemaProtein.DefinedTerm term = new BioSchemaProtein.DefinedTerm();
term.setId(entry.getId());
term.setTermCode(entry.getTermCode());
term.setName(entry.getName());
definedTerms.put(term.getId(), term);
}
});
// Attach subjects to the Datacite records by following the chain collected above:
// protein -> sequence annotation id -> property links -> property value id -> DefinedTerm.
// Every hop may be missing (a protein without annotation, a dangling reference, a term
// that never appeared in the input): such cases are skipped instead of failing the whole
// conversion with a NullPointerException.
proteins.forEach((proteinEntryId, proteinEntry) -> {
    BioSchemaProtein.Link annotationLink = proteinEntry.getHasSequenceAnnotation();
    if (proteinEntryId == null || annotationLink == null) {
        // nothing to join on for this protein
        return;
    }
    List<BioSchemaProtein.Link> propertyLinks = sequenceAnnotations.get(annotationLink.getId());
    if (propertyLinks == null) {
        // the referenced sequence annotation was not found among the parsed entries
        return;
    }
    propertyLinks
        .stream()
        .filter(Objects::nonNull)
        // property link -> id of the term it points to (null when the property is unknown)
        .map(propertyLink -> propertyValues.get(propertyLink.getId()))
        // term id -> term (null when the id is null or the term is unknown)
        .map(definedTerms::get)
        .filter(Objects::nonNull)
        .forEach(definedTerm -> {
            log.debug("protein id: " + proteinEntryId + " >>> term: " + definedTerm.getId());
            // a Datacite record belongs to this protein when its id is contained in the entry id
            dataciteProteins
                .stream()
                .filter(d -> proteinEntryId.contains(d.getId()))
                .forEach(d -> {
                    DataciteProtein.Subject subject = new DataciteProtein.Subject();
                    subject.setSchemeURI(definedTerm.getId());
                    subject.setValue(definedTerm.getName());
                    subject.setSubjectScheme(definedTerm.getTermCode());
                    d.getSubjects().add(subject);
                });
        });
});
// Serialize every collected Datacite record to JSON and append it to the results,
// in the same order in which the records were collected.
ObjectMapper mapper = new ObjectMapper();
for (DataciteProtein d : dataciteProteins) {
    StringWriter writer = new StringWriter();
    try {
        mapper.writeValue(writer, d);
    } catch (IOException e) {
        // surface serialization failures as unchecked, keeping the cause
        throw new RuntimeException(e);
    }
    results.add(writer.toString());
}
return results;
}

View File

@ -5,7 +5,6 @@ import java.io.InputStream;
import java.util.ArrayList;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -16,23 +15,22 @@ public class ConverterTest {
static Logger logger = LoggerFactory.getLogger(ConverterTest.class);
/**
 * Converts the DisProt N-Quads fixture with {@code nQuadsFile2DataciteJson} and logs the
 * first resulting record, if any. The test makes no assertion on the output; it fails only
 * when the fixture is missing or the conversion throws.
 *
 * @throws Exception when the fixture cannot be read or the conversion fails
 */
@Test
public void nqToDataciteTest() throws Exception {
    final String fixture = "/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq";
    final String nq;
    // try-with-resources: the classpath stream must be closed once it has been read
    try (InputStream is = ConverterTest.class.getResourceAsStream(fixture)) {
        if (is == null) {
            throw new IllegalStateException("Test resource not found on classpath: " + fixture);
        }
        // N-Quads documents are UTF-8 encoded: do not depend on the platform default charset
        nq = IOUtils.toString(is, java.nio.charset.StandardCharsets.UTF_8);
    }
    logger.debug("NQ: " + nq);
    RDFConverter converter = new RDFConverter();
    ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
    if (results != null && !results.isEmpty()) {
        logger.info("JSON DATACITE: " + results.get(0));
    }
}
// @Test
// private void nqToDataciteTest() throws Exception {
// InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq");
// String nq = IOUtils.toString(is);
// logger.debug("NQ: " + nq);
// RDFConverter converter = new RDFConverter();
// ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
// if (results != null && !results.isEmpty()) {
// logger.info("JSON DATACITE: " + results.get(0));
// }
// }
@Test
public void pedCitationTest() throws Exception {
InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/ped.nq");
String nq = IOUtils.toString(is);
logger.debug("NQ: " + nq);
RDFConverter converter = new RDFConverter();
ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
if (results != null && !results.isEmpty()) {