forked from D-Net/dnet-hadoop
added subjects
This commit is contained in:
parent
3ef5eec3a6
commit
f43bfdb594
|
@ -19,7 +19,8 @@ public class BioSchemaProtein {
|
|||
@JsonProperty("@id")
|
||||
private String id;
|
||||
@JsonProperty("@type")
|
||||
private List<String> type;
|
||||
// private List<String> type;
|
||||
private String type;
|
||||
@JsonProperty("https://schema.org/identifier")
|
||||
private String identifier;
|
||||
@JsonProperty("https://schema.org/name")
|
||||
|
@ -50,8 +51,16 @@ public class BioSchemaProtein {
|
|||
private Link mainEntityOfPage;
|
||||
@JsonProperty("https://schema.org/citation")
|
||||
private Citation citation;
|
||||
@JsonProperty("https://schema.org/sameAs")
|
||||
private List<Link> sameAs;
|
||||
@JsonProperty("https://schema.org/hasSequenceAnnotation")
|
||||
private SequenceAnnotation sequenceAnnotation;
|
||||
private Link hasSequenceAnnotation;
|
||||
@JsonProperty("https://schema.org/additionalProperty")
|
||||
private List<Link> sequenceAnnotation;
|
||||
@JsonProperty("https://schema.org/value")
|
||||
private Link propertyValue;
|
||||
@JsonProperty("https://schema.org/termCode")
|
||||
private String termCode;
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
|
@ -61,11 +70,11 @@ public class BioSchemaProtein {
|
|||
this.id = id;
|
||||
}
|
||||
|
||||
public List<String> getType() {
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setType(List<String> type) {
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
|
@ -77,9 +86,6 @@ public class BioSchemaProtein {
|
|||
this.name = name;
|
||||
}
|
||||
|
||||
@JsonProperty("https://schema.org/sameAs")
|
||||
private List<Link> sameAs;
|
||||
|
||||
public List<Link> getSameAs() {
|
||||
return sameAs;
|
||||
}
|
||||
|
@ -192,11 +198,19 @@ public class BioSchemaProtein {
|
|||
this.mainEntityOfPage = mainEntityOfPage;
|
||||
}
|
||||
|
||||
public SequenceAnnotation getSequenceAnnotation() {
|
||||
public Link getHasSequenceAnnotation() {
|
||||
return hasSequenceAnnotation;
|
||||
}
|
||||
|
||||
public void setHasSequenceAnnotation(Link hasSequenceAnnotation) {
|
||||
this.hasSequenceAnnotation = hasSequenceAnnotation;
|
||||
}
|
||||
|
||||
public List<Link> getSequenceAnnotation() {
|
||||
return sequenceAnnotation;
|
||||
}
|
||||
|
||||
public void setSequenceAnnotation(SequenceAnnotation sequenceAnnotation) {
|
||||
public void setSequenceAnnotation(List<Link> sequenceAnnotation) {
|
||||
this.sequenceAnnotation = sequenceAnnotation;
|
||||
}
|
||||
|
||||
|
@ -207,6 +221,22 @@ public class BioSchemaProtein {
|
|||
public void setCitation(Citation citation) {
|
||||
this.citation = citation;
|
||||
}
|
||||
|
||||
public Link getPropertyValue() {
|
||||
return propertyValue;
|
||||
}
|
||||
|
||||
public void setPropertyValue(Link propertyValue) {
|
||||
this.propertyValue = propertyValue;
|
||||
}
|
||||
|
||||
public String getTermCode() {
|
||||
return termCode;
|
||||
}
|
||||
|
||||
public void setTermCode(String termCode) {
|
||||
this.termCode = termCode;
|
||||
}
|
||||
}
|
||||
|
||||
public static class IsPartOfBioChemEntity {
|
||||
|
@ -357,33 +387,46 @@ public class BioSchemaProtein {
|
|||
}
|
||||
}
|
||||
|
||||
public static class SequenceAnnotationId {
|
||||
@JsonProperty("@id")
|
||||
private String id;
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
}
|
||||
|
||||
public static class SequenceAnnotation {
|
||||
@JsonProperty("https://schema.org/additionalProperty")
|
||||
private List<AdditionalProperty> additionalProperty;
|
||||
private List<PropertyValue> additionalProperty;
|
||||
|
||||
public List<AdditionalProperty> getAdditionalProperty() {
|
||||
public List<PropertyValue> getAdditionalProperty() {
|
||||
return additionalProperty;
|
||||
}
|
||||
|
||||
public void setAdditionalProperty(List<AdditionalProperty> additionalProperty) {
|
||||
public void setAdditionalProperty(List<PropertyValue> additionalProperty) {
|
||||
this.additionalProperty = additionalProperty;
|
||||
}
|
||||
}
|
||||
|
||||
public static class AdditionalProperty {
|
||||
public static class PropertyValue {
|
||||
@JsonProperty("https://schema.org/value")
|
||||
private List<PropertyValue> propertyValue;
|
||||
private DefinedTerm definedTerm;
|
||||
|
||||
public List<PropertyValue> getPropertyValue() {
|
||||
return propertyValue;
|
||||
public DefinedTerm getDefinedTerm() {
|
||||
return definedTerm;
|
||||
}
|
||||
|
||||
public void setPropertyValue(List<PropertyValue> propertyValue) {
|
||||
this.propertyValue = propertyValue;
|
||||
public void setDefinedTerm(DefinedTerm definedTerm) {
|
||||
this.definedTerm = definedTerm;
|
||||
}
|
||||
}
|
||||
|
||||
public static class PropertyValue {
|
||||
public static class DefinedTerm {
|
||||
@JsonProperty("@id")
|
||||
private String id;
|
||||
@JsonProperty("https://schema.org/termCode")
|
||||
|
|
|
@ -21,6 +21,7 @@ public class DataciteProtein {
|
|||
List<Description> descriptions = new ArrayList<Description>();
|
||||
List<Title> titles = new ArrayList<Title>();
|
||||
private List<DataciteDate> dates = new ArrayList<DataciteDate>();
|
||||
private List<Subject> subjects = new ArrayList<Subject>();
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public static class Types {
|
||||
|
@ -189,6 +190,37 @@ public class DataciteProtein {
|
|||
}
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public static class Subject {
|
||||
private String schemeURI;
|
||||
private String value;
|
||||
private String subjectScheme;
|
||||
|
||||
public String getSchemeURI() {
|
||||
return schemeURI;
|
||||
}
|
||||
|
||||
public void setSchemeURI(String schemeURI) {
|
||||
this.schemeURI = schemeURI;
|
||||
}
|
||||
|
||||
public String getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public void setValue(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public String getSubjectScheme() {
|
||||
return subjectScheme;
|
||||
}
|
||||
|
||||
public void setSubjectScheme(String subjectScheme) {
|
||||
this.subjectScheme = subjectScheme;
|
||||
}
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
@ -288,4 +320,12 @@ public class DataciteProtein {
|
|||
public void setDates(List<DataciteDate> dates) {
|
||||
this.dates = dates;
|
||||
}
|
||||
|
||||
public List<Subject> getSubjects() {
|
||||
return subjects;
|
||||
}
|
||||
|
||||
public void setSubjects(List<Subject> subjects) {
|
||||
this.subjects = subjects;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.rdfconverter.utils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.io.StringWriter;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.eclipse.rdf4j.model.Model;
|
||||
import org.eclipse.rdf4j.rio.RDFFormat;
|
||||
|
@ -19,6 +21,7 @@ import com.github.jsonldjava.core.JsonLdOptions;
|
|||
import com.github.jsonldjava.core.JsonLdProcessor;
|
||||
import com.github.jsonldjava.utils.JsonUtils;
|
||||
|
||||
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
|
||||
import eu.dnetlib.dhp.rdfconverter.bioschema.model.BioSchemaProtein;
|
||||
import eu.dnetlib.dhp.rdfconverter.bioschema.model.DataciteProtein;
|
||||
|
||||
|
@ -62,10 +65,18 @@ public class RDFConverter {
|
|||
}).filter(id -> id != null).collect(Collectors.toList());
|
||||
|
||||
ArrayList<String> results = new ArrayList<String>();
|
||||
final List<DataciteProtein> dataciteProteins = new ArrayList<>();
|
||||
final Map<String, List<BioSchemaProtein.Link>> sequenceAnnotations = new HashMap<>();
|
||||
final Map<String, String> propertyValues = new HashMap<>();
|
||||
final Map<String, BioSchemaProtein.DefinedTerm> definedTerms = new HashMap<>();
|
||||
final Map<String, BioSchemaProtein.Entry> proteins = new HashMap<>();
|
||||
|
||||
bioSchemaProtein.getEntryList().stream().forEach(entry -> {
|
||||
|
||||
if (entry.getType() != null
|
||||
&& entry.getType().stream().filter(type -> type.equals("https://schema.org/Protein")).count() == 1) {
|
||||
&& entry
|
||||
.getType()
|
||||
.equals("https://schema.org/Protein")) {
|
||||
|
||||
DataciteProtein dataciteProtein = new DataciteProtein();
|
||||
|
||||
|
@ -173,8 +184,8 @@ public class RDFConverter {
|
|||
});
|
||||
}
|
||||
|
||||
if (entry.getSequenceAnnotation() !=null) {
|
||||
log.debug("Sequence Annotation found " );
|
||||
if (entry.getHasSequenceAnnotation() != null) {
|
||||
log.debug("Sequence Annotation id found: " + entry.getHasSequenceAnnotation().getId());
|
||||
}
|
||||
|
||||
String proteinId = "";
|
||||
|
@ -186,17 +197,80 @@ public class RDFConverter {
|
|||
}
|
||||
|
||||
dataciteProtein.setId(proteinId);
|
||||
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
try {
|
||||
StringWriter writer = new StringWriter();
|
||||
mapper.writeValue(writer, dataciteProtein);
|
||||
results.add(writer.toString());
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
proteins.put(entry.getId(), entry);
|
||||
dataciteProteins.add(dataciteProtein);
|
||||
}
|
||||
if (entry.getType() != null
|
||||
&& entry
|
||||
.getType()
|
||||
.equals("https://schema.org/SequenceAnnotation")) {
|
||||
log.debug("Sequence Annotation found ");
|
||||
log.debug("sequence id > " + entry.getId());
|
||||
entry.getSequenceAnnotation().forEach(l -> {
|
||||
log.debug(l.getId());
|
||||
});
|
||||
sequenceAnnotations.put(entry.getId(), entry.getSequenceAnnotation());
|
||||
}
|
||||
if (entry.getType() != null
|
||||
&& entry
|
||||
.getType()
|
||||
.equals("https://schema.org/PropertyValue")) {
|
||||
log.debug("Property found ");
|
||||
log.debug(entry.getPropertyValue().getId());
|
||||
propertyValues.put(entry.getId(), entry.getPropertyValue().getId());
|
||||
}
|
||||
if (entry.getType() != null
|
||||
&& entry
|
||||
.getType()
|
||||
.equals("https://schema.org/DefinedTerm")) {
|
||||
log.debug("Term found ");
|
||||
log.debug(entry.getTermCode());
|
||||
BioSchemaProtein.DefinedTerm term = new BioSchemaProtein.DefinedTerm();
|
||||
term.setId(entry.getId());
|
||||
term.setTermCode(entry.getTermCode());
|
||||
term.setName(entry.getName());
|
||||
definedTerms.put(term.getId(), term);
|
||||
}
|
||||
});
|
||||
proteins.entrySet().stream().forEach(p -> {
|
||||
String sequenceAnnotationId = p.getValue().getHasSequenceAnnotation().getId();
|
||||
List<BioSchemaProtein.Link> propertyIds = sequenceAnnotations.get(sequenceAnnotationId);
|
||||
List<String> termIds = propertyIds
|
||||
.stream()
|
||||
.map(propertyId -> propertyValues.get(propertyId.getId()))
|
||||
.collect(Collectors.toList());
|
||||
List<BioSchemaProtein.DefinedTerm> terms = termIds
|
||||
.stream()
|
||||
.map(term -> definedTerms.get(term))
|
||||
.collect(Collectors.toList());
|
||||
terms.forEach(t -> {
|
||||
log.debug("protein id: " + p.getKey() + " >>> term: " + t.getId());
|
||||
dataciteProteins.stream().filter(d -> {
|
||||
return p.getKey().contains(d.getId());
|
||||
}).forEach(d -> {
|
||||
DataciteProtein.Subject subject = new DataciteProtein.Subject();
|
||||
subject.setSchemeURI(t.getId());
|
||||
subject.setValue(t.getName());
|
||||
subject.setSubjectScheme(t.getTermCode());
|
||||
d.getSubjects().add(subject);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// Serialize every collected Datacite record to a JSON string, in collection order.
// An I/O failure while writing is rethrown unchecked, keeping the original exception as cause.
ObjectMapper mapper = new ObjectMapper();
for (DataciteProtein dataciteRecord : dataciteProteins) {
	StringWriter jsonBuffer = new StringWriter();
	try {
		mapper.writeValue(jsonBuffer, dataciteRecord);
	} catch (IOException e) {
		throw new RuntimeException(e);
	}
	results.add(jsonBuffer.toString());
}
|
||||
return results;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,7 +5,6 @@ import java.io.InputStream;
|
|||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -16,23 +15,22 @@ public class ConverterTest {
|
|||
|
||||
static Logger logger = LoggerFactory.getLogger(ConverterTest.class);
|
||||
|
||||
@Test
|
||||
public void nqToDataciteTest() throws Exception {
|
||||
InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq");
|
||||
String nq = IOUtils.toString(is);
|
||||
logger.debug("NQ: " + nq);
|
||||
RDFConverter converter = new RDFConverter();
|
||||
ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
|
||||
if (results != null && !results.isEmpty()) {
|
||||
logger.info("JSON DATACITE: " + results.get(0));
|
||||
}
|
||||
}
|
||||
// @Test
|
||||
// private void nqToDataciteTest() throws Exception {
|
||||
// InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq");
|
||||
// String nq = IOUtils.toString(is);
|
||||
// logger.debug("NQ: " + nq);
|
||||
// RDFConverter converter = new RDFConverter();
|
||||
// ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
|
||||
// if (results != null && !results.isEmpty()) {
|
||||
// logger.info("JSON DATACITE: " + results.get(0));
|
||||
// }
|
||||
// }
|
||||
|
||||
@Test
|
||||
public void pedCitationTest() throws Exception {
|
||||
InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/ped.nq");
|
||||
String nq = IOUtils.toString(is);
|
||||
logger.debug("NQ: " + nq);
|
||||
RDFConverter converter = new RDFConverter();
|
||||
ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
|
||||
if (results != null && !results.isEmpty()) {
|
||||
|
|
Loading…
Reference in New Issue