package eu.dnetlib.dhp.rdfconverter.utils; import java.io.StringReader; import java.io.StringWriter; import java.util.*; import java.util.stream.Collectors; import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.rio.RDFFormat; import org.eclipse.rdf4j.rio.RDFWriter; import org.eclipse.rdf4j.rio.Rio; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.github.jsonldjava.core.JsonLdOptions; import com.github.jsonldjava.core.JsonLdProcessor; import com.github.jsonldjava.utils.JsonUtils; import eu.dnetlib.dhp.rdfconverter.bioschema.model.BioSchemaProtein; import eu.dnetlib.dhp.rdfconverter.bioschema.model.DataciteProtein; public class RDFConverter { private static final Logger log = LoggerFactory.getLogger(RDFConverter.class); public ArrayList nQuadsFile2DataciteJson(String nquads, String profile) throws Exception { if (profile.equals("Protein")) { return nQuadsFile2DataciteJson(nquads); } throw new RuntimeException("Profile not supported"); } private ArrayList nQuadsFile2DataciteJson(String nquads) throws Exception { StringReader reader = new StringReader(nquads); Model model = Rio.parse(reader, "", RDFFormat.NQUADS); StringWriter jsonLDWriter = new StringWriter(); RDFWriter rdfRecordWriter = Rio.createWriter(RDFFormat.JSONLD, jsonLDWriter); Rio.write(model, rdfRecordWriter); String jsonLDBuffer = jsonLDWriter.toString(); Object jsonObject = JsonUtils.fromString(jsonLDBuffer); Object compact = JsonLdProcessor.compact(jsonObject, new HashMap<>(), new JsonLdOptions()); String compactContent = JsonUtils.toString(compact); log.debug("jsonld: " + compactContent); ObjectMapper objectMapper = new ObjectMapper(); objectMapper.enable(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY); objectMapper.enable(DeserializationFeature.ACCEPT_EMPTY_STRING_AS_NULL_OBJECT); objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); BioSchemaProtein bioSchemaProtein = objectMapper.readValue(compactContent, BioSchemaProtein.class); log.debug("BioSchema id: " + bioSchemaProtein.getId()); BioSchemaProtein.DateTimeType retrievedOnType = bioSchemaProtein.getRetrievedOn(); List citations = bioSchemaProtein.getEntryList().stream().map(entry -> { if (entry.getCitation() != null) { BioSchemaProtein.Citation citationInfo = entry.getCitation(); return citationInfo.getId(); } return null; }).filter(id -> id != null).collect(Collectors.toList()); ArrayList results = new ArrayList(); bioSchemaProtein.getEntryList().stream().forEach(entry -> { if (entry.getType() != null && entry.getType().stream().filter(type -> type.equals("https://schema.org/Protein")).count() == 1) { DataciteProtein dataciteProtein = new DataciteProtein(); citations.forEach(citation -> { addRelatedIdentifier(dataciteProtein, citation, "IsCitedBy"); }); DataciteProtein.Types types = new DataciteProtein.Types(); types.setResourceType("Protein"); types.setResourceTypeGeneral("Dataset"); dataciteProtein.setTypes(types); DataciteProtein.DataciteDate dataciteDate = new DataciteProtein.DataciteDate(); dataciteDate.setDate(retrievedOnType.getValue()); dataciteDate.setDateType("Collected"); dataciteProtein.getDates().add(dataciteDate); if (entry.getName() != null) { log.debug("Name: " + entry.getName()); DataciteProtein.Title title = new DataciteProtein.Title(); title.setTitle(entry.getName()); dataciteProtein.getTitles().add(title); } DataciteProtein.Identifier identifier = new DataciteProtein.Identifier(); log.debug("Id: " + entry.getId()); identifier.setIdentifier(entry.getId()); identifier.setIdentifierType("URL"); dataciteProtein.getIdentifiers().add(identifier); if (entry.getIdentifier() != null) { log.debug("Identifier: " + entry.getIdentifier()); addAlternateIdentifier(dataciteProtein, entry.getIdentifier()); } if (entry.getDescription() != null) { log.debug("description: " + entry.getDescription()); DataciteProtein.Description description = new DataciteProtein.Description(); description.setDescription(entry.getDescription()); dataciteProtein.getDescriptions().add(description); } if (entry.getIsEncodedByBioChemEntity() != null) { log.debug("isEncodedByBioChemEntity: " + entry.getIsEncodedByBioChemEntity()); addRelatedIdentifier(dataciteProtein, entry.getIsEncodedByBioChemEntity(), ""); } if (entry.getUrl() != null) { log.debug("url: " + entry.getUrl()); addAlternateIdentifier(dataciteProtein, entry.getUrl()); } if (entry.getAlternateName() != null) { log.debug("alternateName: " + entry.getAlternateName()); DataciteProtein.Title title = new DataciteProtein.Title(); title.setTitle(entry.getAlternateName()); title.setTitleType("AlternativeTitle"); dataciteProtein.getTitles().add(title); } if (entry.getBioChemInteraction() != null) { entry.getBioChemInteraction().stream().filter(Objects::nonNull).forEach(bc -> { log.debug("bioChemInteraction: " + bc.getId()); addRelatedIdentifier(dataciteProtein, bc.getId(), ""); }); } if (entry.getBioChemSimilarity() != null) { entry.getBioChemSimilarity().stream().filter(Objects::nonNull).forEach(bc -> { log.debug("bioChemSimilarity: " + bc.getId()); addRelatedIdentifier(dataciteProtein, bc.getId(), ""); }); } if (entry.getHasMolecularFunction() != null) { log.debug("hasMolecularFunction: " + entry.getHasMolecularFunction()); addRelatedIdentifier(dataciteProtein, entry.getHasMolecularFunction(), ""); } if (entry.getIsInvolvedInBiologicalProcess() != null) { log.debug("isInvolvedInBiologicalProcess: " + entry.getIsInvolvedInBiologicalProcess()); addRelatedIdentifier(dataciteProtein, entry.getIsInvolvedInBiologicalProcess(), ""); } if (entry.getIsEncodedByBioChemEntity() != null) { log.debug("isEncodedByBioChemEntity: " + entry.getIsEncodedByBioChemEntity()); addRelatedIdentifier(dataciteProtein, entry.getIsEncodedByBioChemEntity(), ""); } if (entry.getIsPartOfBioChemEntity() != null) { log.debug("isPartOfBioChemEntity: " + entry.getIsPartOfBioChemEntity()); addRelatedIdentifier(dataciteProtein, entry.getIsPartOfBioChemEntity().getUrl(), ""); } if (entry.getSameAs() != null) { entry.getSameAs().stream().filter(Objects::nonNull).forEach(sameAs -> { log.debug("sameAs: " + sameAs.getId()); addRelatedIdentifier(dataciteProtein, sameAs.getId(), "IsIdenticalTo"); }); } if (entry.getAssociatedDisease() != null) { entry.getAssociatedDisease().stream().filter(Objects::nonNull).forEach(ad -> { log.debug("associated disease: " + ad.getName()); addRelatedIdentifier(dataciteProtein, ad.getName(), "IsIdenticalTo"); }); } String proteinId = ""; try { String[] identifierParts = dataciteProtein.getIdentifiers().get(0).getIdentifier().split("/"); proteinId = identifierParts[identifierParts.length - 1]; } catch (Exception e) { log.error("Identifier not found", e.getMessage()); } dataciteProtein.setId(proteinId); ObjectMapper mapper = new ObjectMapper(); try { StringWriter writer = new StringWriter(); mapper.writeValue(writer, dataciteProtein); results.add(writer.toString()); } catch (Exception e) { throw new RuntimeException(e); } } }); return results; } private void addRelatedIdentifier(DataciteProtein DataciteProtein, String relatedIdentifierValue, String relationType) { DataciteProtein.RelatedIdentifier relatedIdentifier = new DataciteProtein.RelatedIdentifier(); relatedIdentifier.setRelatedIdentifier(relatedIdentifierValue); if (!relationType.isEmpty()) { relatedIdentifier.setRelationType(relationType); } if (relatedIdentifierValue.contains("http://") || relatedIdentifierValue.contains("https://")) { relatedIdentifier.setRelatedIdentifierType("URL"); } DataciteProtein.getRelatedIdentifiers().add(relatedIdentifier); } private void addAlternateIdentifier(DataciteProtein DataciteProtein, String alternateIdentifierValue) { DataciteProtein.AlternateIdentifier alternateIdentifier = new DataciteProtein.AlternateIdentifier(); alternateIdentifier.setAlternateIdentifier(alternateIdentifierValue); DataciteProtein.getAlternateIdentifiers().add(alternateIdentifier); } }