AriadnePlus/dnet-ariadneplus-publisher/src/main/java/eu/dnetlib/ariadneplus/jrr/JRRPublisher.java

148 lines
5.8 KiB
Java

package eu.dnetlib.ariadneplus.jrr;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import javax.annotation.PostConstruct;
import eu.dnetlib.ariadneplus.CRM;
import eu.dnetlib.ariadneplus.CRMdig;
import eu.dnetlib.ariadneplus.CRMpe;
import eu.dnetlib.ariadneplus.catalogue.CatalogueRegistrator;
import eu.dnetlib.ariadneplus.publisher.AriadnePlusPublisherException;
import eu.dnetlib.ariadneplus.rdf.RecordParserHelper;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.ontology.OntModelSpec;
import org.apache.jena.rdf.model.*;
import org.apache.jena.vocabulary.RDF;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
/**
 * Created by Alessia Bardi on 25/02/2018.
 *
 * <p>Publishes the resource described by an RDF record on the catalogue. Registration on the
 * gCube resource registry is currently disabled (see the TODO in
 * {@link #registerRDFResource(Resource, Resource, String)}).
 *
 * @author Alessia Bardi
 */
@Component
public class JRRPublisher {

    private static final Log log = LogFactory.getLog(JRRPublisher.class);

    /**
     * Types whose instances get registered, in the order in which they are checked: the first
     * type matching the resource wins. Everything that is not Actor, Project, Design or Procedure,
     * Software, Dataset, Collection or Service is skipped, because other entities are only a
     * source of metadata for these ones.
     */
    private static final Resource[] REGISTRABLE_TYPES = {
        CRM.E39_Actor,
        CRMpe.PE35_Project,
        CRM.E29_Design_or_Procedure,
        CRMdig.D14_Software,
        CRMpe.PE18_Dataset,
        CRM.E78_Collection,
        CRMpe.PE1_Service
    };

    /** Ontology model holding the CRMpe, CRM and CRMdig schemas; populated by {@link #init()}. */
    private OntModel baseModel;

    @Autowired
    private CatalogueRegistrator catalogueRegistrator;

    //@Autowired
    //private GCubeResourceRegistrator gCubeResourceRegistrator;

    @Autowired
    private RecordParserHelper recordParserHelper;

    /**
     * Builds the base ontology model (created with {@code OntModelSpec.RDFS_MEM_TRANS_INF}) by
     * reading the CRMpe, CRM and CRMdig definitions from their {@code RDFS_URL} locations.
     */
    @PostConstruct
    public void init() {
        baseModel = ModelFactory.createOntologyModel(OntModelSpec.RDFS_MEM_TRANS_INF);
        baseModel.read(CRMpe.RDFS_URL);
        baseModel.read(CRM.RDFS_URL);
        baseModel.read(CRMdig.RDFS_URL);
    }

    /**
     * Parses the given record, loads its RDF payload on top of the base model and registers the
     * resource identified by the record's object identifier.
     *
     * @param record the record to process, as understood by {@link RecordParserHelper}
     * @throws AriadnePlusPublisherException propagated from the parsing or registration steps
     * @throws IOException propagated from the parsing or registration steps
     * @throws URISyntaxException propagated from the registration step
     * @throws InterruptedException propagated from the registration step
     */
    public void register(final String record)
            throws AriadnePlusPublisherException, IOException, URISyntaxException, InterruptedException {
        // The object identifier contains the subject URI used to get the RDF: that is the only
        // resource we have to register when processing this RDF file.
        final String id = recordParserHelper.getObjIdentifier(record);
        final String datasourceName = recordParserHelper.getDatasourceName(record);
        log.debug("REGISTERING ON JRR: "+id);

        final String rdfRecord = recordParserHelper.getRDF(record);
        final InfModel model = loadBaseModel();
        // Encode explicitly as UTF-8: the single-argument toInputStream(String) relies on the
        // platform default charset, which can corrupt non-ASCII characters of the RDF/XML payload.
        model.read(IOUtils.toInputStream(rdfRecord, StandardCharsets.UTF_8), CRMpe.NS, "RDF/XML");

        register(model, id, datasourceName);
    }

    /**
     * Registers the resource with the given URI if it has one of the {@link #REGISTRABLE_TYPES};
     * resources of any other type are skipped with a debug message.
     *
     * @param model the model containing the description of the resource
     * @param resourceURI URI of the resource to register
     * @param datasourceName name of the datasource the resource comes from
     * @throws AriadnePlusPublisherException propagated from the registration step
     * @throws IOException propagated from the registration step
     * @throws URISyntaxException propagated from the registration step
     * @throws InterruptedException propagated from the registration step
     */
    protected void register(final Model model, final String resourceURI, final String datasourceName)
            throws AriadnePlusPublisherException, IOException, URISyntaxException, InterruptedException {
        final Resource rdfResource = model.getResource(resourceURI);
        // NOTE(review): Jena's Model.getResource is documented to always return a resource, so this
        // branch is presumably never taken; it is kept as a defensive check.
        if (rdfResource == null) {
            log.error("UNEXPECTED NULL rdfResource with resourceURI " + resourceURI + ". I am skipping it, but you should check!");
            return;
        }

        // Call the register method with the first matching base type.
        for (final Resource type : REGISTRABLE_TYPES) {
            if (rdfResource.hasProperty(RDF.type, type)) {
                registerRDFResource(rdfResource, type, datasourceName);
                return;
            }
        }
        log.debug("Skipping " + resourceURI + " because of its type");
    }

    /**
     * Registers the given resource on the catalogue, provided that it has an http(s) URI.
     *
     * @param rdfResource the resource to register
     * @param type the type to register the resource as
     * @param datasourceName name of the datasource the resource comes from
     * @return {@code true} if the catalogue returned a non-blank uuid; {@code false} if the
     *     resource was skipped or the catalogue returned a blank uuid
     * @throws AriadnePlusPublisherException propagated from the catalogue registration
     * @throws IOException propagated from the catalogue registration
     * @throws URISyntaxException propagated from the catalogue registration
     * @throws InterruptedException propagated from the catalogue registration
     */
    protected boolean registerRDFResource(final Resource rdfResource, final Resource type, final String datasourceName)
            throws AriadnePlusPublisherException, IOException, URISyntaxException, InterruptedException {
        final String resURI = rdfResource.getURI();
        // A resource without an http URI is not considered relevant by itself. getURI() returns
        // null for anonymous resources (blank nodes): treat them the same way instead of failing
        // with a NullPointerException.
        if (resURI == null || !resURI.startsWith("http")) {
            log.info("Resource " + resURI + " skipped: URI does not start with http");
            return false;
        }

        final String uuid = registerOnCatalogue(rdfResource, type, datasourceName);
        if (StringUtils.isBlank(uuid)) {
            log.warn("Got blank uuid when registering "+resURI+": skipping registration on the registry");
            return false;
        }
        //TODO: let's skip the registration on the registry for now.
        //registerOnRegistry(rdfResource, uuid, type);
        return true;
    }

    /**
     * Registers a resource of the given type on the catalogue by delegating to the
     * {@link CatalogueRegistrator}.
     *
     * @param rdfResource the resource to register
     * @param type the type to register the resource as
     * @param datasourceName name of the datasource the resource comes from
     * @return the catalogue uuid
     * @throws AriadnePlusPublisherException propagated from the catalogue registrator
     * @throws IOException propagated from the catalogue registrator
     * @throws URISyntaxException propagated from the catalogue registrator
     * @throws InterruptedException propagated from the catalogue registrator
     */
    protected String registerOnCatalogue(final Resource rdfResource, final Resource type, final String datasourceName)
            throws AriadnePlusPublisherException, IOException, URISyntaxException, InterruptedException {
        return catalogueRegistrator.register(rdfResource, type, datasourceName);
    }

    /*
    protected void registerOnRegistry(final Resource rdfResource, final String uuid, final Resource type)
            throws AriadnePlusPublisherException, ResourceRegistryException, IOException {
        gCubeResourceRegistrator.register(rdfResource, uuid, type);
    }
    */

    /**
     * Creates a new RDFS inference model layered on top of the shared base ontology model.
     *
     * @return a fresh inference model into which a record's RDF can be read
     */
    protected InfModel loadBaseModel() {
        return ModelFactory.createRDFSModel(baseModel);
    }
}