AriadnePlus/dnet-ariadneplus-publisher/src/main/java/eu/dnetlib/ariadneplus/virtuoso/VirtuosoClient.java

198 lines
7.5 KiB
Java

package eu.dnetlib.ariadneplus.virtuoso;
import eu.dnetlib.ariadneplus.publisher.AriadnePlusPublisherException;
import eu.dnetlib.ariadneplus.rdf.RecordParserHelper;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.rdf.model.*;
import virtuoso.jena.driver.VirtModel;
/**
 * Client that publishes RDF records into named graphs of a Virtuoso store and keeps a separate
 * provenance graph ({@link #PROVENANCE_GRAPH}) describing where each named graph was collected from.
 *
 * <p>Each record is stored in its own named graph whose URI is
 * {@code defaultBaseURI + datasourceApi + "/" + objIdentifier}. Every operation opens its own
 * model via {@link VirtModel#openDatabaseModel} and closes it before returning.
 *
 * Created by Alessia Bardi on 12/07/2017.
 *
 * @author Alessia Bardi
 */
public class VirtuosoClient {

    private static final Log log = LogFactory.getLog(VirtuosoClient.class);

    public static final String PROVENANCE_NS = "http://www.d-net.research-infrastructures.eu/provenance/";
    public static final String PROVENANCE_GRAPH = PROVENANCE_NS+"graph";
    public static Property IS_API_OF = ResourceFactory.createProperty(PROVENANCE_NS, "isApiOf");
    public static Property COLL_FROM = ResourceFactory.createProperty(PROVENANCE_NS, "collectedFrom");
    public static Property COLL_IN_DATE = ResourceFactory.createProperty(PROVENANCE_NS, "collectedInDate");
    public static Property TRANS_IN_DATE = ResourceFactory.createProperty(PROVENANCE_NS, "transformedInDate");

    /** Number of statements written to the provenance graph for each fed record. */
    private static final long PROVENANCE_TRIPLES_PER_RECORD = 4;

    private RecordParserHelper recordParserHelper;
    private String connectionString;
    private String username;
    private String password;
    private String defaultBaseURI;

    /**
     * Creates a client bound to the given store.
     *
     * @param connectionString   connection string handed to {@link VirtModel#openDatabaseModel}
     * @param username           user name for the store
     * @param password           password for the store
     * @param recordParserHelper helper used to extract identifiers, dates and the RDF payload from a record
     * @param defaultBaseURI     prefix of every named graph / API resource URI; also used as base URI when parsing RDF
     */
    protected VirtuosoClient(final String connectionString,
            final String username,
            final String password,
            final RecordParserHelper recordParserHelper,
            final String defaultBaseURI) {
        this.connectionString = connectionString;
        this.username = username;
        this.password = password;
        this.recordParserHelper = recordParserHelper;
        this.defaultBaseURI = defaultBaseURI;
    }

    //TODO: exploit new method eu.dnetlib.ariadneplus.publisher.SaxonHelper.Helper.parseXML() to avoid re-parsing the full record.
    /**
     * Replaces the content of the record's named graph with the RDF carried by the record and
     * refreshes the provenance statements for that graph.
     *
     * <p>A blank record, or a record without objIdentifier, is skipped. A record without an
     * rdf:RDF payload empties its named graph (as announced by the warning that is logged).
     *
     * @param record the serialized record
     * @return the number of triples in the record's named graph after the update, 0 if the record was skipped
     * @throws AriadnePlusPublisherException wrapping any failure raised while parsing or writing
     */
    public long feed(final String record) throws AriadnePlusPublisherException{
        Model md = null;
        try {
            if (StringUtils.isBlank(record)) {
                log.warn("Got empty record");
                return 0;
            }
            String objIdentifier = recordParserHelper.getObjIdentifier(record);
            if (StringUtils.isBlank(objIdentifier)) {
                log.warn("Got record with no objIdentifier -- skipping");
                return 0;
            }
            String rdfBlock = recordParserHelper.getRDF(record);
            final boolean hasRdf = StringUtils.isNotBlank(rdfBlock);
            if (!hasRdf) {
                log.warn("Missing rdf:RDF in record with objIdentifier " + objIdentifier + " all triples in that named graph will be deleted");
            }
            String collectionDate = recordParserHelper.getCollectionDate(record);
            String transformationDate = recordParserHelper.getTransformationDate(record);
            String datasource = recordParserHelper.getDatasourceName(record);
            String dsInterface = recordParserHelper.getDatasourceApi(record);
            String namedGraph = getRecordDefaultURI(objIdentifier, dsInterface);

            log.debug("Trying to open the database model " + namedGraph+", connection string "+getConnectionString());
            md = VirtModel.openDatabaseModel(namedGraph, getConnectionString(), getUsername(), getPassword());
            log.debug("Opened virtuoso model for graph " + namedGraph);
            md.removeAll();
            log.debug("Removed all triples from graph " + namedGraph);
            // Only parse when there is something to parse: feeding a blank/null payload to the
            // reader fails, which would abort the update right after the graph has been emptied.
            if (hasRdf) {
                md.read(IOUtils.toInputStream(rdfBlock, "UTF-8"), getDefaultBaseURI());
            }
            long size = md.size();
            log.debug("Graph " + namedGraph + " now has " + size + " triples");
            long ntriples = feedProvenance(namedGraph, collectionDate, transformationDate, datasource, dsInterface);
            log.debug("provenance graph for " + namedGraph + " updated with " + ntriples + " triples");
            return size;
        } catch (Throwable e) {
            // Pass the throwable as second argument so that the stack trace is logged too.
            log.error("Error feeding record: " + e.getMessage(), e);
            throw new AriadnePlusPublisherException(e);
        } finally {
            closeQuietly(md);
        }
    }

    /**
     * Rewrites the provenance statements of a named graph: previous statements having the named
     * graph as subject are removed, then the API, collection date and transformation date
     * statements are added, together with the statement linking the API to its datasource.
     *
     * @param namedGraphURI      URI of the named graph the provenance refers to
     * @param collectionDate     lexical form of the collection date, stored as xsd:dateTime
     * @param transformationDate lexical form of the transformation date, stored as xsd:dateTime
     * @param datasource         name of the datasource, stored as plain literal
     * @param api                id of the API; its resource URI is {@code defaultBaseURI + api}
     * @return the number of statements added to the provenance graph
     */
    long feedProvenance(final String namedGraphURI, final String collectionDate, final String transformationDate, final String datasource, final String api) {
        Model md = VirtModel.openDatabaseModel(PROVENANCE_GRAPH, getConnectionString(), getUsername(), getPassword());
        try {
            Resource rApi = ResourceFactory.createResource(defaultBaseURI + api);
            Resource r = ResourceFactory.createResource(namedGraphURI);
            Statement stmApi =
                    ResourceFactory.createStatement(rApi, IS_API_OF, ResourceFactory.createPlainLiteral(datasource));
            Statement stmCollFrom =
                    ResourceFactory.createStatement(r, COLL_FROM, rApi);
            Statement stmCollDate = ResourceFactory
                    .createStatement(r, COLL_IN_DATE, ResourceFactory.createTypedLiteral(collectionDate, XSDDatatype.XSDdateTime));
            Statement stmTransDate = ResourceFactory
                    .createStatement(r, TRANS_IN_DATE, ResourceFactory.createTypedLiteral(transformationDate, XSDDatatype.XSDdateTime));

            //let's remove previous provenance statements for this resource:
            md.removeAll(r, null, null);
            //and add the new ones
            md.add(stmApi).add(stmCollFrom).add(stmCollDate).add(stmTransDate);
            return PROVENANCE_TRIPLES_PER_RECORD;
        } finally {
            closeQuietly(md);
        }
    }

    /**
     * Feeds the given records one after the other. The first failing record aborts the loop
     * because its exception is propagated.
     *
     * @param records the serialized records
     * @return the sum of the values returned by {@link #feed(String)} for each record
     * @throws AriadnePlusPublisherException if feeding any record fails
     */
    public long feed(final Iterable<String> records) throws AriadnePlusPublisherException {
        //TODO: can we do it in parallel? if all records have different objIdentifier it is safe, and this must be the case anyway, because the source of records is a D-Net mdstore.
        long count = 0;
        for (String r : records) {
            count += this.feed(r);
        }
        return count;
    }

    /**
     * Delete all triples in named graphs collected from the given api
     * @param api the id of the API
     * @return the number of triples deleted from the named graphs associated to the given api
     */
    public long drop(final String api){
        Model prov = VirtModel.openDatabaseModel(PROVENANCE_GRAPH, getConnectionString(), getUsername(), getPassword());
        try {
            //look for all named graphs associated to the api
            Resource rApi = ResourceFactory.createResource(defaultBaseURI + api);
            long deletedTriples = 0;
            // Snapshot the subjects first: the loop below removes statements from the very model
            // the iterator reads from, and a model must not be modified while it is being iterated.
            for (Resource namedGraphURI : prov.listSubjectsWithProperty(COLL_FROM, rApi).toList()) {
                //delete all triples belonging to the r named graph
                deletedTriples += dropNamedGraph(namedGraphURI.getURI());
                //delete the named graph from the provenance graph
                prov.removeAll(namedGraphURI, null, null);
            }
            //delete the api from the provenance graph
            prov.removeAll(null, null, rApi);
            prov.removeAll(rApi, null, null);
            return deletedTriples;
        } finally {
            closeQuietly(prov);
        }
    }

    /**
     * Removes every triple of the given named graph.
     *
     * @param namedGraphURI URI of the named graph to empty
     * @return the number of triples the graph contained before being emptied
     */
    private long dropNamedGraph(String namedGraphURI){
        Model namedGraph = VirtModel.openDatabaseModel(namedGraphURI, getConnectionString(), getUsername(), getPassword());
        try {
            long deletedTriples = namedGraph.size();
            namedGraph.removeAll();
            return deletedTriples;
        } finally {
            closeQuietly(namedGraph);
        }
    }

    /**
     * Closes the model if it is open. A failure while closing is logged instead of being
     * propagated, so that it cannot replace the exception (or result) of the operation itself.
     *
     * @param model the model to close, may be null
     */
    private static void closeQuietly(final Model model) {
        if (model == null) {
            return;
        }
        try {
            if (!model.isClosed()) {
                model.close();
            }
        } catch (RuntimeException e) {
            log.warn("Error closing virtuoso model: " + e.getMessage(), e);
        }
    }

    /**
     * Builds the URI of the named graph holding a record.
     *
     * @param objIdentifier identifier of the record
     * @param datasourceApi id of the API the record was collected from
     * @return {@code defaultBaseURI + datasourceApi + "/" + objIdentifier}
     */
    private String getRecordDefaultURI(final String objIdentifier, final String datasourceApi) {
        return defaultBaseURI + datasourceApi + "/" + objIdentifier;
    }

    public String getConnectionString() {
        return connectionString;
    }

    public String getUsername() {
        return username;
    }

    public String getPassword() {
        return password;
    }

    public String getDefaultBaseURI() {
        return defaultBaseURI;
    }

    public RecordParserHelper getRecordParserHelper() {
        return recordParserHelper;
    }

    public void setRecordParserHelper(final RecordParserHelper recordParserHelper) {
        this.recordParserHelper = recordParserHelper;
    }

    public void setConnectionString(final String connectionString) {
        this.connectionString = connectionString;
    }

    public void setUsername(final String username) {
        this.username = username;
    }

    public void setPassword(final String password) {
        this.password = password;
    }

    public void setDefaultBaseURI(final String defaultBaseURI) {
        this.defaultBaseURI = defaultBaseURI;
    }
}