2019-12-12 12:58:30 +01:00
|
|
|
package eu.dnetlib.ariadneplus.graphdb;
|
|
|
|
|
2020-05-30 17:21:03 +02:00
|
|
|
import java.io.File;
|
|
|
|
import java.net.URL;
|
2020-06-12 18:14:41 +02:00
|
|
|
import java.nio.charset.StandardCharsets;
|
2019-12-13 14:53:15 +01:00
|
|
|
import java.time.LocalDateTime;
|
2020-06-12 18:14:41 +02:00
|
|
|
import java.util.Arrays;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Properties;
|
2019-12-13 14:53:15 +01:00
|
|
|
|
2020-06-12 18:14:41 +02:00
|
|
|
import eu.dnetlib.ariadneplus.elasticsearch.BulkUpload;
|
|
|
|
import eu.dnetlib.ariadneplus.reader.ResourceManager;
|
|
|
|
import eu.dnetlib.ariadneplus.reader.RunSPARQLQueryService;
|
|
|
|
import eu.dnetlib.ariadneplus.reader.json.ParseRDFJSON;
|
2020-05-30 17:21:03 +02:00
|
|
|
import org.apache.commons.io.FileUtils;
|
2019-12-12 12:58:30 +01:00
|
|
|
import org.apache.commons.io.IOUtils;
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
import org.apache.commons.logging.Log;
|
|
|
|
import org.apache.commons.logging.LogFactory;
|
|
|
|
import org.eclipse.rdf4j.RDF4JException;
|
|
|
|
import org.eclipse.rdf4j.model.IRI;
|
2019-12-13 14:53:15 +01:00
|
|
|
import org.eclipse.rdf4j.model.Statement;
|
2019-12-12 12:58:30 +01:00
|
|
|
import org.eclipse.rdf4j.model.ValueFactory;
|
2020-05-27 23:07:08 +02:00
|
|
|
import org.eclipse.rdf4j.query.*;
|
2019-12-12 12:58:30 +01:00
|
|
|
import org.eclipse.rdf4j.repository.Repository;
|
|
|
|
import org.eclipse.rdf4j.repository.RepositoryConnection;
|
|
|
|
import org.eclipse.rdf4j.repository.manager.RemoteRepositoryManager;
|
2020-01-15 12:30:18 +01:00
|
|
|
import org.eclipse.rdf4j.repository.util.Repositories;
|
2019-12-12 12:58:30 +01:00
|
|
|
import org.eclipse.rdf4j.rio.RDFFormat;
|
|
|
|
|
|
|
|
import eu.dnetlib.ariadneplus.publisher.AriadnePlusPublisherException;
|
|
|
|
import eu.dnetlib.ariadneplus.rdf.RecordParserHelper;
|
|
|
|
import net.sf.saxon.s9api.SaxonApiException;
|
2020-06-12 18:14:41 +02:00
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
import org.springframework.core.io.ClassPathResource;
|
|
|
|
import org.springframework.stereotype.Component;
|
2019-12-12 12:58:30 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @author enrico.ottonello
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2020-06-12 18:14:41 +02:00
|
|
|
@Component
|
2019-12-12 12:58:30 +01:00
|
|
|
public class GraphDBClient {
|
|
|
|
|
|
|
|
private static final Log log = LogFactory.getLog(GraphDBClient.class);
|
|
|
|
|
2019-12-13 14:53:15 +01:00
|
|
|
public static final String PROVENANCE_NS = "http://www.d-net.research-infrastructures.eu/provenance/";
|
2020-06-12 18:14:41 +02:00
|
|
|
public static final int NUM_RECORDS_THRESHOLD = 10;
|
|
|
|
|
|
|
|
@Autowired
|
|
|
|
private RunSPARQLQueryService runSPQRLQuery;
|
|
|
|
@Autowired
|
|
|
|
private ParseRDFJSON parseRDFJSON;
|
|
|
|
@Autowired
|
|
|
|
private ResourceManager resourceManager;
|
|
|
|
@Autowired
|
|
|
|
private BulkUpload bulkUpload;
|
|
|
|
|
2019-12-12 12:58:30 +01:00
|
|
|
private RecordParserHelper recordParserHelper;
|
|
|
|
private String graphDBServerUrl;
|
2019-12-13 14:53:15 +01:00
|
|
|
private String graphDBBaseURI;
|
|
|
|
private String writerUser;
|
|
|
|
private String writerPwd;
|
2019-12-16 14:46:42 +01:00
|
|
|
private String repository;
|
2019-12-13 14:53:15 +01:00
|
|
|
|
2020-06-12 18:14:41 +02:00
|
|
|
protected void setup(final RecordParserHelper recordParserHelper,
|
2019-12-16 14:46:42 +01:00
|
|
|
final String graphDBServerUrl, final String graphDBBaseURI, final String writerUser, final String writerPwd, final String repository) {
|
2019-12-12 12:58:30 +01:00
|
|
|
this.recordParserHelper = recordParserHelper;
|
|
|
|
this.graphDBServerUrl = graphDBServerUrl;
|
2019-12-13 14:53:15 +01:00
|
|
|
this.graphDBBaseURI = graphDBBaseURI;
|
|
|
|
this.writerUser = writerUser;
|
|
|
|
this.writerPwd = writerPwd;
|
2019-12-16 14:46:42 +01:00
|
|
|
this.repository = repository;
|
2019-12-12 12:58:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
public long feed(final String record) throws AriadnePlusPublisherException{
|
|
|
|
try {
|
|
|
|
String objIdentifier = recordParserHelper.getObjIdentifier(record);
|
|
|
|
if (StringUtils.isBlank(objIdentifier)) {
|
|
|
|
log.warn("Got record with no objIdentifier -- skipping");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
|
|
|
|
manager.init();
|
2019-12-13 14:53:15 +01:00
|
|
|
manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
|
2019-12-16 14:46:42 +01:00
|
|
|
Repository repository = manager.getRepository(getRepository());
|
2019-12-12 12:58:30 +01:00
|
|
|
ValueFactory factory = repository.getValueFactory();
|
2020-01-14 16:55:45 +01:00
|
|
|
String datasourceApi = recordParserHelper.getDatasourceApi(record);
|
2020-02-19 14:33:54 +01:00
|
|
|
IRI graph = factory.createIRI(getGraphDBBaseURI(), datasourceApi);
|
2019-12-12 12:58:30 +01:00
|
|
|
try (RepositoryConnection con = repository.getConnection()) {
|
|
|
|
con.begin();
|
2020-01-14 16:55:45 +01:00
|
|
|
String recordURI = getRecordURI(objIdentifier, datasourceApi);
|
2020-07-24 12:34:06 +02:00
|
|
|
// log.debug("Trying to adding record with recordURI " + recordURI + " into graph " + graph);
|
2020-02-19 14:33:54 +01:00
|
|
|
con.add(IOUtils.toInputStream(getRDFBlock(record), "UTF-8"), recordURI, RDFFormat.RDFXML, graph);
|
2019-12-12 12:58:30 +01:00
|
|
|
con.commit();
|
2020-07-24 12:34:06 +02:00
|
|
|
// log.debug("statement added");
|
2019-12-12 12:58:30 +01:00
|
|
|
con.close();
|
|
|
|
}
|
|
|
|
catch (RDF4JException e) {
|
2020-02-19 14:33:54 +01:00
|
|
|
log.error("error adding statement ...", e);
|
2019-12-12 12:58:30 +01:00
|
|
|
}
|
|
|
|
repository.shutDown();
|
|
|
|
manager.shutDown();
|
|
|
|
return 1;
|
|
|
|
}catch(Throwable e){
|
|
|
|
log.error(e);
|
|
|
|
throw new AriadnePlusPublisherException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-24 10:52:04 +01:00
|
|
|
public long feedProvenance(final String datasource, final String datasourceApi) throws AriadnePlusPublisherException {
|
2019-12-13 14:53:15 +01:00
|
|
|
|
|
|
|
try {
|
|
|
|
RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
|
|
|
|
manager.init();
|
2019-12-16 14:46:42 +01:00
|
|
|
manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
|
|
|
|
Repository repository = manager.getRepository(getRepository());
|
2019-12-13 14:53:15 +01:00
|
|
|
ValueFactory factory = repository.getValueFactory();
|
|
|
|
IRI IS_API_OF = factory.createIRI(PROVENANCE_NS, "isApiOf");
|
|
|
|
IRI INSERTED_IN_DATE = factory.createIRI(PROVENANCE_NS, "insertedInDate");
|
|
|
|
IRI rApi = factory.createIRI(getGraphDBBaseURI(), datasourceApi);
|
2020-01-24 10:52:04 +01:00
|
|
|
Statement stmApi = factory.createStatement(rApi, IS_API_OF, factory.createLiteral(datasource));
|
2019-12-13 14:53:15 +01:00
|
|
|
LocalDateTime now = LocalDateTime.now();
|
|
|
|
Statement stmInsertedDate = factory.createStatement(rApi, INSERTED_IN_DATE, factory.createLiteral(now.toString()));
|
|
|
|
IRI datasourceApisGraph = factory.createIRI(getGraphDBBaseURI(), "datasourceApis");
|
|
|
|
try (RepositoryConnection con = repository.getConnection()) {
|
|
|
|
con.begin();
|
2020-07-24 12:34:06 +02:00
|
|
|
// log.debug("Adding stmt " + stmApi.toString() + " into graph " + datasourceApisGraph.toString());
|
2020-01-24 10:52:04 +01:00
|
|
|
con.remove(rApi, INSERTED_IN_DATE, null, datasourceApisGraph);
|
2019-12-13 14:53:15 +01:00
|
|
|
con.add(stmApi, datasourceApisGraph);
|
2020-07-24 12:34:06 +02:00
|
|
|
// log.debug("Adding stmt " + stmInsertedDate.toString() + " into graph " + datasourceApisGraph.toString());
|
2019-12-13 14:53:15 +01:00
|
|
|
con.add(stmInsertedDate, datasourceApisGraph);
|
|
|
|
con.commit();
|
2020-07-24 12:34:06 +02:00
|
|
|
// log.debug("statements added");
|
2019-12-13 14:53:15 +01:00
|
|
|
con.close();
|
|
|
|
}
|
|
|
|
catch (RDF4JException e) {
|
|
|
|
log.error("error adding statement ...", e);
|
|
|
|
throw new AriadnePlusPublisherException(e);
|
|
|
|
}
|
|
|
|
repository.shutDown();
|
|
|
|
manager.shutDown();
|
|
|
|
return 200;
|
|
|
|
}
|
|
|
|
catch(Throwable e){
|
|
|
|
log.error(e);
|
|
|
|
throw new AriadnePlusPublisherException(e);
|
|
|
|
}
|
|
|
|
}
|
2019-12-12 12:58:30 +01:00
|
|
|
|
2020-02-19 14:33:54 +01:00
|
|
|
public long dropDatasourceApiGraph(final String datasourceApi) throws AriadnePlusPublisherException {
|
2020-01-14 16:55:45 +01:00
|
|
|
|
|
|
|
try {
|
|
|
|
RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
|
|
|
|
manager.init();
|
|
|
|
manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
|
|
|
|
Repository repository = manager.getRepository(getRepository());
|
2020-01-29 16:43:25 +01:00
|
|
|
if (repository==null) {
|
|
|
|
throw new AriadnePlusPublisherException("GraphDB repository not found");
|
|
|
|
}
|
2020-01-14 16:55:45 +01:00
|
|
|
ValueFactory factory = repository.getValueFactory();
|
|
|
|
IRI rApi = factory.createIRI(getGraphDBBaseURI(), datasourceApi);
|
|
|
|
try (RepositoryConnection con = repository.getConnection()) {
|
2020-02-19 14:33:54 +01:00
|
|
|
log.debug("removing namedGraph: " + rApi);
|
|
|
|
Repositories.consume(repository, conn -> conn.clear(rApi));
|
2020-01-14 16:55:45 +01:00
|
|
|
}
|
|
|
|
catch (RDF4JException e) {
|
|
|
|
log.error("error removing datasourceApi partition info ", e);
|
|
|
|
throw new AriadnePlusPublisherException(e);
|
|
|
|
}
|
|
|
|
repository.shutDown();
|
|
|
|
manager.shutDown();
|
|
|
|
return 200;
|
|
|
|
}
|
|
|
|
catch(Throwable e){
|
|
|
|
log.error("error removing datasourceApi partition info ", e);
|
|
|
|
throw new AriadnePlusPublisherException(e);
|
|
|
|
}
|
2019-12-12 12:58:30 +01:00
|
|
|
}
|
2020-01-15 12:30:18 +01:00
|
|
|
|
2019-12-12 12:58:30 +01:00
|
|
|
private String getRecordURI(final String objIdentifier, final String datasourceApi) {
|
|
|
|
return "/" + datasourceApi + "/" + objIdentifier;
|
|
|
|
}
|
|
|
|
|
|
|
|
public RecordParserHelper getRecordParserHelper() {
|
|
|
|
return recordParserHelper;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setRecordParserHelper(final RecordParserHelper recordParserHelper) {
|
|
|
|
this.recordParserHelper = recordParserHelper;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setDefaultBaseURI(final String defaultBaseURI) {
|
|
|
|
this.graphDBServerUrl = defaultBaseURI;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String getRDFBlock(final String record) throws SaxonApiException{
|
|
|
|
recordParserHelper.init();
|
|
|
|
try {
|
|
|
|
if (StringUtils.isBlank(record)) {
|
|
|
|
log.warn("Got empty record");
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
String objIdentifier = recordParserHelper.getObjIdentifier(record);
|
|
|
|
if (StringUtils.isBlank(objIdentifier)) {
|
|
|
|
log.warn("Got record with no objIdentifier -- skipping");
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
String rdfBlock = recordParserHelper.getRDF(record);
|
|
|
|
if (StringUtils.isBlank(rdfBlock)) {
|
|
|
|
log.warn("Missing rdf:RDF in record with objIdentifier " + objIdentifier);
|
|
|
|
}
|
|
|
|
return rdfBlock;
|
|
|
|
}catch(Throwable e){
|
|
|
|
log.error(e);
|
|
|
|
throw e;
|
|
|
|
}
|
|
|
|
}
|
2019-12-13 14:53:15 +01:00
|
|
|
|
|
|
|
|
|
|
|
public String getGraphDBBaseURI() {
|
|
|
|
return graphDBBaseURI;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public void setGraphDBBaseURI(String graphDBBaseURI) {
|
|
|
|
this.graphDBBaseURI = graphDBBaseURI;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public String getWriterUser() {
|
|
|
|
return writerUser;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public void setWriterUser(String writerUser) {
|
|
|
|
this.writerUser = writerUser;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public String getWriterPwd() {
|
|
|
|
return writerPwd;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public void setWriterPwd(String writerPwd) {
|
|
|
|
this.writerPwd = writerPwd;
|
|
|
|
}
|
2019-12-16 14:46:42 +01:00
|
|
|
|
|
|
|
|
|
|
|
public String getRepository() {
|
|
|
|
return repository;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public void setRepository(String repository) {
|
|
|
|
this.repository = repository;
|
|
|
|
}
|
2019-12-12 12:58:30 +01:00
|
|
|
|
2020-05-29 16:19:55 +02:00
|
|
|
public String updateSparql(final String queryValue) throws AriadnePlusPublisherException{
|
2020-05-27 23:07:08 +02:00
|
|
|
try {
|
|
|
|
String result = new String("");
|
|
|
|
RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
|
|
|
|
manager.init();
|
|
|
|
manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
|
|
|
|
Repository repository = manager.getRepository(getRepository());
|
|
|
|
ValueFactory factory = repository.getValueFactory();
|
|
|
|
try (RepositoryConnection con = repository.getConnection()) {
|
|
|
|
con.begin();
|
2019-12-12 12:58:30 +01:00
|
|
|
|
2020-05-27 23:07:08 +02:00
|
|
|
Update updateResultQuery = con.prepareUpdate(queryValue);
|
|
|
|
if (updateResultQuery!=null) {
|
|
|
|
updateResultQuery.execute();
|
|
|
|
result = "updated";
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
result = "No result.";
|
|
|
|
}
|
2019-12-12 12:58:30 +01:00
|
|
|
|
2020-05-27 23:07:08 +02:00
|
|
|
log.debug("query result: "+result);
|
|
|
|
con.commit();
|
|
|
|
log.debug("query executed");
|
|
|
|
con.close();
|
|
|
|
}
|
|
|
|
catch (RDF4JException e) {
|
|
|
|
log.error("error executing query ...", e);
|
|
|
|
}
|
|
|
|
repository.shutDown();
|
|
|
|
manager.shutDown();
|
|
|
|
return result;
|
|
|
|
}catch(Throwable e){
|
|
|
|
log.error(e);
|
|
|
|
throw new AriadnePlusPublisherException(e);
|
|
|
|
}
|
|
|
|
}
|
2020-05-30 17:21:03 +02:00
|
|
|
|
|
|
|
public String feedFromURL(final String dataUrl, final String context) throws AriadnePlusPublisherException{
|
|
|
|
try {
|
|
|
|
String result = new String("");
|
|
|
|
RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
|
|
|
|
manager.init();
|
|
|
|
manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
|
|
|
|
Repository repository = manager.getRepository(getRepository());
|
|
|
|
ValueFactory factory = repository.getValueFactory();
|
|
|
|
try (RepositoryConnection con = repository.getConnection()) {
|
|
|
|
con.begin();
|
|
|
|
String baseUri = null;
|
|
|
|
IRI contextIRI = factory.createIRI(getGraphDBBaseURI(), context);
|
|
|
|
con.add(new URL(dataUrl), baseUri, RDFFormat.TURTLE, contextIRI);
|
|
|
|
result.concat("data added from url: "+dataUrl+" into graph "+context);
|
|
|
|
con.commit();
|
|
|
|
log.debug("add data from Url executed");
|
|
|
|
con.close();
|
|
|
|
}
|
|
|
|
catch (RDF4JException e) {
|
|
|
|
log.error("error executing query ...", e);
|
|
|
|
}
|
|
|
|
repository.shutDown();
|
|
|
|
manager.shutDown();
|
|
|
|
return result;
|
|
|
|
}catch(Throwable e){
|
|
|
|
log.error(e);
|
|
|
|
throw new AriadnePlusPublisherException(e);
|
|
|
|
}
|
|
|
|
}
|
2020-06-12 18:14:41 +02:00
|
|
|
|
|
|
|
public RunSPARQLQueryService getRunSPQRLQuery() {
|
|
|
|
return runSPQRLQuery;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setRunSPQRLQuery(RunSPARQLQueryService runSPQRLQuery) {
|
|
|
|
this.runSPQRLQuery = runSPQRLQuery;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String indexOnES(String datasource, String collectionId) throws AriadnePlusPublisherException {
|
2020-07-24 13:15:27 +02:00
|
|
|
String recordsIndexReport = "";
|
|
|
|
String collectionIndexReport = "";
|
2020-06-12 18:14:41 +02:00
|
|
|
try {
|
2020-07-24 12:34:06 +02:00
|
|
|
log.info("Start indexing from "+ datasource + " " + collectionId);
|
2020-06-12 18:14:41 +02:00
|
|
|
runSPQRLQuery.setupConnection( getWriterUser(), getWriterPwd(), this.graphDBServerUrl, getRepository());
|
|
|
|
runSPQRLQuery.setParser(parseRDFJSON);
|
|
|
|
runSPQRLQuery.setResourceManager(resourceManager);
|
|
|
|
runSPQRLQuery.setBulkUpload(bulkUpload);
|
2020-06-16 02:36:16 +02:00
|
|
|
List<String> recordIds = runSPQRLQuery.selectRecordIds(datasource, collectionId);
|
2020-06-12 18:14:41 +02:00
|
|
|
final ClassPathResource queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_record_data_template.sparql");
|
|
|
|
String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name());
|
2020-06-16 02:36:16 +02:00
|
|
|
boolean isCollection = false;
|
2020-07-24 13:15:27 +02:00
|
|
|
recordsIndexReport = runSPQRLQuery.executeMultipleQueryGraph(queryTemplate, recordIds, datasource, collectionId, isCollection);
|
2020-06-16 02:36:16 +02:00
|
|
|
List<String> collectionResourceId = runSPQRLQuery.selectCollectionId(datasource, collectionId);
|
|
|
|
final ClassPathResource selectCollectionTemplateRes = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_collection_data_template.sparql");
|
|
|
|
String selectCollectionTemplate = IOUtils.toString(selectCollectionTemplateRes.getInputStream(), StandardCharsets.UTF_8.name());
|
|
|
|
isCollection = true;
|
2020-07-24 13:15:27 +02:00
|
|
|
collectionIndexReport = runSPQRLQuery.executeMultipleQueryGraph(selectCollectionTemplate, collectionResourceId, datasource, collectionId, isCollection);
|
2020-06-12 18:14:41 +02:00
|
|
|
}catch(Throwable e){
|
|
|
|
log.error(e);
|
|
|
|
throw new AriadnePlusPublisherException(e);
|
|
|
|
}
|
2020-07-24 13:15:27 +02:00
|
|
|
return "Records: ".concat(recordsIndexReport).concat(" Collection: ").concat(collectionIndexReport);
|
2020-06-12 18:14:41 +02:00
|
|
|
}
|
2020-05-27 23:07:08 +02:00
|
|
|
}
|