2020-01-28 16:53:59 +01:00
|
|
|
package eu.dnetlib.ariadneplus.reader;
|
|
|
|
|
2020-06-10 19:39:53 +02:00
|
|
|
import eu.dnetlib.ariadneplus.elasticsearch.BulkUpload;
|
|
|
|
import eu.dnetlib.ariadneplus.reader.json.ParseRDFJSON;
|
2020-06-12 18:14:41 +02:00
|
|
|
import org.apache.commons.logging.Log;
|
|
|
|
import org.apache.commons.logging.LogFactory;
|
2020-01-28 16:53:59 +01:00
|
|
|
import org.eclipse.rdf4j.model.Model;
|
2020-06-11 18:20:42 +02:00
|
|
|
import org.eclipse.rdf4j.query.*;
|
2020-01-28 16:53:59 +01:00
|
|
|
import org.eclipse.rdf4j.repository.Repository;
|
|
|
|
import org.eclipse.rdf4j.repository.RepositoryConnection;
|
|
|
|
import org.eclipse.rdf4j.repository.manager.RemoteRepositoryManager;
|
|
|
|
import org.eclipse.rdf4j.rio.RDFFormat;
|
|
|
|
import org.eclipse.rdf4j.rio.RDFWriter;
|
|
|
|
import org.eclipse.rdf4j.rio.Rio;
|
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
2020-06-10 19:39:53 +02:00
|
|
|
import java.io.StringWriter;
|
2020-06-11 18:20:42 +02:00
|
|
|
import java.util.ArrayList;
|
2020-06-10 19:39:53 +02:00
|
|
|
import java.util.Calendar;
|
2020-06-11 18:20:42 +02:00
|
|
|
import java.util.List;
|
2020-01-28 16:53:59 +01:00
|
|
|
|
|
|
|
@Service
|
|
|
|
public class RunSPARQLQueryService {
|
|
|
|
|
2020-06-12 18:14:41 +02:00
|
|
|
private static final Log log = LogFactory.getLog(RunSPARQLQueryService.class);
|
2020-01-28 16:53:59 +01:00
|
|
|
|
|
|
|
private RepositoryConnection connection;
|
|
|
|
private RemoteRepositoryManager manager;
|
|
|
|
private Repository repository;
|
|
|
|
|
|
|
|
private ParseRDFJSON parser;
|
|
|
|
private ResourceManager resourceManager;
|
|
|
|
private BulkUpload bulkUpload;
|
|
|
|
|
2020-06-10 19:39:53 +02:00
|
|
|
private static String username = null;
|
|
|
|
private static String pwd = null;
|
|
|
|
private static String graphDBUrl = null;
|
|
|
|
private static String graphDBRepository = null;
|
|
|
|
|
|
|
|
public void setupConnection(String username, String pwd, String graphDbUrl, String graphDbRepository) {
|
|
|
|
setUsername(username);
|
|
|
|
setPwd(pwd);
|
|
|
|
setGraphDBUrl(graphDbUrl);
|
|
|
|
setGraphDBRepository(graphDbRepository);
|
|
|
|
}
|
|
|
|
|
|
|
|
private void openConnection(){
|
|
|
|
manager = new RemoteRepositoryManager(getGraphDBUrl());
|
2020-01-28 16:53:59 +01:00
|
|
|
manager.init();
|
2020-06-10 19:39:53 +02:00
|
|
|
manager.setUsernameAndPassword(getUsername(), getPwd());
|
|
|
|
repository = manager.getRepository(getGraphDBRepository());
|
2020-01-28 16:53:59 +01:00
|
|
|
connection = repository.getConnection();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
private void closeConnection(){
|
|
|
|
connection.close();
|
|
|
|
repository.shutDown();
|
|
|
|
manager.shutDown();
|
|
|
|
}
|
|
|
|
|
2020-06-16 02:36:16 +02:00
|
|
|
public String executeMultipleQueryGraph(String queryTemplate, List<String> recordIds, String datasource, String collectionId, boolean isCollection){
|
2020-06-11 18:20:42 +02:00
|
|
|
if (queryTemplate==null)
|
|
|
|
return null;
|
|
|
|
final String selectQueryTemplate = queryTemplate.replaceAll("%datasource", datasource).replaceAll("%collectionId", collectionId);
|
|
|
|
recordIds.forEach(recordId -> {
|
2020-06-16 02:36:16 +02:00
|
|
|
executeQueryGraph(selectQueryTemplate, recordId, isCollection);
|
2020-06-11 18:20:42 +02:00
|
|
|
});
|
|
|
|
return "ok";
|
|
|
|
}
|
2020-06-10 19:39:53 +02:00
|
|
|
|
2020-06-16 02:36:16 +02:00
|
|
|
private String executeQueryGraph(String selectQueryTemplate, String recordId, boolean isCollection){
|
|
|
|
log.debug("Retrieving "+recordId+" - isCollection:"+isCollection );
|
2020-07-07 13:39:22 +02:00
|
|
|
String query = selectQueryTemplate.replaceAll("%record", "<"+recordId+">");
|
|
|
|
log.debug(query);
|
|
|
|
openConnection();
|
2020-01-28 16:53:59 +01:00
|
|
|
StringWriter recordWriter = null;
|
2020-06-10 19:39:53 +02:00
|
|
|
Model resultsModel = null;
|
2020-01-28 16:53:59 +01:00
|
|
|
String jsonRecord = null;
|
|
|
|
try {
|
2020-06-12 18:14:41 +02:00
|
|
|
log.debug("Started at: "+Calendar.getInstance().getTime().toString());
|
2020-06-10 19:39:53 +02:00
|
|
|
GraphQuery graphQuery = connection.prepareGraphQuery(QueryLanguage.SPARQL, query);
|
2020-01-28 16:53:59 +01:00
|
|
|
GraphQueryResult graphQueryResult = graphQuery.evaluate();
|
2020-06-10 19:39:53 +02:00
|
|
|
resultsModel = QueryResults.asModel(graphQueryResult);
|
2020-01-28 16:53:59 +01:00
|
|
|
graphQueryResult.close();
|
2020-06-12 18:14:41 +02:00
|
|
|
log.debug("Finished at: "+Calendar.getInstance().getTime().toString());
|
|
|
|
log.debug("Statements retrieved: " + resultsModel.size());
|
2020-06-11 18:20:42 +02:00
|
|
|
if (resultsModel.size()==0) {
|
|
|
|
return "noresult";
|
|
|
|
}
|
|
|
|
recordWriter = new StringWriter();
|
2020-06-10 19:39:53 +02:00
|
|
|
RDFWriter rdfRecordWriter = Rio.createWriter(RDFFormat.RDFJSON, recordWriter);
|
|
|
|
Rio.write(resultsModel, rdfRecordWriter);
|
2020-06-16 02:36:16 +02:00
|
|
|
if (isCollection) {
|
|
|
|
parser.setCollection(true);
|
|
|
|
}
|
2020-06-10 19:39:53 +02:00
|
|
|
parser.parse(recordWriter.toString());
|
|
|
|
resourceManager.manage(parser);
|
2020-07-07 13:39:22 +02:00
|
|
|
bulkUpload.index(resourceManager, isCollection);
|
2020-06-10 19:39:53 +02:00
|
|
|
} catch(Exception e){
|
2020-01-28 16:53:59 +01:00
|
|
|
e.printStackTrace();
|
2020-06-10 19:39:53 +02:00
|
|
|
} finally{
|
2020-01-28 16:53:59 +01:00
|
|
|
closeConnection();
|
2020-06-10 19:39:53 +02:00
|
|
|
if (resultsModel!=null) {
|
|
|
|
resultsModel.clear();
|
2020-01-28 16:53:59 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return jsonRecord;
|
|
|
|
}
|
|
|
|
|
|
|
|
public ParseRDFJSON getParser() {
|
|
|
|
return parser;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setParser(ParseRDFJSON parser) {
|
|
|
|
this.parser = parser;
|
|
|
|
}
|
|
|
|
|
|
|
|
public ResourceManager getResourceManager() {
|
|
|
|
return resourceManager;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setResourceManager(ResourceManager resourceManager) {
|
|
|
|
this.resourceManager = resourceManager;
|
|
|
|
}
|
|
|
|
|
|
|
|
public BulkUpload getBulkUpload() {
|
|
|
|
return bulkUpload;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setBulkUpload(BulkUpload bulkUpload) {
|
|
|
|
this.bulkUpload = bulkUpload;
|
|
|
|
}
|
|
|
|
|
2020-06-10 19:39:53 +02:00
|
|
|
public static String getUsername() {
|
|
|
|
return username;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static String getPwd() {
|
|
|
|
return pwd;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static String getGraphDBUrl() {
|
|
|
|
return graphDBUrl;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static String getGraphDBRepository() {
|
|
|
|
return graphDBRepository;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static void setUsername(String username) {
|
|
|
|
RunSPARQLQueryService.username = username;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static void setPwd(String pwd) {
|
|
|
|
RunSPARQLQueryService.pwd = pwd;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static void setGraphDBUrl(String graphDBUrl) {
|
|
|
|
RunSPARQLQueryService.graphDBUrl = graphDBUrl;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static void setGraphDBRepository(String graphDBRepository) {
|
|
|
|
RunSPARQLQueryService.graphDBRepository = graphDBRepository;
|
|
|
|
}
|
2020-06-11 18:20:42 +02:00
|
|
|
|
2020-06-12 18:14:41 +02:00
|
|
|
public List<String> selectRecordIds(String datasource, String collectionId){
|
2020-06-15 12:10:59 +02:00
|
|
|
log.debug("Retrieving record Ids from GraphDB ...");
|
2020-06-12 18:14:41 +02:00
|
|
|
String queryTemplate = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
|
2020-06-11 18:20:42 +02:00
|
|
|
"select * \n" +
|
2020-06-12 18:14:41 +02:00
|
|
|
"from <https://ariadne-infrastructure.eu/api_________::ariadne_plus::%datasource::%collectionId>\n" +
|
2020-06-11 18:20:42 +02:00
|
|
|
"where { \n" +
|
|
|
|
"\t?recordId rdf:type <https://www.ariadne-infrastructure.eu/resource/ao/cat/1.1/AO_Individual_Data_Resource> .\n" +
|
|
|
|
"} \n";
|
2020-06-12 18:14:41 +02:00
|
|
|
String query = queryTemplate.replaceAll("%datasource", datasource).replaceAll("%collectionId", collectionId);
|
2020-06-15 12:10:59 +02:00
|
|
|
return executeSelect(query);
|
|
|
|
}
|
|
|
|
|
|
|
|
public List<String> selectCollectionId(String datasource, String collectionId){
|
|
|
|
log.debug("Retrieving collection Id from GraphDB ...");
|
|
|
|
String queryTemplate = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
|
|
|
|
"select * \n" +
|
|
|
|
"from <https://ariadne-infrastructure.eu/api_________::ariadne_plus::%datasource::%collectionId>\n" +
|
|
|
|
"where { \n" +
|
|
|
|
"\t?recordId rdf:type <https://www.ariadne-infrastructure.eu/resource/ao/cat/1.1/AO_Collection> .\n" +
|
|
|
|
"} \n";
|
|
|
|
String query = queryTemplate.replaceAll("%datasource", datasource).replaceAll("%collectionId", collectionId);
|
|
|
|
return executeSelect(query);
|
|
|
|
}
|
|
|
|
|
|
|
|
private List<String> executeSelect(String query){
|
2020-06-11 18:20:42 +02:00
|
|
|
openConnection();
|
|
|
|
String jsonRecord = null;
|
2020-06-15 12:10:59 +02:00
|
|
|
List<String> results = new ArrayList<>();
|
2020-06-11 18:20:42 +02:00
|
|
|
try {
|
2020-06-12 18:14:41 +02:00
|
|
|
log.debug("Started at: "+Calendar.getInstance().getTime().toString());
|
2020-06-11 18:20:42 +02:00
|
|
|
TupleQuery selectQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query);
|
|
|
|
TupleQueryResult selectQueryResult = selectQuery.evaluate();
|
|
|
|
int counter = 0;
|
|
|
|
while (selectQueryResult.hasNext()) {
|
|
|
|
BindingSet recordSet = selectQueryResult.next();
|
|
|
|
org.eclipse.rdf4j.model.Value recordIdValue = recordSet.getValue("recordId");
|
2020-06-15 12:10:59 +02:00
|
|
|
results.add(recordIdValue.stringValue());
|
2020-06-11 18:20:42 +02:00
|
|
|
counter++;
|
|
|
|
}
|
2020-06-15 12:10:59 +02:00
|
|
|
log.debug("Total records retrieved: "+counter);
|
2020-06-12 18:14:41 +02:00
|
|
|
log.debug("Finished at: "+Calendar.getInstance().getTime().toString());
|
2020-06-11 18:20:42 +02:00
|
|
|
} catch(Exception e){
|
|
|
|
e.printStackTrace();
|
|
|
|
} finally{
|
|
|
|
closeConnection();
|
|
|
|
}
|
2020-06-15 12:10:59 +02:00
|
|
|
return results;
|
2020-06-11 18:20:42 +02:00
|
|
|
}
|
2020-01-28 16:53:59 +01:00
|
|
|
}
|