// Source: AriadnePlus/dnet-ariadneplus-graphdb-pu.../src/main/java/eu/dnetlib/ariadneplus/reader/RunSPARQLQueryService.java
// (217 lines, 7.2 KiB, Java)
package eu.dnetlib.ariadneplus.reader;
import eu.dnetlib.ariadneplus.elasticsearch.BulkUpload;
import eu.dnetlib.ariadneplus.reader.json.ParseRDFJSON;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.query.*;
import org.eclipse.rdf4j.repository.Repository;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.repository.manager.RemoteRepositoryManager;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFWriter;
import org.eclipse.rdf4j.rio.Rio;
import org.springframework.stereotype.Service;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
@Service
public class RunSPARQLQueryService {

	private static final Log log = LogFactory.getLog(RunSPARQLQueryService.class);

	// RDF4J handles for the currently open GraphDB session; populated by
	// openConnection() and released (in reverse order) by closeConnection().
	private RepositoryConnection connection;
	private RemoteRepositoryManager manager;
	private Repository repository;

	// Collaborators injected via setters: RDF/JSON parsing, record shaping,
	// and Elasticsearch bulk indexing.
	private ParseRDFJSON parser;
	private ResourceManager resourceManager;
	private BulkUpload bulkUpload;

	// Connection coordinates are held in static fields (shared across all
	// instances) and configured once via setupConnection().
	private static String username = null;
	private static String pwd = null;
	private static String graphDBUrl = null;
	private static String graphDBRepository = null;

	/**
	 * Stores the GraphDB endpoint coordinates used by every subsequent query.
	 * Does not open a connection; connections are opened lazily per operation.
	 *
	 * @param username          GraphDB account name
	 * @param pwd               GraphDB account password
	 * @param graphDbUrl        base URL of the GraphDB server
	 * @param graphDbRepository repository identifier on that server
	 */
	public void setupConnection(String username, String pwd, String graphDbUrl, String graphDbRepository) {
		setUsername(username);
		setPwd(pwd);
		setGraphDBUrl(graphDbUrl);
		setGraphDBRepository(graphDbRepository);
	}

	/** Opens a fresh manager/repository/connection triple against GraphDB. */
	private void openConnection() {
		manager = new RemoteRepositoryManager(getGraphDBUrl());
		manager.init();
		manager.setUsernameAndPassword(getUsername(), getPwd());
		repository = manager.getRepository(getGraphDBRepository());
		connection = repository.getConnection();
	}

	/**
	 * Releases connection, repository and manager in reverse order of
	 * acquisition. Each handle is null-guarded and reset so that a partially
	 * failed openConnection() (or a double close) cannot cause an NPE.
	 */
	private void closeConnection() {
		if (connection != null) {
			connection.close();
			connection = null;
		}
		if (repository != null) {
			repository.shutDown();
			repository = null;
		}
		if (manager != null) {
			manager.shutDown();
			manager = null;
		}
	}

	/**
	 * Runs the CONSTRUCT query template once per record id, indexing each
	 * result into Elasticsearch. Failures on individual records are logged
	 * and do not stop the batch.
	 *
	 * @return "ok" when the batch was iterated, or null for a null template
	 */
	public String executeMultipleQueryGraph(String queryTemplate, List<String> recordIds, String datasource, String collectionId, boolean isCollection) {
		if (queryTemplate == null) {
			return null;
		}
		// Literal substitution: replace() instead of replaceAll() so that a
		// '$' or '\' in the datasource/collection id cannot be misread as a
		// regex group reference and corrupt the query.
		final String selectQueryTemplate = queryTemplate
				.replace("%datasource", datasource)
				.replace("%collectionId", collectionId);
		recordIds.forEach(recordId -> executeQueryGraph(selectQueryTemplate, recordId, isCollection));
		return "ok";
	}

	/**
	 * Evaluates the graph query for one record, serializes the resulting
	 * model to RDF/JSON, parses it and indexes it via {@link BulkUpload}.
	 *
	 * @return the RDF/JSON serialization of the record, "noresult" when the
	 *         query matched no statements, or null when processing failed
	 */
	private String executeQueryGraph(String selectQueryTemplate, String recordId, boolean isCollection) {
		log.debug("Retrieving " + recordId + " - isCollection:" + isCollection);
		// replace(), not replaceAll(): the record IRI is a literal and may
		// legally contain regex-special characters.
		String query = selectQueryTemplate.replace("%record", "<" + recordId + ">");
		log.debug(query);
		openConnection();
		Model resultsModel = null;
		String jsonRecord = null;
		try {
			log.debug("Started at: " + Calendar.getInstance().getTime().toString());
			GraphQuery graphQuery = connection.prepareGraphQuery(QueryLanguage.SPARQL, query);
			GraphQueryResult graphQueryResult = graphQuery.evaluate();
			try {
				resultsModel = QueryResults.asModel(graphQueryResult);
			} finally {
				// Close the streaming result even if asModel() throws.
				graphQueryResult.close();
			}
			log.debug("Finished at: " + Calendar.getInstance().getTime().toString());
			log.debug("Statements retrieved: " + resultsModel.size());
			if (resultsModel.isEmpty()) {
				return "noresult";
			}
			StringWriter recordWriter = new StringWriter();
			RDFWriter rdfRecordWriter = Rio.createWriter(RDFFormat.RDFJSON, recordWriter);
			Rio.write(resultsModel, rdfRecordWriter);
			if (isCollection) {
				parser.setCollection(true);
			}
			// Keep the serialization so the method actually returns the JSON
			// record (the original computed it but always returned null).
			jsonRecord = recordWriter.toString();
			log.debug("rdf > json : " + jsonRecord);
			parser.parse(jsonRecord);
			resourceManager.manage(parser);
			bulkUpload.index(resourceManager, isCollection);
		} catch (Exception e) {
			// Log with the failing record id and full stack trace instead of
			// printStackTrace(), so the error reaches the application log.
			log.error("Error processing record " + recordId, e);
		} finally {
			closeConnection();
			if (resultsModel != null) {
				resultsModel.clear();
			}
		}
		return jsonRecord;
	}

	public ParseRDFJSON getParser() {
		return parser;
	}

	public void setParser(ParseRDFJSON parser) {
		this.parser = parser;
	}

	public ResourceManager getResourceManager() {
		return resourceManager;
	}

	public void setResourceManager(ResourceManager resourceManager) {
		this.resourceManager = resourceManager;
	}

	public BulkUpload getBulkUpload() {
		return bulkUpload;
	}

	public void setBulkUpload(BulkUpload bulkUpload) {
		this.bulkUpload = bulkUpload;
	}

	public static String getUsername() {
		return username;
	}

	public static String getPwd() {
		return pwd;
	}

	public static String getGraphDBUrl() {
		return graphDBUrl;
	}

	public static String getGraphDBRepository() {
		return graphDBRepository;
	}

	public static void setUsername(String username) {
		RunSPARQLQueryService.username = username;
	}

	public static void setPwd(String pwd) {
		RunSPARQLQueryService.pwd = pwd;
	}

	public static void setGraphDBUrl(String graphDBUrl) {
		RunSPARQLQueryService.graphDBUrl = graphDBUrl;
	}

	public static void setGraphDBRepository(String graphDBRepository) {
		RunSPARQLQueryService.graphDBRepository = graphDBRepository;
	}

	/**
	 * Lists the IRIs of all AO_Individual_Data_Resource records in the named
	 * graph identified by the datasource/collection pair.
	 */
	public List<String> selectRecordIds(String datasource, String collectionId) {
		log.debug("Retrieving record Ids from GraphDB ...");
		String queryTemplate = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
				"select * \n" +
				"from <https://ariadne-infrastructure.eu/api_________::ariadne_plus::%datasource::%collectionId>\n" +
				"where { \n" +
				"\t?recordId rdf:type <https://www.ariadne-infrastructure.eu/resource/ao/cat/1.1/AO_Individual_Data_Resource> .\n" +
				"} \n";
		// Literal substitution (see executeMultipleQueryGraph).
		String query = queryTemplate.replace("%datasource", datasource).replace("%collectionId", collectionId);
		return executeSelect(query);
	}

	/**
	 * Lists the IRIs of AO_Collection records in the named graph identified
	 * by the datasource/collection pair.
	 */
	public List<String> selectCollectionId(String datasource, String collectionId) {
		log.debug("Retrieving collection Id from GraphDB ...");
		String queryTemplate = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
				"select * \n" +
				"from <https://ariadne-infrastructure.eu/api_________::ariadne_plus::%datasource::%collectionId>\n" +
				"where { \n" +
				"\t?recordId rdf:type <https://www.ariadne-infrastructure.eu/resource/ao/cat/1.1/AO_Collection> .\n" +
				"} \n";
		// Literal substitution (see executeMultipleQueryGraph).
		String query = queryTemplate.replace("%datasource", datasource).replace("%collectionId", collectionId);
		return executeSelect(query);
	}

	/**
	 * Runs a SELECT query and collects the "recordId" binding of every row.
	 * Rows without that binding are skipped. Errors are logged and yield the
	 * rows gathered so far (possibly empty) rather than propagating.
	 */
	private List<String> executeSelect(String query) {
		openConnection();
		List<String> results = new ArrayList<>();
		try {
			log.debug("Started at: " + Calendar.getInstance().getTime().toString());
			TupleQuery selectQuery = connection.prepareTupleQuery(QueryLanguage.SPARQL, query);
			TupleQueryResult selectQueryResult = selectQuery.evaluate();
			try {
				int counter = 0;
				while (selectQueryResult.hasNext()) {
					BindingSet recordSet = selectQueryResult.next();
					org.eclipse.rdf4j.model.Value recordIdValue = recordSet.getValue("recordId");
					// Guard against an unbound variable in a result row.
					if (recordIdValue != null) {
						results.add(recordIdValue.stringValue());
						counter++;
					}
				}
				log.debug("Total records retrieved: " + counter);
			} finally {
				// Close the streaming result even on failure mid-iteration.
				selectQueryResult.close();
			}
			log.debug("Finished at: " + Calendar.getInstance().getTime().toString());
		} catch (Exception e) {
			log.error("Error executing select query", e);
		} finally {
			closeConnection();
		}
		return results;
	}
}