Collection data are now indexed with all fields required by the ES mapping

This commit is contained in:
Enrico Ottonello 2020-07-08 22:00:28 +02:00
parent 364d25f2b7
commit 9358f1eaf2
6 changed files with 21 additions and 37 deletions

View File

@ -61,7 +61,6 @@ public class BulkUpload {
} }
else { else {
ace.setResourceType("dataset"); ace.setResourceType("dataset");
}
Spatial esSpatial = new Spatial(); Spatial esSpatial = new Spatial();
ace.getSpatial().stream().filter(s -> s.getPlaceName()!=null).forEach(s -> { ace.getSpatial().stream().filter(s -> s.getPlaceName()!=null).forEach(s -> {
esSpatial.setPlaceName(s.getPlaceName()); esSpatial.setPlaceName(s.getPlaceName());
@ -71,6 +70,8 @@ public class BulkUpload {
}); });
ace.getSpatial().clear(); ace.getSpatial().clear();
ace.setSpatial(Arrays.asList(esSpatial)); ace.setSpatial(Arrays.asList(esSpatial));
}
String uniqueIsPartOf = ace.getUniqueIsPartOf(); String uniqueIsPartOf = ace.getUniqueIsPartOf();
if (uniqueIsPartOf!=null) { if (uniqueIsPartOf!=null) {
ace.setIsPartOf(Arrays.asList(uniqueIsPartOf)); ace.setIsPartOf(Arrays.asList(uniqueIsPartOf));
@ -102,10 +103,8 @@ public class BulkUpload {
testPublisher.setName("TEST"); testPublisher.setName("TEST");
ace.getPublisher().add(testPublisher); ace.getPublisher().add(testPublisher);
String[] splits = ace.getIdentifier().split("/"); String[] splits = ace.getIdentifier().split("/");
log.debug("indexing: "+ace.toJson());
request.add(new IndexRequest(elasticSearchIndexName).id(splits[splits.length-1]) request.add(new IndexRequest(elasticSearchIndexName).id(splits[splits.length-1])
.source(ace.toJson(),XContentType.JSON)); .source(ace.toJson(),XContentType.JSON));
log.debug("Indexing to ES: "+ace.toJson());
BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT); BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
log.info("Indexing to ES completed with status: "+bulkResponse.status()); log.info("Indexing to ES completed with status: "+bulkResponse.status());
if (bulkResponse.hasFailures()) { if (bulkResponse.hasFailures()) {

View File

@ -81,17 +81,14 @@ public class GraphDBClient {
log.warn("Got record with no objIdentifier -- skipping"); log.warn("Got record with no objIdentifier -- skipping");
return 0; return 0;
} }
log.debug("init connection to graphDBServerUrl " + this.graphDBServerUrl);
RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl); RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
manager.init(); manager.init();
manager.setUsernameAndPassword(getWriterUser(), getWriterPwd()); manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
log.debug("manager init");
Repository repository = manager.getRepository(getRepository()); Repository repository = manager.getRepository(getRepository());
ValueFactory factory = repository.getValueFactory(); ValueFactory factory = repository.getValueFactory();
String datasourceApi = recordParserHelper.getDatasourceApi(record); String datasourceApi = recordParserHelper.getDatasourceApi(record);
IRI graph = factory.createIRI(getGraphDBBaseURI(), datasourceApi); IRI graph = factory.createIRI(getGraphDBBaseURI(), datasourceApi);
try (RepositoryConnection con = repository.getConnection()) { try (RepositoryConnection con = repository.getConnection()) {
log.debug("connection established");
con.begin(); con.begin();
String recordURI = getRecordURI(objIdentifier, datasourceApi); String recordURI = getRecordURI(objIdentifier, datasourceApi);
log.debug("Trying to adding record with recordURI " + recordURI + " into graph " + graph); log.debug("Trying to adding record with recordURI " + recordURI + " into graph " + graph);
@ -105,7 +102,6 @@ public class GraphDBClient {
} }
repository.shutDown(); repository.shutDown();
manager.shutDown(); manager.shutDown();
log.debug("manager shutDown");
return 1; return 1;
}catch(Throwable e){ }catch(Throwable e){
log.error(e); log.error(e);
@ -116,11 +112,9 @@ public class GraphDBClient {
public long feedProvenance(final String datasource, final String datasourceApi) throws AriadnePlusPublisherException { public long feedProvenance(final String datasource, final String datasourceApi) throws AriadnePlusPublisherException {
try { try {
log.debug("init connection to graphDBServerUrl " + this.graphDBServerUrl);
RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl); RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
manager.init(); manager.init();
manager.setUsernameAndPassword(getWriterUser(), getWriterPwd()); manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
log.debug("manager init");
Repository repository = manager.getRepository(getRepository()); Repository repository = manager.getRepository(getRepository());
ValueFactory factory = repository.getValueFactory(); ValueFactory factory = repository.getValueFactory();
IRI IS_API_OF = factory.createIRI(PROVENANCE_NS, "isApiOf"); IRI IS_API_OF = factory.createIRI(PROVENANCE_NS, "isApiOf");
@ -158,7 +152,6 @@ public class GraphDBClient {
public long dropDatasourceApiGraph(final String datasourceApi) throws AriadnePlusPublisherException { public long dropDatasourceApiGraph(final String datasourceApi) throws AriadnePlusPublisherException {
try { try {
log.debug("init connection to graphDBServerUrl " + this.graphDBServerUrl);
RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl); RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
manager.init(); manager.init();
manager.setUsernameAndPassword(getWriterUser(), getWriterPwd()); manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
@ -268,15 +261,12 @@ public class GraphDBClient {
public String updateSparql(final String queryValue) throws AriadnePlusPublisherException{ public String updateSparql(final String queryValue) throws AriadnePlusPublisherException{
try { try {
String result = new String(""); String result = new String("");
log.debug("init connection to graphDBServerUrl " + this.graphDBServerUrl);
RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl); RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
manager.init(); manager.init();
manager.setUsernameAndPassword(getWriterUser(), getWriterPwd()); manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
log.debug("manager init");
Repository repository = manager.getRepository(getRepository()); Repository repository = manager.getRepository(getRepository());
ValueFactory factory = repository.getValueFactory(); ValueFactory factory = repository.getValueFactory();
try (RepositoryConnection con = repository.getConnection()) { try (RepositoryConnection con = repository.getConnection()) {
log.debug("connection established");
con.begin(); con.begin();
Update updateResultQuery = con.prepareUpdate(queryValue); Update updateResultQuery = con.prepareUpdate(queryValue);
@ -298,7 +288,6 @@ public class GraphDBClient {
} }
repository.shutDown(); repository.shutDown();
manager.shutDown(); manager.shutDown();
log.debug("manager shutDown");
return result; return result;
}catch(Throwable e){ }catch(Throwable e){
log.error(e); log.error(e);
@ -309,15 +298,12 @@ public class GraphDBClient {
public String feedFromURL(final String dataUrl, final String context) throws AriadnePlusPublisherException{ public String feedFromURL(final String dataUrl, final String context) throws AriadnePlusPublisherException{
try { try {
String result = new String(""); String result = new String("");
log.debug("init connection to graphDBServerUrl " + this.graphDBServerUrl);
RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl); RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
manager.init(); manager.init();
manager.setUsernameAndPassword(getWriterUser(), getWriterPwd()); manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
log.debug("manager init");
Repository repository = manager.getRepository(getRepository()); Repository repository = manager.getRepository(getRepository());
ValueFactory factory = repository.getValueFactory(); ValueFactory factory = repository.getValueFactory();
try (RepositoryConnection con = repository.getConnection()) { try (RepositoryConnection con = repository.getConnection()) {
log.debug("connection established");
con.begin(); con.begin();
String baseUri = null; String baseUri = null;
IRI contextIRI = factory.createIRI(getGraphDBBaseURI(), context); IRI contextIRI = factory.createIRI(getGraphDBBaseURI(), context);
@ -332,7 +318,6 @@ public class GraphDBClient {
} }
repository.shutDown(); repository.shutDown();
manager.shutDown(); manager.shutDown();
log.debug("manager shutDown");
return result; return result;
}catch(Throwable e){ }catch(Throwable e){
log.error(e); log.error(e);

View File

@ -100,7 +100,6 @@ public class AriadnePlusPublisherHelper {
} }
private void publishGraphDB(final String record) throws AriadnePlusPublisherException { private void publishGraphDB(final String record) throws AriadnePlusPublisherException {
log.debug("Publishing on graphdb");
GraphDBClient graphDBClient = this.graphdbClientFactory.getGraphDBClient(); GraphDBClient graphDBClient = this.graphdbClientFactory.getGraphDBClient();
graphDBClient.feed(record); graphDBClient.feed(record);
} }

View File

@ -72,18 +72,17 @@ public class RunSPARQLQueryService {
private String executeQueryGraph(String selectQueryTemplate, String recordId, boolean isCollection){ private String executeQueryGraph(String selectQueryTemplate, String recordId, boolean isCollection){
log.debug("Retrieving "+recordId+" - isCollection:"+isCollection ); log.debug("Retrieving "+recordId+" - isCollection:"+isCollection );
String query = selectQueryTemplate.replaceAll("%record", "<"+recordId+">"); String query = selectQueryTemplate.replaceAll("%record", "<"+recordId+">");
log.debug(query);
openConnection(); openConnection();
StringWriter recordWriter = null; StringWriter recordWriter = null;
Model resultsModel = null; Model resultsModel = null;
String jsonRecord = null; String jsonRecord = null;
try { try {
log.debug("Started at: "+Calendar.getInstance().getTime().toString()); // log.debug("Started at: "+Calendar.getInstance().getTime().toString());
GraphQuery graphQuery = connection.prepareGraphQuery(QueryLanguage.SPARQL, query); GraphQuery graphQuery = connection.prepareGraphQuery(QueryLanguage.SPARQL, query);
GraphQueryResult graphQueryResult = graphQuery.evaluate(); GraphQueryResult graphQueryResult = graphQuery.evaluate();
resultsModel = QueryResults.asModel(graphQueryResult); resultsModel = QueryResults.asModel(graphQueryResult);
graphQueryResult.close(); graphQueryResult.close();
log.debug("Finished at: "+Calendar.getInstance().getTime().toString()); // log.debug("Finished at: "+Calendar.getInstance().getTime().toString());
log.debug("Statements retrieved: " + resultsModel.size()); log.debug("Statements retrieved: " + resultsModel.size());
if (resultsModel.size()==0) { if (resultsModel.size()==0) {
return "noresult"; return "noresult";
@ -94,7 +93,6 @@ public class RunSPARQLQueryService {
if (isCollection) { if (isCollection) {
parser.setCollection(true); parser.setCollection(true);
} }
log.debug("rdf > json : "+recordWriter.toString());
parser.parse(recordWriter.toString()); parser.parse(recordWriter.toString());
resourceManager.manage(parser); resourceManager.manage(parser);
bulkUpload.index(resourceManager, isCollection); bulkUpload.index(resourceManager, isCollection);

View File

@ -55,10 +55,8 @@ public class ParseRDFJSON {
setJson(json); setJson(json);
fillMap(); fillMap();
DocumentContext jsonContext = JsonPath.parse(json); DocumentContext jsonContext = JsonPath.parse(json);
log.debug(getCatalogEntryJsonPath());
JSONArray entries = jsonContext.read(getCatalogEntryJsonPath()); JSONArray entries = jsonContext.read(getCatalogEntryJsonPath());
int size = entries.size(); int size = entries.size();
log.debug("num elements in json: "+size);
it = entries.iterator(); it = entries.iterator();
} }
@ -85,6 +83,10 @@ public class ParseRDFJSON {
this.catalogEntryJsonPath = catalogEntryJsonPath; this.catalogEntryJsonPath = catalogEntryJsonPath;
} }
public void setCatalogEntryCollectionJsonPath(String catalogEntryCollectionJsonPath) {
this.catalogEntryCollectionJsonPath = catalogEntryCollectionJsonPath;
}
public boolean isCollection() { public boolean isCollection() {
return isCollection; return isCollection;
} }

View File

@ -40,6 +40,7 @@ public class GraphDbReaderAndESIndexTest {
appProps.getProperty("graphdb.repository")); appProps.getProperty("graphdb.repository"));
ParseRDFJSON parseRDFJSON = new ParseRDFJSON(); ParseRDFJSON parseRDFJSON = new ParseRDFJSON();
parseRDFJSON.setCatalogEntryJsonPath(appProps.getProperty("catalog.entry.path")); parseRDFJSON.setCatalogEntryJsonPath(appProps.getProperty("catalog.entry.path"));
parseRDFJSON.setCatalogEntryCollectionJsonPath(appProps.getProperty("catalog.entry.collection.path"));
runSPQRLQuery.setParser(parseRDFJSON); runSPQRLQuery.setParser(parseRDFJSON);
ResourceManager resourceManager = new ResourceManager(); ResourceManager resourceManager = new ResourceManager();
resourceManager.setup( resourceManager.setup(
@ -52,13 +53,13 @@ public class GraphDbReaderAndESIndexTest {
BulkUpload bulkUpload = new BulkUpload(); BulkUpload bulkUpload = new BulkUpload();
bulkUpload.init(appProps.getProperty("elasticsearch.hostname"),appProps.getProperty("elasticsearch.indexname")); bulkUpload.init(appProps.getProperty("elasticsearch.hostname"),appProps.getProperty("elasticsearch.indexname"));
runSPQRLQuery.setBulkUpload(bulkUpload); runSPQRLQuery.setBulkUpload(bulkUpload);
String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/0320AA03-BED9-3F2B-AF5D-E31EE50CAE97"; String recordId = "https://ariadne-infrastructure.eu/aocat/Collection/ADS/849B1C0F-4C5F-3D8C-9082-CA60DBB4F557";
String datasource = "ads"; String datasource = "ads";
String collectionId = "271"; String collectionId = "271";
List<String> recordIds = Arrays.asList(recordId); List<String> recordIds = Arrays.asList(recordId);
final ClassPathResource queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_record_data_template.sparql"); final ClassPathResource queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_collection_data_template.sparql");
String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name()); String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name());
runSPQRLQuery.executeMultipleQueryGraph(queryTemplate, recordIds, datasource, collectionId, false); runSPQRLQuery.executeMultipleQueryGraph(queryTemplate, recordIds, datasource, collectionId, true);
} }