From 9358f1eaf2d46f040582b95d90f32b3cbdeb1b99 Mon Sep 17 00:00:00 2001
From: Enrico Ottonello
Date: Wed, 8 Jul 2020 22:00:28 +0200
Subject: [PATCH] collection data are now indexed with all required fields by
 es mapping

---
 .../ariadneplus/elasticsearch/BulkUpload.java | 21 ++++++++++-----------
 .../ariadneplus/graphdb/GraphDBClient.java    | 15 ---------------
 .../publisher/AriadnePlusPublisherHelper.java |  1 -
 .../reader/RunSPARQLQueryService.java         |  8 +++-----
 .../ariadneplus/reader/json/ParseRDFJSON.java |  6 ++++--
 .../GraphDbReaderAndESIndexTest.java          |  7 ++++---
 6 files changed, 21 insertions(+), 37 deletions(-)

diff --git a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/elasticsearch/BulkUpload.java b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/elasticsearch/BulkUpload.java
index 53af97b..51a0f66 100644
--- a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/elasticsearch/BulkUpload.java
+++ b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/elasticsearch/BulkUpload.java
@@ -61,16 +61,17 @@ public class BulkUpload {
 				}
 				else {
 					ace.setResourceType("dataset");
+					Spatial esSpatial = new Spatial();
+					ace.getSpatial().stream().filter(s -> s.getPlaceName()!=null).forEach(s -> {
+						esSpatial.setPlaceName(s.getPlaceName());
+					});
+					ace.getSpatial().stream().filter(s -> s.getLocation()!=null).forEach(s -> {
+						esSpatial.setLocation(s.getLocation());
+					});
+					ace.getSpatial().clear();
+					ace.setSpatial(Arrays.asList(esSpatial));
 				}
-				Spatial esSpatial = new Spatial();
-				ace.getSpatial().stream().filter(s -> s.getPlaceName()!=null).forEach(s -> {
-					esSpatial.setPlaceName(s.getPlaceName());
-				});
-				ace.getSpatial().stream().filter(s -> s.getLocation()!=null).forEach(s -> {
-					esSpatial.setLocation(s.getLocation());
-				});
-				ace.getSpatial().clear();
-				ace.setSpatial(Arrays.asList(esSpatial));
+
 				String uniqueIsPartOf = ace.getUniqueIsPartOf();
 				if (uniqueIsPartOf!=null) {
 					ace.setIsPartOf(Arrays.asList(uniqueIsPartOf));
@@ -102,10 +103,8 @@ public class BulkUpload {
 				testPublisher.setName("TEST");
 				ace.getPublisher().add(testPublisher);
 				String[] splits = ace.getIdentifier().split("/");
-				log.debug("indexing: "+ace.toJson());
 				request.add(new IndexRequest(elasticSearchIndexName).id(splits[splits.length-1])
 						.source(ace.toJson(),XContentType.JSON));
-				log.debug("Indexing to ES: "+ace.toJson());
 				BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
 				log.info("Indexing to ES completed with status: "+bulkResponse.status());
 				if (bulkResponse.hasFailures()) {
diff --git a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/graphdb/GraphDBClient.java b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/graphdb/GraphDBClient.java
index d685986..3916d12 100644
--- a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/graphdb/GraphDBClient.java
+++ b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/graphdb/GraphDBClient.java
@@ -81,17 +81,14 @@ public class GraphDBClient {
 				log.warn("Got record with no objIdentifier -- skipping");
 				return 0;
 			}
-			log.debug("init connection to graphDBServerUrl " + this.graphDBServerUrl);
 			RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
 			manager.init();
 			manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
-			log.debug("manager init");
 			Repository repository = manager.getRepository(getRepository());
 			ValueFactory factory = repository.getValueFactory();
 			String datasourceApi = recordParserHelper.getDatasourceApi(record);
 			IRI graph = factory.createIRI(getGraphDBBaseURI(), datasourceApi);
 			try (RepositoryConnection con = repository.getConnection()) {
-				log.debug("connection established");
 				con.begin();
 				String recordURI = getRecordURI(objIdentifier, datasourceApi);
 				log.debug("Trying to adding record with recordURI " + recordURI + " into graph " + graph);
@@ -105,7 +102,6 @@ public class GraphDBClient {
 			}
 			repository.shutDown();
 			manager.shutDown();
-			log.debug("manager shutDown");
 			return 1;
 		}catch(Throwable e){
 			log.error(e);
@@ -116,11 +112,9 @@ public class GraphDBClient {
 
 	public long feedProvenance(final String datasource, final String datasourceApi) throws AriadnePlusPublisherException {
 		try {
-			log.debug("init connection to graphDBServerUrl " + this.graphDBServerUrl);
 			RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
 			manager.init();
 			manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
-			log.debug("manager init");
 			Repository repository = manager.getRepository(getRepository());
 			ValueFactory factory = repository.getValueFactory();
 			IRI IS_API_OF = factory.createIRI(PROVENANCE_NS, "isApiOf");
@@ -158,7 +152,6 @@ public class GraphDBClient {
 
 	public long dropDatasourceApiGraph(final String datasourceApi) throws AriadnePlusPublisherException {
 		try {
-			log.debug("init connection to graphDBServerUrl " + this.graphDBServerUrl);
 			RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
 			manager.init();
 			manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
@@ -268,15 +261,12 @@ public class GraphDBClient {
 	public String updateSparql(final String queryValue) throws AriadnePlusPublisherException{
 		try {
 			String result = new String("");
-			log.debug("init connection to graphDBServerUrl " + this.graphDBServerUrl);
 			RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
 			manager.init();
 			manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
-			log.debug("manager init");
 			Repository repository = manager.getRepository(getRepository());
 			ValueFactory factory = repository.getValueFactory();
 			try (RepositoryConnection con = repository.getConnection()) {
-				log.debug("connection established");
 				con.begin();
 
 				Update updateResultQuery = con.prepareUpdate(queryValue);
@@ -298,7 +288,6 @@ public class GraphDBClient {
 			}
 			repository.shutDown();
 			manager.shutDown();
-			log.debug("manager shutDown");
 			return result;
 		}catch(Throwable e){
 			log.error(e);
@@ -309,15 +298,12 @@ public class GraphDBClient {
 	public String feedFromURL(final String dataUrl, final String context) throws AriadnePlusPublisherException{
 		try {
 			String result = new String("");
-			log.debug("init connection to graphDBServerUrl " + this.graphDBServerUrl);
 			RemoteRepositoryManager manager = new RemoteRepositoryManager(this.graphDBServerUrl);
 			manager.init();
 			manager.setUsernameAndPassword(getWriterUser(), getWriterPwd());
-			log.debug("manager init");
 			Repository repository = manager.getRepository(getRepository());
 			ValueFactory factory = repository.getValueFactory();
 			try (RepositoryConnection con = repository.getConnection()) {
-				log.debug("connection established");
 				con.begin();
 				String baseUri = null;
 				IRI contextIRI = factory.createIRI(getGraphDBBaseURI(), context);
@@ -332,7 +318,6 @@ public class GraphDBClient {
 			}
 			repository.shutDown();
 			manager.shutDown();
-			log.debug("manager shutDown");
 			return result;
 		}catch(Throwable e){
 			log.error(e);
diff --git a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/publisher/AriadnePlusPublisherHelper.java b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/publisher/AriadnePlusPublisherHelper.java
index eed6ac0..c7ccbb9 100644
--- a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/publisher/AriadnePlusPublisherHelper.java
+++ b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/publisher/AriadnePlusPublisherHelper.java
@@ -100,7 +100,6 @@ public class AriadnePlusPublisherHelper {
 	}
 
 	private void publishGraphDB(final String record) throws AriadnePlusPublisherException {
-		log.debug("Publishing on graphdb");
 		GraphDBClient graphDBClient = this.graphdbClientFactory.getGraphDBClient();
 		graphDBClient.feed(record);
 	}
diff --git a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/RunSPARQLQueryService.java b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/RunSPARQLQueryService.java
index b9f095d..075db7d 100644
--- a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/RunSPARQLQueryService.java
+++ b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/RunSPARQLQueryService.java
@@ -72,18 +72,17 @@ public class RunSPARQLQueryService {
 	private String executeQueryGraph(String selectQueryTemplate, String recordId, boolean isCollection){
 		log.debug("Retrieving "+recordId+" - isCollection:"+isCollection );
 		String query = selectQueryTemplate.replaceAll("%record", "<"+recordId+">");
-		log.debug(query);
 		openConnection();
 		StringWriter recordWriter = null;
 		Model resultsModel = null;
 		String jsonRecord = null;
 		try {
-			log.debug("Started at: "+Calendar.getInstance().getTime().toString());
+//			log.debug("Started at: "+Calendar.getInstance().getTime().toString());
 			GraphQuery graphQuery = connection.prepareGraphQuery(QueryLanguage.SPARQL, query);
 			GraphQueryResult graphQueryResult = graphQuery.evaluate();
 			resultsModel = QueryResults.asModel(graphQueryResult);
 			graphQueryResult.close();
-			log.debug("Finished at: "+Calendar.getInstance().getTime().toString());
+//			log.debug("Finished at: "+Calendar.getInstance().getTime().toString());
 			log.debug("Statements retrieved: " + resultsModel.size());
 			if (resultsModel.size()==0) {
 				return "noresult";
@@ -94,8 +93,7 @@ public class RunSPARQLQueryService {
 			if (isCollection) {
 				parser.setCollection(true);
 			}
-			log.debug("rdf > json : "+recordWriter.toString());
-			parser.parse(recordWriter.toString());
+			parser.parse(recordWriter.toString());
 			resourceManager.manage(parser);
 			bulkUpload.index(resourceManager, isCollection);
 		} catch(Exception e){
diff --git a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/json/ParseRDFJSON.java b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/json/ParseRDFJSON.java
index 4a2549f..6ccd2d9 100644
--- a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/json/ParseRDFJSON.java
+++ b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/json/ParseRDFJSON.java
@@ -55,10 +55,8 @@ public class ParseRDFJSON {
 		setJson(json);
 		fillMap();
 		DocumentContext jsonContext = JsonPath.parse(json);
-		log.debug(getCatalogEntryJsonPath());
 		JSONArray entries = jsonContext.read(getCatalogEntryJsonPath());
 		int size = entries.size();
-		log.debug("num elements in json: "+size);
 		it = entries.iterator();
 	}
 
@@ -85,6 +83,10 @@ public class ParseRDFJSON {
 		this.catalogEntryJsonPath = catalogEntryJsonPath;
 	}
 
+	public void setCatalogEntryCollectionJsonPath(String catalogEntryCollectionJsonPath) {
+		this.catalogEntryCollectionJsonPath = catalogEntryCollectionJsonPath;
+	}
+
 	public boolean isCollection() {
 		return isCollection;
 	}
diff --git a/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java b/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java
index f7d5d2a..d70d3ca 100644
--- a/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java
+++ b/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java
@@ -40,6 +40,7 @@ public class GraphDbReaderAndESIndexTest {
 				appProps.getProperty("graphdb.repository"));
 		ParseRDFJSON parseRDFJSON = new ParseRDFJSON();
 		parseRDFJSON.setCatalogEntryJsonPath(appProps.getProperty("catalog.entry.path"));
+		parseRDFJSON.setCatalogEntryCollectionJsonPath(appProps.getProperty("catalog.entry.collection.path"));
 		runSPQRLQuery.setParser(parseRDFJSON);
 		ResourceManager resourceManager = new ResourceManager();
 		resourceManager.setup(
@@ -52,13 +53,13 @@ public class GraphDbReaderAndESIndexTest {
 		BulkUpload bulkUpload = new BulkUpload();
 		bulkUpload.init(appProps.getProperty("elasticsearch.hostname"),appProps.getProperty("elasticsearch.indexname"));
 		runSPQRLQuery.setBulkUpload(bulkUpload);
-		String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/0320AA03-BED9-3F2B-AF5D-E31EE50CAE97";
+		String recordId = "https://ariadne-infrastructure.eu/aocat/Collection/ADS/849B1C0F-4C5F-3D8C-9082-CA60DBB4F557";
 		String datasource = "ads";
 		String collectionId = "271";
 		List recordIds = Arrays.asList(recordId);
 
-		final ClassPathResource queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_record_data_template.sparql");
+		final ClassPathResource queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_collection_data_template.sparql");
 		String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name());
-		runSPQRLQuery.executeMultipleQueryGraph(queryTemplate, recordIds, datasource, collectionId, false);
+		runSPQRLQuery.executeMultipleQueryGraph(queryTemplate, recordIds, datasource, collectionId, true);
 	}
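
Note for reviewers (not part of the patch): the block that the first hunk moves into the "dataset" branch of BulkUpload.index() can be read in isolation as the minimal sketch below. The class name, the consolidate() helper, and the nested Spatial stand-in are hypothetical, introduced only for illustration; the stand-in exposes just the getPlaceName/setPlaceName and getLocation/setLocation accessors that the patched code uses.

// Illustrative sketch only. It mirrors the spatial consolidation that
// BulkUpload.index() now applies only to non-collection ("dataset") records:
// the place names and locations found across all Spatial entries are
// collapsed into a single Spatial element (the last non-null value wins),
// while collection records keep their spatial list unchanged.
import java.util.Arrays;
import java.util.List;

class SpatialConsolidationSketch {

	// Minimal stand-in for the project's Spatial bean, limited to the
	// accessors used by the patched code.
	static class Spatial {
		private String placeName;
		private String location;
		String getPlaceName() { return placeName; }
		void setPlaceName(String placeName) { this.placeName = placeName; }
		String getLocation() { return location; }
		void setLocation(String location) { this.location = location; }
	}

	// Hypothetical helper (not in the codebase): same stream logic as the patch.
	static List<Spatial> consolidate(List<Spatial> spatials) {
		Spatial esSpatial = new Spatial();
		spatials.stream().filter(s -> s.getPlaceName() != null)
				.forEach(s -> esSpatial.setPlaceName(s.getPlaceName()));
		spatials.stream().filter(s -> s.getLocation() != null)
				.forEach(s -> esSpatial.setLocation(s.getLocation()));
		return Arrays.asList(esSpatial);
	}
}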