diff --git a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/elasticsearch/BulkUpload.java b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/elasticsearch/BulkUpload.java
index e5a1be1..f3c8a8e 100644
--- a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/elasticsearch/BulkUpload.java
+++ b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/elasticsearch/BulkUpload.java
@@ -57,7 +57,6 @@ public class BulkUpload {
 			try {
 				Object next = manager.next();
 				AriadneCatalogEntry ace = ((AriadneCatalogEntry) next);
-//				log.warn("ready to index: "+ace.toJson());
 				if (isCollection) {
 					ace.setResourceType("collection");
 					if (ace.getSpatial()==null) {
@@ -125,9 +124,9 @@ public class BulkUpload {
 				String idES = splits[splits.length-1];
 				request.add(new IndexRequest(elasticSearchIndexName).id(idES)
 						.source(ace.toJson(),XContentType.JSON));
-
-//				log.info("Indexing: "+idES+" :: "+ace.toJson());
+				long start = System.currentTimeMillis(); // wall-clock start of the bulk request
 				BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
+				long end = System.currentTimeMillis(); // wall-clock end of the bulk request
 				if (bulkResponse!=null) {
 					esResponseCode = bulkResponse.status().getStatus();
 //					log.info("Indexing to ES completed with status: " + bulkResponse.status());
@@ -138,9 +137,10 @@ public class BulkUpload {
 				else {
 					esResponseCode = -3;
 				}
-			} catch (Exception e) {
-				e.printStackTrace();
-				log.error("Indexing "+e.getMessage());
+				log.debug(idES+" es_index_time(sec): "+(end-start)/1000.0+" response_code: "+esResponseCode); // 1000.0 keeps sub-second timings
+			} catch (Throwable t) {
+				// NOTE(review): Throwable also traps JVM Errors (e.g. OutOfMemoryError) - confirm this widening is intended.
+				log.error("Indexing "+t.getMessage(), t);
 				return -1;
 			}
 		}
diff --git a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/RunSPARQLQueryService.java b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/RunSPARQLQueryService.java
index e36b0db..b1b730c 100644
--- a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/RunSPARQLQueryService.java
+++ b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/RunSPARQLQueryService.java
@@ -66,7 +66,6 @@ public class RunSPARQLQueryService {
 		if (queryTemplate==null)
 			return null;
 		final String selectQueryTemplate = queryTemplate.replaceAll("%datasource", datasource).replaceAll("%collectionId", collectionId);
-		log.info("Start indexing "+ recordIds.size()+ " records ...");
 		final List errorCodesCount = Arrays.asList(new Integer(0));
 		final List successCodesCount = Arrays.asList(new Integer(0));
 		final List counter = Arrays.asList(new Integer(0));
@@ -132,19 +131,18 @@ public class RunSPARQLQueryService {
 		} catch (InterruptedException e) {
 			e.printStackTrace();
 		}
-		log.debug("Retrieving "+recordId+" - isCollection:"+isCollection );
 		String query = selectQueryTemplate.replaceAll("%record", "<"+recordId+">");
 		openConnection();
 		StringWriter recordWriter = null;
 		Model resultsModel = null;
 		try {
-			log.debug("Started at: "+Calendar.getInstance().getTime().toString());
+			long start = System.currentTimeMillis(); // wall-clock start of the SPARQL graph query
 			GraphQuery graphQuery = connection.prepareGraphQuery(QueryLanguage.SPARQL, query);
 			GraphQueryResult graphQueryResult = graphQuery.evaluate();
 			resultsModel = QueryResults.asModel(graphQueryResult);
 			graphQueryResult.close();
-			log.debug("Finished at: "+Calendar.getInstance().getTime().toString());
-			log.debug("Statements retrieved: " + resultsModel.size());
+			long end = System.currentTimeMillis(); // wall-clock end, after the result has been materialised into the model
+			int triples = resultsModel.size();
 			if (resultsModel.size()==0) {
 				return -2;
 			}
@@ -155,8 +153,8 @@ public class RunSPARQLQueryService {
 			String bufferedRecord = recordWriter.toString();
 //			log.debug(bufferedRecord);
 			int size = parser.parse(bufferedRecord);
-			log.debug("json elements: "+size);
-            if (size==-1) {
+			log.debug(recordId+" is_coll: "+isCollection+" query_time(sec): "+(end-start)/1000.0 +" triples: "+triples +" json: "+size); // 1000.0 keeps sub-second timings
+			if (size==-1) {
 				return -4;
 			}
 			resourceManager.manage(parser);
diff --git a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/utils/ESUtils.java b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/utils/ESUtils.java
index bd1a140..6acf8d2 100644
--- a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/utils/ESUtils.java
+++ b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/utils/ESUtils.java
@@ -1,13 +1,24 @@
 package eu.dnetlib.ariadneplus.reader.utils;
 
 import java.time.LocalDate;
+import java.time.ZoneId;
+import java.time.ZonedDateTime;
 import java.time.format.DateTimeFormatter;
 import java.time.format.DateTimeFormatterBuilder;
+import java.time.format.TextStyle;
 import java.time.temporal.ChronoField;
+import java.util.Collections;
+import java.util.Locale;
 
 public class ESUtils {
 	private static DateTimeFormatter originalRecordDateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy");
 	private static DateTimeFormatter elasticSearchDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
+	private static final ZoneId preferredZone = ZoneId.of("Europe/London"); // preferred zone used when parsing the short zone text (e.g. "BST")
+	private static final DateTimeFormatter BST_FORMATTER = new DateTimeFormatterBuilder() // e.g. "Fri May 15 13:21:02 BST 2020"
+			.appendPattern("EEE MMM dd HH:mm:ss ")
+			.appendZoneText(TextStyle.SHORT, Collections.singleton(preferredZone))
+			.appendPattern(" yyyy")
+			.toFormatter(Locale.ROOT);
 	private static DateTimeFormatter yearOnlyDateFormatter = new DateTimeFormatterBuilder()
 			.appendPattern("yyyy")
 			.parseDefaulting(ChronoField.MONTH_OF_YEAR, 1)
@@ -27,10 +38,19 @@ public class ESUtils {
 					LocalDate parsedDate = LocalDate.parse(originalDate.substring(0, 10), elasticSearchDateFormatter);
 					return parsedDate.format(elasticSearchDateFormatter);
 				} catch (Exception e2) {
-					return "0000";
+					try {
+						return parseBST(originalDate); // last attempt before giving up with "0000"
+					} catch (Exception e3) {
+						return "0000";
+					}
 				}
 			}
 		}
 	}
+
+	private static String parseBST(String bstDate) { // throws if bstDate does not match BST_FORMATTER; the caller catches it
+		ZonedDateTime zd = ZonedDateTime.parse(bstDate, BST_FORMATTER);
+		return zd.format(elasticSearchDateFormatter);
+	}
 
 }
diff --git a/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java b/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java
index 2c24ff5..90a2974 100644
--- a/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java
+++ b/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java
@@ -27,8 +27,15 @@ public class GraphDbReaderAndESIndexTest {
 	private RunSPARQLQueryService runSPQRLQuery;
 
 	@Test
-//	@Ignore
-	public void readAndIndexTest() throws Exception {
+	public void ads1093RecordTest() throws Exception {
+		boolean testRecord = true;
+		String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/1B23A61E-E4DE-3647-8006-0284C729AF85";
+		String datasource = "ads";
+		String collectionId = "1093";
+		readAndIndexTest(testRecord, recordId, datasource, collectionId);
+	}
+
+	private void readAndIndexTest(boolean testRecord, String recordId, String datasource, String collectionId) throws Exception {
 		final ClassPathResource resource = new ClassPathResource("application.properties");
 		Properties appProps = new Properties();
 		appProps.load(resource.getInputStream());
@@ -54,20 +61,13 @@ public class GraphDbReaderAndESIndexTest {
 		BulkUpload bulkUpload = new BulkUpload();
 		bulkUpload.init(appProps.getProperty("elasticsearch.hostname"),appProps.getProperty("elasticsearch.indexname"));
 		runSPQRLQuery.setBulkUpload(bulkUpload);
-		String recordId;
 		final ClassPathResource queryTemplateResource;
-		boolean testRecord = true;
 		if (testRecord) {
-			recordId = "https://ariadne-infrastructure.eu/aocat/Resource/3037F979-F94F-380B-A6A6-3972ED10E61C";
 			queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_record_data_template.sparql");
 		}
 		else {
-			recordId = "https://ariadne-infrastructure.eu/aocat/Collection/CENIEH/A4A042CF-4FD6-3FB4-B701-CBC8A1653ADA";
 			queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_collection_data_template.sparql");
 		}
-
-		String datasource = "cenieh";
-		String collectionId = "171";
 		List recordIds = Arrays.asList(recordId);
 		String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name());
 		if (testRecord) {
diff --git a/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/reader/utils/ESUtilsTest.java b/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/reader/utils/ESUtilsTest.java
index 7cd2288..f05e5b5 100644
--- a/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/reader/utils/ESUtilsTest.java
+++ b/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/reader/utils/ESUtilsTest.java
@@ -5,10 +5,19 @@ import org.junit.Test;
 
 public class ESUtilsTest {
 
+//	Elastic search format: yyyy-MM-dd or yyyy
+
 	@Test
 	public void testParseDate(){
 		String date = "2013-03-13";
 		String parsed = ESUtils.getESFormatDate(date);
 		Assert.assertEquals(date, parsed);
 	}
+
+	@Test
+	public void testBSTDate(){
+		String date = "Fri May 15 13:21:02 BST 2020";
+		String parsed = ESUtils.getESFormatDate(date);
+		Assert.assertEquals("2020-05-15", parsed); // JUnit order is (expected, actual)
+	}
 }