Added parsing of BST-style date strings before creating the ES record

This commit is contained in:
Enrico Ottonello 2020-11-30 13:27:28 +01:00
parent fbc4fc8717
commit 8baf2d7941
5 changed files with 50 additions and 23 deletions

View File

@ -57,7 +57,6 @@ public class BulkUpload {
try { try {
Object next = manager.next(); Object next = manager.next();
AriadneCatalogEntry ace = ((AriadneCatalogEntry) next); AriadneCatalogEntry ace = ((AriadneCatalogEntry) next);
// log.warn("ready to index: "+ace.toJson());
if (isCollection) { if (isCollection) {
ace.setResourceType("collection"); ace.setResourceType("collection");
if (ace.getSpatial()==null) { if (ace.getSpatial()==null) {
@ -125,9 +124,9 @@ public class BulkUpload {
String idES = splits[splits.length-1]; String idES = splits[splits.length-1];
request.add(new IndexRequest(elasticSearchIndexName).id(idES) request.add(new IndexRequest(elasticSearchIndexName).id(idES)
.source(ace.toJson(),XContentType.JSON)); .source(ace.toJson(),XContentType.JSON));
long start = System.currentTimeMillis();
// log.info("Indexing: "+idES+" :: "+ace.toJson());
BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT); BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
long end = System.currentTimeMillis();
if (bulkResponse!=null) { if (bulkResponse!=null) {
esResponseCode = bulkResponse.status().getStatus(); esResponseCode = bulkResponse.status().getStatus();
// log.info("Indexing to ES completed with status: " + bulkResponse.status()); // log.info("Indexing to ES completed with status: " + bulkResponse.status());
@ -138,9 +137,10 @@ public class BulkUpload {
else { else {
esResponseCode = -3; esResponseCode = -3;
} }
} catch (Exception e) { log.debug(idES+" es_index_time(sec): "+(end-start)/1000+" response_code: "+esResponseCode);
e.printStackTrace(); } catch (Throwable t) {
log.error("Indexing "+e.getMessage()); t.printStackTrace();
log.error("Indexing "+t.getMessage());
return -1; return -1;
} }
} }

View File

@ -66,7 +66,6 @@ public class RunSPARQLQueryService {
if (queryTemplate==null) if (queryTemplate==null)
return null; return null;
final String selectQueryTemplate = queryTemplate.replaceAll("%datasource", datasource).replaceAll("%collectionId", collectionId); final String selectQueryTemplate = queryTemplate.replaceAll("%datasource", datasource).replaceAll("%collectionId", collectionId);
log.info("Start indexing "+ recordIds.size()+ " records ...");
final List<Integer> errorCodesCount = Arrays.asList(new Integer(0)); final List<Integer> errorCodesCount = Arrays.asList(new Integer(0));
final List<Integer> successCodesCount = Arrays.asList(new Integer(0)); final List<Integer> successCodesCount = Arrays.asList(new Integer(0));
final List<Integer> counter = Arrays.asList(new Integer(0)); final List<Integer> counter = Arrays.asList(new Integer(0));
@ -132,19 +131,18 @@ public class RunSPARQLQueryService {
} catch (InterruptedException e) { } catch (InterruptedException e) {
e.printStackTrace(); e.printStackTrace();
} }
log.debug("Retrieving "+recordId+" - isCollection:"+isCollection );
String query = selectQueryTemplate.replaceAll("%record", "<"+recordId+">"); String query = selectQueryTemplate.replaceAll("%record", "<"+recordId+">");
openConnection(); openConnection();
StringWriter recordWriter = null; StringWriter recordWriter = null;
Model resultsModel = null; Model resultsModel = null;
try { try {
log.debug("Started at: "+Calendar.getInstance().getTime().toString()); long start = System.currentTimeMillis();
GraphQuery graphQuery = connection.prepareGraphQuery(QueryLanguage.SPARQL, query); GraphQuery graphQuery = connection.prepareGraphQuery(QueryLanguage.SPARQL, query);
GraphQueryResult graphQueryResult = graphQuery.evaluate(); GraphQueryResult graphQueryResult = graphQuery.evaluate();
resultsModel = QueryResults.asModel(graphQueryResult); resultsModel = QueryResults.asModel(graphQueryResult);
graphQueryResult.close(); graphQueryResult.close();
log.debug("Finished at: "+Calendar.getInstance().getTime().toString()); long end = System.currentTimeMillis();
log.debug("Statements retrieved: " + resultsModel.size()); int triples = resultsModel.size();
if (resultsModel.size()==0) { if (resultsModel.size()==0) {
return -2; return -2;
} }
@ -155,8 +153,8 @@ public class RunSPARQLQueryService {
String bufferedRecord = recordWriter.toString(); String bufferedRecord = recordWriter.toString();
// log.debug(bufferedRecord); // log.debug(bufferedRecord);
int size = parser.parse(bufferedRecord); int size = parser.parse(bufferedRecord);
log.debug("json elements: "+size); log.debug(recordId+" is_coll: "+isCollection+" query_time(sec): "+(end-start)/1000 +" triples: "+triples +" json: "+size);
if (size==-1) { if (size==-1) {
return -4; return -4;
} }
resourceManager.manage(parser); resourceManager.manage(parser);

View File

@ -1,13 +1,24 @@
package eu.dnetlib.ariadneplus.reader.utils; package eu.dnetlib.ariadneplus.reader.utils;
import java.time.LocalDate; import java.time.LocalDate;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder; import java.time.format.DateTimeFormatterBuilder;
import java.time.format.TextStyle;
import java.time.temporal.ChronoField; import java.time.temporal.ChronoField;
import java.util.Collections;
import java.util.Locale;
public class ESUtils { public class ESUtils {
private static DateTimeFormatter originalRecordDateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy"); private static DateTimeFormatter originalRecordDateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy");
private static DateTimeFormatter elasticSearchDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); private static DateTimeFormatter elasticSearchDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
// Zone handed to appendZoneText below as the preferred zone, used when a short
// zone name found in the input (such as "BST") could match more than one zone.
private static ZoneId preferredZone = ZoneId.of("Europe/London");
// Formatter for dates laid out as "EEE MMM dd HH:mm:ss <short zone name> yyyy",
// e.g. "Fri May 15 13:21:02 BST 2020". Day and month names use Locale.ROOT.
private static DateTimeFormatter BST_FORMATTER = new DateTimeFormatterBuilder()
.appendPattern("EEE MMM dd HH:mm:ss ")
.appendZoneText(TextStyle.SHORT, Collections.singleton(preferredZone))
.appendPattern(" yyyy")
.toFormatter(Locale.ROOT);
private static DateTimeFormatter yearOnlyDateFormatter = new DateTimeFormatterBuilder() private static DateTimeFormatter yearOnlyDateFormatter = new DateTimeFormatterBuilder()
.appendPattern("yyyy") .appendPattern("yyyy")
.parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1)
@ -27,10 +38,19 @@ public class ESUtils {
LocalDate parsedDate = LocalDate.parse(originalDate.substring(0, 10), elasticSearchDateFormatter); LocalDate parsedDate = LocalDate.parse(originalDate.substring(0, 10), elasticSearchDateFormatter);
return parsedDate.format(elasticSearchDateFormatter); return parsedDate.format(elasticSearchDateFormatter);
} catch (Exception e2) { } catch (Exception e2) {
return "0000"; try {
return parseBST(originalDate);
} catch (Exception e3) {
return "0000";
}
} }
} }
} }
} }
/**
 * Re-formats a date written in the {@code BST_FORMATTER} layout
 * (for example {@code "Fri May 15 13:21:02 BST 2020"}) using
 * {@code elasticSearchDateFormatter}.
 *
 * @param BSTDate the date text to convert
 * @return the same date rendered by {@code elasticSearchDateFormatter}
 * @throws java.time.format.DateTimeParseException if the text does not match
 *         the {@code BST_FORMATTER} layout
 */
private static String parseBST(String BSTDate) {
    return ZonedDateTime.parse(BSTDate, BST_FORMATTER).format(elasticSearchDateFormatter);
}
} }

View File

@ -27,8 +27,15 @@ public class GraphDbReaderAndESIndexTest {
private RunSPARQLQueryService runSPQRLQuery; private RunSPARQLQueryService runSPQRLQuery;
@Test @Test
// @Ignore public void ads1093RecordTest() throws Exception {
public void readAndIndexTest() throws Exception { boolean testRecord = true;
String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/1B23A61E-E4DE-3647-8006-0284C729AF85";
String datasource = "ads";
String collectionId = "1093";
readAndIndexTest(testRecord, recordId, datasource, collectionId);
}
private void readAndIndexTest(boolean testRecord, String recordId, String datasource, String collectionId) throws Exception {
final ClassPathResource resource = new ClassPathResource("application.properties"); final ClassPathResource resource = new ClassPathResource("application.properties");
Properties appProps = new Properties(); Properties appProps = new Properties();
appProps.load(resource.getInputStream()); appProps.load(resource.getInputStream());
@ -54,20 +61,13 @@ public class GraphDbReaderAndESIndexTest {
BulkUpload bulkUpload = new BulkUpload(); BulkUpload bulkUpload = new BulkUpload();
bulkUpload.init(appProps.getProperty("elasticsearch.hostname"),appProps.getProperty("elasticsearch.indexname")); bulkUpload.init(appProps.getProperty("elasticsearch.hostname"),appProps.getProperty("elasticsearch.indexname"));
runSPQRLQuery.setBulkUpload(bulkUpload); runSPQRLQuery.setBulkUpload(bulkUpload);
String recordId;
final ClassPathResource queryTemplateResource; final ClassPathResource queryTemplateResource;
boolean testRecord = true;
if (testRecord) { if (testRecord) {
recordId = "https://ariadne-infrastructure.eu/aocat/Resource/3037F979-F94F-380B-A6A6-3972ED10E61C";
queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_record_data_template.sparql"); queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_record_data_template.sparql");
} }
else { else {
recordId = "https://ariadne-infrastructure.eu/aocat/Collection/CENIEH/A4A042CF-4FD6-3FB4-B701-CBC8A1653ADA";
queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_collection_data_template.sparql"); queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_collection_data_template.sparql");
} }
String datasource = "cenieh";
String collectionId = "171";
List<String> recordIds = Arrays.asList(recordId); List<String> recordIds = Arrays.asList(recordId);
String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name()); String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name());
if (testRecord) { if (testRecord) {

View File

@ -5,10 +5,19 @@ import org.junit.Test;
public class ESUtilsTest { public class ESUtilsTest {
// Elasticsearch date format: yyyy-MM-dd or yyyy
@Test @Test
public void testParseDate(){ public void testParseDate(){
String date = "2013-03-13"; String date = "2013-03-13";
String parsed = ESUtils.getESFormatDate(date); String parsed = ESUtils.getESFormatDate(date);
Assert.assertEquals(date, parsed); Assert.assertEquals(date, parsed);
} }
/**
 * Verifies that a date string carrying a "BST" zone abbreviation
 * ("EEE MMM dd HH:mm:ss zzz yyyy" layout) is converted to yyyy-MM-dd.
 */
@Test
public void testBSTDate(){
    String date = "Fri May 15 13:21:02 BST 2020";
    String parsed = ESUtils.getESFormatDate(date);
    // assertEquals takes (expected, actual); keeping that order makes a
    // failure report the right value as "expected".
    Assert.assertEquals("2020-05-15", parsed);
}
} }