Added parsing of BST-format dates before creating the ES record

This commit is contained in:
Enrico Ottonello 2020-11-30 13:27:28 +01:00
parent fbc4fc8717
commit 8baf2d7941
5 changed files with 50 additions and 23 deletions

View File

@ -57,7 +57,6 @@ public class BulkUpload {
try {
Object next = manager.next();
AriadneCatalogEntry ace = ((AriadneCatalogEntry) next);
// log.warn("ready to index: "+ace.toJson());
if (isCollection) {
ace.setResourceType("collection");
if (ace.getSpatial()==null) {
@ -125,9 +124,9 @@ public class BulkUpload {
String idES = splits[splits.length-1];
request.add(new IndexRequest(elasticSearchIndexName).id(idES)
.source(ace.toJson(),XContentType.JSON));
// log.info("Indexing: "+idES+" :: "+ace.toJson());
long start = System.currentTimeMillis();
BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
long end = System.currentTimeMillis();
if (bulkResponse!=null) {
esResponseCode = bulkResponse.status().getStatus();
// log.info("Indexing to ES completed with status: " + bulkResponse.status());
@ -138,9 +137,10 @@ public class BulkUpload {
else {
esResponseCode = -3;
}
} catch (Exception e) {
e.printStackTrace();
log.error("Indexing "+e.getMessage());
log.debug(idES+" es_index_time(sec): "+(end-start)/1000+" response_code: "+esResponseCode);
} catch (Throwable t) {
t.printStackTrace();
log.error("Indexing "+t.getMessage());
return -1;
}
}

View File

@ -66,7 +66,6 @@ public class RunSPARQLQueryService {
if (queryTemplate==null)
return null;
final String selectQueryTemplate = queryTemplate.replaceAll("%datasource", datasource).replaceAll("%collectionId", collectionId);
log.info("Start indexing "+ recordIds.size()+ " records ...");
final List<Integer> errorCodesCount = Arrays.asList(new Integer(0));
final List<Integer> successCodesCount = Arrays.asList(new Integer(0));
final List<Integer> counter = Arrays.asList(new Integer(0));
@ -132,19 +131,18 @@ public class RunSPARQLQueryService {
} catch (InterruptedException e) {
e.printStackTrace();
}
log.debug("Retrieving "+recordId+" - isCollection:"+isCollection );
String query = selectQueryTemplate.replaceAll("%record", "<"+recordId+">");
openConnection();
StringWriter recordWriter = null;
Model resultsModel = null;
try {
log.debug("Started at: "+Calendar.getInstance().getTime().toString());
long start = System.currentTimeMillis();
GraphQuery graphQuery = connection.prepareGraphQuery(QueryLanguage.SPARQL, query);
GraphQueryResult graphQueryResult = graphQuery.evaluate();
resultsModel = QueryResults.asModel(graphQueryResult);
graphQueryResult.close();
log.debug("Finished at: "+Calendar.getInstance().getTime().toString());
log.debug("Statements retrieved: " + resultsModel.size());
long end = System.currentTimeMillis();
int triples = resultsModel.size();
if (resultsModel.size()==0) {
return -2;
}
@ -155,8 +153,8 @@ public class RunSPARQLQueryService {
String bufferedRecord = recordWriter.toString();
// log.debug(bufferedRecord);
int size = parser.parse(bufferedRecord);
log.debug("json elements: "+size);
if (size==-1) {
log.debug(recordId+" is_coll: "+isCollection+" query_time(sec): "+(end-start)/1000 +" triples: "+triples +" json: "+size);
if (size==-1) {
return -4;
}
resourceManager.manage(parser);

View File

@ -1,13 +1,24 @@
package eu.dnetlib.ariadneplus.reader.utils;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.TextStyle;
import java.time.temporal.ChronoField;
import java.util.Collections;
import java.util.Locale;
public class ESUtils {
// Formatter for dates written as day, abbreviated month name and year ("dd MMM yyyy").
private static final DateTimeFormatter originalRecordDateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy");
// Formatter for the ISO-like "yyyy-MM-dd" form; used both to parse and to produce output.
private static final DateTimeFormatter elasticSearchDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
// Zone handed to appendZoneText as the preferred match when a short zone name is ambiguous.
private static final ZoneId preferredZone = ZoneId.of("Europe/London");
// Formatter for timestamps shaped like "EEE MMM dd HH:mm:ss <zone> yyyy",
// where <zone> is a short zone name such as "BST". Built with Locale.ROOT so that
// parsing does not depend on the JVM's default locale.
private static final DateTimeFormatter BST_FORMATTER = new DateTimeFormatterBuilder()
        .appendPattern("EEE MMM dd HH:mm:ss ")
        .appendZoneText(TextStyle.SHORT, Collections.singleton(preferredZone))
        .appendPattern(" yyyy")
        .toFormatter(Locale.ROOT);
private static DateTimeFormatter yearOnlyDateFormatter = new DateTimeFormatterBuilder()
.appendPattern("yyyy")
.parseDefaulting(ChronoField.MONTH_OF_YEAR, 1)
@ -27,10 +38,19 @@ public class ESUtils {
LocalDate parsedDate = LocalDate.parse(originalDate.substring(0, 10), elasticSearchDateFormatter);
return parsedDate.format(elasticSearchDateFormatter);
} catch (Exception e2) {
return "0000";
try {
return parseBST(originalDate);
} catch (Exception e3) {
return "0000";
}
}
}
}
}
/**
 * Converts a timestamp accepted by {@code BST_FORMATTER} into the text produced by
 * {@code elasticSearchDateFormatter}.
 *
 * @param BSTDate the timestamp text to parse
 * @return the parsed date-time formatted with {@code elasticSearchDateFormatter}
 * @throws java.time.format.DateTimeParseException if the text does not match {@code BST_FORMATTER}
 */
private static String parseBST(String BSTDate) {
    // Parse and re-format in one step; no intermediate value is needed.
    return ZonedDateTime.parse(BSTDate, BST_FORMATTER).format(elasticSearchDateFormatter);
}
}

View File

@ -27,8 +27,15 @@ public class GraphDbReaderAndESIndexTest {
private RunSPARQLQueryService runSPQRLQuery;
// Runs readAndIndexTest for one fixed record: testRecord is true,
// datasource is "ads" and collectionId is "1093".
@Test
// @Ignore
public void readAndIndexTest() throws Exception {
public void ads1093RecordTest() throws Exception {
boolean testRecord = true;
String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/1B23A61E-E4DE-3647-8006-0284C729AF85";
String datasource = "ads";
String collectionId = "1093";
// Delegate to the parameterised helper with the values chosen above.
readAndIndexTest(testRecord, recordId, datasource, collectionId);
}
private void readAndIndexTest(boolean testRecord, String recordId, String datasource, String collectionId) throws Exception {
final ClassPathResource resource = new ClassPathResource("application.properties");
Properties appProps = new Properties();
appProps.load(resource.getInputStream());
@ -54,20 +61,13 @@ public class GraphDbReaderAndESIndexTest {
BulkUpload bulkUpload = new BulkUpload();
bulkUpload.init(appProps.getProperty("elasticsearch.hostname"),appProps.getProperty("elasticsearch.indexname"));
runSPQRLQuery.setBulkUpload(bulkUpload);
String recordId;
final ClassPathResource queryTemplateResource;
boolean testRecord = true;
if (testRecord) {
recordId = "https://ariadne-infrastructure.eu/aocat/Resource/3037F979-F94F-380B-A6A6-3972ED10E61C";
queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_record_data_template.sparql");
}
else {
recordId = "https://ariadne-infrastructure.eu/aocat/Collection/CENIEH/A4A042CF-4FD6-3FB4-B701-CBC8A1653ADA";
queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_collection_data_template.sparql");
}
String datasource = "cenieh";
String collectionId = "171";
List<String> recordIds = Arrays.asList(recordId);
String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name());
if (testRecord) {

View File

@ -5,10 +5,19 @@ import org.junit.Test;
/**
 * Tests for {@code ESUtils.getESFormatDate}.
 */
public class ESUtilsTest {
    // Elastic search format: yyyy-MM-dd or yyyy

    /** A date already written as yyyy-MM-dd is expected to be returned unchanged. */
    @Test
    public void testParseDate(){
        String date = "2013-03-13";
        String parsed = ESUtils.getESFormatDate(date);
        Assert.assertEquals(date, parsed);
    }

    /** A timestamp carrying a "BST" zone name is expected to be reduced to its yyyy-MM-dd date. */
    @Test
    public void testBSTDate(){
        String date = "Fri May 15 13:21:02 BST 2020";
        String parsed = ESUtils.getESFormatDate(date);
        // assertEquals takes (expected, actual); keeping that order makes the
        // failure message report the right value as "expected".
        Assert.assertEquals("2020-05-15", parsed);
    }
}