Added BST date format parsing before creating the ES record
parent fbc4fc8717
commit 8baf2d7941

@@ -57,7 +57,6 @@ public class BulkUpload {
        try {
            Object next = manager.next();
            AriadneCatalogEntry ace = ((AriadneCatalogEntry) next);
            // log.warn("ready to index: "+ace.toJson());
            if (isCollection) {
                ace.setResourceType("collection");
                if (ace.getSpatial()==null) {
@@ -125,9 +124,9 @@ public class BulkUpload {
            String idES = splits[splits.length-1];
            request.add(new IndexRequest(elasticSearchIndexName).id(idES)
                    .source(ace.toJson(),XContentType.JSON));

            // log.info("Indexing: "+idES+" :: "+ace.toJson());
            long start = System.currentTimeMillis();
            BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
            long end = System.currentTimeMillis();
            if (bulkResponse!=null) {
                esResponseCode = bulkResponse.status().getStatus();
                // log.info("Indexing to ES completed with status: " + bulkResponse.status());
@@ -138,9 +137,10 @@ public class BulkUpload {
            else {
                esResponseCode = -3;
            }
        } catch (Exception e) {
            e.printStackTrace();
            log.error("Indexing "+e.getMessage());
            log.debug(idES+" es_index_time(sec): "+(end-start)/1000+" response_code: "+esResponseCode);
        } catch (Throwable t) {
            t.printStackTrace();
            log.error("Indexing "+t.getMessage());
            return -1;
        }
    }

@@ -66,7 +66,6 @@ public class RunSPARQLQueryService {
        if (queryTemplate==null)
            return null;
        final String selectQueryTemplate = queryTemplate.replaceAll("%datasource", datasource).replaceAll("%collectionId", collectionId);
        log.info("Start indexing "+ recordIds.size()+ " records ...");
        final List<Integer> errorCodesCount = Arrays.asList(new Integer(0));
        final List<Integer> successCodesCount = Arrays.asList(new Integer(0));
        final List<Integer> counter = Arrays.asList(new Integer(0));
@@ -132,19 +131,18 @@ public class RunSPARQLQueryService {
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            log.debug("Retrieving "+recordId+" - isCollection:"+isCollection );
            String query = selectQueryTemplate.replaceAll("%record", "<"+recordId+">");
            openConnection();
            StringWriter recordWriter = null;
            Model resultsModel = null;
            try {
                log.debug("Started at: "+Calendar.getInstance().getTime().toString());
                long start = System.currentTimeMillis();
                GraphQuery graphQuery = connection.prepareGraphQuery(QueryLanguage.SPARQL, query);
                GraphQueryResult graphQueryResult = graphQuery.evaluate();
                resultsModel = QueryResults.asModel(graphQueryResult);
                graphQueryResult.close();
                log.debug("Finished at: "+Calendar.getInstance().getTime().toString());
                log.debug("Statements retrieved: " + resultsModel.size());
                long end = System.currentTimeMillis();
                int triples = resultsModel.size();
                if (resultsModel.size()==0) {
                    return -2;
                }
@@ -155,8 +153,8 @@ public class RunSPARQLQueryService {
                String bufferedRecord = recordWriter.toString();
                // log.debug(bufferedRecord);
                int size = parser.parse(bufferedRecord);
                log.debug("json elements: "+size);
                if (size==-1) {
                log.debug(recordId+" is_coll: "+isCollection+" query_time(sec): "+(end-start)/1000 +" triples: "+triples +" json: "+size);
                if (size==-1) {
                    return -4;
                }
                resourceManager.manage(parser);

@@ -1,13 +1,24 @@
package eu.dnetlib.ariadneplus.reader.utils;

import java.time.LocalDate;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.TextStyle;
import java.time.temporal.ChronoField;
import java.util.Collections;
import java.util.Locale;

public class ESUtils {
    private static DateTimeFormatter originalRecordDateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy");
    private static DateTimeFormatter elasticSearchDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
    private static ZoneId preferredZone = ZoneId.of("Europe/London");
    private static DateTimeFormatter BST_FORMATTER = new DateTimeFormatterBuilder()
            .appendPattern("EEE MMM dd HH:mm:ss ")
            .appendZoneText(TextStyle.SHORT, Collections.singleton(preferredZone))
            .appendPattern(" yyyy")
            .toFormatter(Locale.ROOT);
    private static DateTimeFormatter yearOnlyDateFormatter = new DateTimeFormatterBuilder()
            .appendPattern("yyyy")
            .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1)
@@ -27,10 +38,19 @@ public class ESUtils {
            LocalDate parsedDate = LocalDate.parse(originalDate.substring(0, 10), elasticSearchDateFormatter);
            return parsedDate.format(elasticSearchDateFormatter);
        } catch (Exception e2) {
            return "0000";
            try {
                return parseBST(originalDate);
            } catch (Exception e3) {
                return "0000";
            }
        }
    }
    }

}

    private static String parseBST(String BSTDate) {
        ZonedDateTime zd = ZonedDateTime.parse(BSTDate, BST_FORMATTER);
        return zd.format(elasticSearchDateFormatter);
    }
}

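For illustration, a minimal standalone sketch of the BST parsing approach introduced in ESUtils above. The class name BstDateSketch and the local variable names are hypothetical; the formatter setup simply mirrors the BST_FORMATTER defined in the diff, and the sample input matches the one used in ESUtilsTest below.

    import java.time.ZoneId;
    import java.time.ZonedDateTime;
    import java.time.format.DateTimeFormatter;
    import java.time.format.DateTimeFormatterBuilder;
    import java.time.format.TextStyle;
    import java.util.Collections;
    import java.util.Locale;

    public class BstDateSketch {

        public static void main(String[] args) {
            // "BST" on its own is ambiguous; passing Europe/London as the preferred
            // zone lets java.time resolve it to British Summer Time while parsing.
            DateTimeFormatter bstFormatter = new DateTimeFormatterBuilder()
                    .appendPattern("EEE MMM dd HH:mm:ss ")
                    .appendZoneText(TextStyle.SHORT, Collections.singleton(ZoneId.of("Europe/London")))
                    .appendPattern(" yyyy")
                    .toFormatter(Locale.ROOT);

            ZonedDateTime parsed = ZonedDateTime.parse("Fri May 15 13:21:02 BST 2020", bstFormatter);

            // Re-format to the yyyy-MM-dd shape expected by the Elasticsearch index.
            System.out.println(parsed.format(DateTimeFormatter.ofPattern("yyyy-MM-dd"))); // 2020-05-15
        }
    }
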
@@ -27,8 +27,15 @@ public class GraphDbReaderAndESIndexTest {
    private RunSPARQLQueryService runSPQRLQuery;

    @Test
    // @Ignore
    public void readAndIndexTest() throws Exception {
    public void ads1093RecordTest() throws Exception {
        boolean testRecord = true;
        String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/1B23A61E-E4DE-3647-8006-0284C729AF85";
        String datasource = "ads";
        String collectionId = "1093";
        readAndIndexTest(testRecord, recordId, datasource, collectionId);
    }

    private void readAndIndexTest(boolean testRecord, String recordId, String datasource, String collectionId) throws Exception {
        final ClassPathResource resource = new ClassPathResource("application.properties");
        Properties appProps = new Properties();
        appProps.load(resource.getInputStream());
@@ -54,20 +61,13 @@ public class GraphDbReaderAndESIndexTest {
        BulkUpload bulkUpload = new BulkUpload();
        bulkUpload.init(appProps.getProperty("elasticsearch.hostname"),appProps.getProperty("elasticsearch.indexname"));
        runSPQRLQuery.setBulkUpload(bulkUpload);
        String recordId;
        final ClassPathResource queryTemplateResource;
        boolean testRecord = true;
        if (testRecord) {
            recordId = "https://ariadne-infrastructure.eu/aocat/Resource/3037F979-F94F-380B-A6A6-3972ED10E61C";
            queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_record_data_template.sparql");
        }
        else {
            recordId = "https://ariadne-infrastructure.eu/aocat/Collection/CENIEH/A4A042CF-4FD6-3FB4-B701-CBC8A1653ADA";
            queryTemplateResource = new ClassPathResource("eu/dnetlib/ariadneplus/sparql/read_collection_data_template.sparql");
        }

        String datasource = "cenieh";
        String collectionId = "171";
        List<String> recordIds = Arrays.asList(recordId);
        String queryTemplate = IOUtils.toString(queryTemplateResource.getInputStream(), StandardCharsets.UTF_8.name());
        if (testRecord) {

@@ -5,10 +5,19 @@ import org.junit.Test;

public class ESUtilsTest {

    // Elastic search format: yyyy-MM-dd or yyyy

    @Test
    public void testParseDate(){
        String date = "2013-03-13";
        String parsed = ESUtils.getESFormatDate(date);
        Assert.assertEquals(date, parsed);
    }

    @Test
    public void testBSTDate(){
        String date = "Fri May 15 13:21:02 BST 2020";
        String parsed = ESUtils.getESFormatDate(date);
        Assert.assertEquals(parsed, "2020-05-15");
    }
}