new library for parsing dates

master
Alessia Bardi 1 year ago
parent 7c186ce3cb
commit 23c857906d

@ -138,6 +138,11 @@
<artifactId>jts-core</artifactId>
<version>1.19.0</version>
</dependency>
<dependency>
<groupId>com.github.sisyphsu</groupId>
<artifactId>dateparser</artifactId>
<version>1.0.7</version>
</dependency>
</dependencies>

@ -1,74 +1,46 @@
package eu.dnetlib.ariadneplus.reader.utils;
import com.github.sisyphsu.dateparser.DateParser;
import com.github.sisyphsu.dateparser.DateParserUtils;
import org.apache.commons.lang3.StringUtils;
import java.time.LocalDate;
import java.time.Year;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.time.format.TextStyle;
import java.time.temporal.ChronoField;
import java.util.Collections;
import java.util.Locale;
public class ESUtils {
// Input pattern: day, abbreviated month text and year separated by spaces.
// No Locale is passed, so month text is resolved with the JVM default locale.
private static DateTimeFormatter originalRecordDateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy");
// Output pattern applied to every full date that is parsed successfully.
private static DateTimeFormatter elasticSearchDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
// Input pattern: day/month/year separated by slashes.
private static DateTimeFormatter fastCatDateFormatter = DateTimeFormatter.ofPattern("dd/MM/yyyy");
// Zone given to appendZoneText below as the preferred match for short zone text.
private static ZoneId preferredZone = ZoneId.of("Europe/London");
// Input pattern for values such as "Fri May 15 13:21:02 BST 2020" (the case exercised by testBSTDate).
private static DateTimeFormatter BST_FORMATTER = new DateTimeFormatterBuilder()
.appendPattern("EEE MMM dd HH:mm:ss ")
.appendZoneText(TextStyle.SHORT, Collections.singleton(preferredZone))
.appendPattern(" yyyy")
.toFormatter(Locale.ROOT);
// Pattern "yyyy": used to format a value parsed as a Year.
private static DateTimeFormatter yearOnlyDateFormatter = new DateTimeFormatterBuilder()
.appendPattern("yyyy").toFormatter();
// Same pattern text as elasticSearchDateFormatter, kept as a plain String.
private static String elasticSearchDateFormat = "yyyy-MM-dd";
// Input pattern: day-abbreviated month-year separated by hyphens, e.g. "27-Oct-2022" (see testDateMonthString).
private static DateTimeFormatter stringMonthDateFormatter = DateTimeFormatter.ofPattern("dd-MMM-yyyy");
public static String getESFormatDate(String originalDate) {
try{
LocalDate parsedDate = LocalDate.parse(originalDate, elasticSearchDateFormatter);
return parsedDate.format(elasticSearchDateFormatter);
} catch(Exception e){
public static String getESFormatDate(String originalDate) {
if (StringUtils.isBlank(originalDate)) {
return null;
}
String inputDate = originalDate.trim();
// the library completes with 01-01 if we have only the year: we do not want that.
if(inputDate.length() == 4 && StringUtils.isNumeric(inputDate)){
return inputDate;
}
try {
Year year = Year.parse(originalDate);
return year.format(yearOnlyDateFormatter);
} catch (Exception e0) {
try {
LocalDate parsedDate = LocalDate.parse(originalDate, originalRecordDateFormatter);
return parsedDate.format(elasticSearchDateFormatter);
} catch (Exception e1) {
try {
LocalDate parsedDate = LocalDate.parse(originalDate.substring(0, 10), elasticSearchDateFormatter);
return parsedDate.format(elasticSearchDateFormatter);
} catch (Exception e2) {
try {
return parseBST(originalDate);
} catch (Exception e3) {
try {
LocalDate parsedDate = LocalDate.parse(originalDate, fastCatDateFormatter);
return parsedDate.format(elasticSearchDateFormatter);
} catch (Exception e4) {
try {
LocalDate parsedDate = LocalDate.parse(originalDate, stringMonthDateFormatter);
return parsedDate.format(elasticSearchDateFormatter);
} catch (Exception e5) {
return "0000";
}
}
}
}
}
final LocalDate date = DateParserUtils
.parseDate(inputDate.trim())
.toInstant()
.atZone(ZoneId.systemDefault())
.toLocalDate();
return DateTimeFormatter.ofPattern(elasticSearchDateFormat).format(date);
} catch (DateTimeParseException e) {
return null;
}
}
}
/**
 * Converts a date-time string in the BST_FORMATTER layout into the
 * elasticSearchDateFormatter ("yyyy-MM-dd") representation.
 *
 * @param BSTDate text to parse with BST_FORMATTER
 * @return the parsed value formatted with elasticSearchDateFormatter
 * @throws java.time.format.DateTimeParseException if the text does not match BST_FORMATTER
 */
private static String parseBST(String BSTDate) {
    return ZonedDateTime.parse(BSTDate, BST_FORMATTER).format(elasticSearchDateFormatter);
}
public static String bytesToHex(byte[] hash) {
StringBuilder hexString = new StringBuilder(2 * hash.length);

@ -32,13 +32,31 @@ public class GraphDbReaderAndESIndexTest {
private RunSPARQLQueryService runSPQRLQuery;
/**
 * Runs readAndIndexTest for a single ADS Resource URI under the "archives" api id.
 */
@Test
public void testADS() throws Exception {
    final String datasourceName = "ads";
    final String apiName = "archives";
    final String resourceUri = "https://ariadne-infrastructure.eu/aocat/Resource/ADS/90D1C95D-E249-3E74-92D9-B58FDF690CC7";
    readAndIndexTest(true, resourceUri, datasourceName, apiName);
}
/**
 * Runs readAndIndexTest for a single ADS Resource URI under api id "1093".
 */
@Test
public void testADS1093Record() throws Exception {
    final String datasourceName = "ads";
    final String apiName = "1093";
    final String resourceUri = "https://ariadne-infrastructure.eu/aocat/Resource/ADS/D182802E-592C-3999-9DB2-155F25E356E2";
    readAndIndexTest(true, resourceUri, datasourceName, apiName);
}
/**
 * Runs readAndIndexTest for a single ADS Collection URI under api id "1093".
 * The first argument is false here, whereas the Resource tests pass true.
 */
@Test
public void testADS1093Collection() throws Exception {
    final String datasourceName = "ads";
    final String apiName = "1093";
    final String collectionUri = "https://ariadne-infrastructure.eu/aocat/Collection/ADS/5910411B-ED97-364E-8D28-6024558AA14B";
    readAndIndexTest(false, collectionUri, datasourceName, apiName);
}
/**
 * Runs readAndIndexTest for a single ADS Resource URI under the "archives" api id.
 */
@Test
public void testADS() throws Exception {
    final String datasourceName = "ads";
    final String apiName = "archives";
    final String resourceUri = "https://ariadne-infrastructure.eu/aocat/Resource/ADS/90D1C95D-E249-3E74-92D9-B58FDF690CC7";
    readAndIndexTest(true, resourceUri, datasourceName, apiName);
}
@Test

@ -11,43 +11,69 @@ public class ESUtilsTest {
// A date that is already in yyyy-MM-dd form is expected to come back unchanged.
public void testParseDate(){
String date = "2013-03-13";
String parsed = ESUtils.getESFormatDate(date);
Assert.assertEquals(date, parsed);
Assert.assertEquals(parsed, date);
}
// A "day-of-week month day time zone year" string with a BST zone abbreviation
// is expected to be reduced to its yyyy-MM-dd date.
@Test
public void testBSTDate(){
String date = "Fri May 15 13:21:02 BST 2020";
String parsed = ESUtils.getESFormatDate(date);
Assert.assertEquals(parsed, "2020-05-15");
Assert.assertEquals("2020-05-15", parsed);
}
// A bare four-digit year is expected to be returned as-is, not expanded to a full date.
@Test
public void testYearDate(){
String date = "2012";
String parsed = ESUtils.getESFormatDate(date);
Assert.assertEquals(parsed, "2012");
Assert.assertEquals("2012", parsed);
}
// Empty input is expected to map to the "0000" placeholder.
// NOTE(review): the getESFormatDate lines visible alongside this test include a
// StringUtils.isBlank guard that returns null as well as a final fallback that
// returns "0000" - confirm which one applies and that "0000" is still the expected value.
@Test
public void testEmtyDate(){
String date = "";
String parsed = ESUtils.getESFormatDate(date);
Assert.assertEquals(parsed, "0000");
Assert.assertEquals("0000", parsed);
}
// Four non-numeric characters are expected to map to the "0000" placeholder.
// NOTE(review): the getESFormatDate lines visible alongside this test include a
// catch (DateTimeParseException) that returns null as well as a final fallback that
// returns "0000" - confirm which one applies and that "0000" is still the expected value.
@Test
public void testErrorDate(){
String date = "????";
String parsed = ESUtils.getESFormatDate(date);
Assert.assertEquals(parsed, "0000");
Assert.assertEquals("0000", parsed);
}
/**
 * Non-date text is expected to map to the "0000" placeholder.
 * NOTE(review): the getESFormatDate lines visible alongside this test include paths
 * returning null (blank input, DateTimeParseException) as well as a final fallback
 * returning "0000" - confirm "0000" is still what text such as "unknown" produces.
 */
@Test
public void testUnknownDate(){
    final String input = "unknown";
    final String actual = ESUtils.getESFormatDate(input);
    Assert.assertEquals("0000", actual);
}
// A hyphen-separated date with an abbreviated month name is expected to become yyyy-MM-dd.
@Test
public void testDateMonthString(){
String date = "27-Oct-2022";
String parsed = ESUtils.getESFormatDate(date);
// Prints the parsed value to stdout before asserting on it.
System.out.println(parsed);
Assert.assertEquals(parsed, "2022-10-27");
Assert.assertEquals("2022-10-27", parsed);
}
/**
 * Same input as the hyphenated month-name case, but with the month abbreviation
 * in upper case; the yyyy-MM-dd result is expected to be identical.
 */
@Test
public void testDateCapitalMonthString(){
    final String input = "27-OCT-2022";
    final String actual = ESUtils.getESFormatDate(input);
    System.out.println(actual);
    Assert.assertEquals("2022-10-27", actual);
}
/**
 * A space-separated date with an abbreviated month name is expected to become yyyy-MM-dd.
 */
@Test
public void testDateMonthString2(){
    final String input = "27 Oct 2022";
    final String actual = ESUtils.getESFormatDate(input);
    System.out.println(actual);
    Assert.assertEquals("2022-10-27", actual);
}
}

Loading…
Cancel
Save