new library for parsing dates
This commit is contained in:
parent
7c186ce3cb
commit
23c857906d
|
@ -138,6 +138,11 @@
|
|||
<artifactId>jts-core</artifactId>
|
||||
<version>1.19.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.sisyphsu</groupId>
|
||||
<artifactId>dateparser</artifactId>
|
||||
<version>1.0.7</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
|
|
|
@ -1,74 +1,46 @@
|
|||
package eu.dnetlib.ariadneplus.reader.utils;
|
||||
|
||||
import com.github.sisyphsu.dateparser.DateParser;
|
||||
import com.github.sisyphsu.dateparser.DateParserUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.Year;
|
||||
import java.time.ZoneId;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeFormatterBuilder;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.time.format.TextStyle;
|
||||
import java.time.temporal.ChronoField;
|
||||
import java.util.Collections;
|
||||
import java.util.Locale;
|
||||
|
||||
public class ESUtils {
|
||||
private static DateTimeFormatter originalRecordDateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy");
|
||||
private static DateTimeFormatter elasticSearchDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
|
||||
private static DateTimeFormatter fastCatDateFormatter = DateTimeFormatter.ofPattern("dd/MM/yyyy");
|
||||
private static String elasticSearchDateFormat = "yyyy-MM-dd";
|
||||
|
||||
private static ZoneId preferredZone = ZoneId.of("Europe/London");
|
||||
private static DateTimeFormatter BST_FORMATTER = new DateTimeFormatterBuilder()
|
||||
.appendPattern("EEE MMM dd HH:mm:ss ")
|
||||
.appendZoneText(TextStyle.SHORT, Collections.singleton(preferredZone))
|
||||
.appendPattern(" yyyy")
|
||||
.toFormatter(Locale.ROOT);
|
||||
private static DateTimeFormatter yearOnlyDateFormatter = new DateTimeFormatterBuilder()
|
||||
.appendPattern("yyyy").toFormatter();
|
||||
|
||||
private static DateTimeFormatter stringMonthDateFormatter = DateTimeFormatter.ofPattern("dd-MMM-yyyy");
|
||||
|
||||
public static String getESFormatDate(String originalDate) {
|
||||
try{
|
||||
LocalDate parsedDate = LocalDate.parse(originalDate, elasticSearchDateFormatter);
|
||||
return parsedDate.format(elasticSearchDateFormatter);
|
||||
} catch(Exception e){
|
||||
try {
|
||||
Year year = Year.parse(originalDate);
|
||||
return year.format(yearOnlyDateFormatter);
|
||||
} catch (Exception e0) {
|
||||
try {
|
||||
LocalDate parsedDate = LocalDate.parse(originalDate, originalRecordDateFormatter);
|
||||
return parsedDate.format(elasticSearchDateFormatter);
|
||||
} catch (Exception e1) {
|
||||
try {
|
||||
LocalDate parsedDate = LocalDate.parse(originalDate.substring(0, 10), elasticSearchDateFormatter);
|
||||
return parsedDate.format(elasticSearchDateFormatter);
|
||||
} catch (Exception e2) {
|
||||
try {
|
||||
return parseBST(originalDate);
|
||||
} catch (Exception e3) {
|
||||
try {
|
||||
LocalDate parsedDate = LocalDate.parse(originalDate, fastCatDateFormatter);
|
||||
return parsedDate.format(elasticSearchDateFormatter);
|
||||
} catch (Exception e4) {
|
||||
try {
|
||||
LocalDate parsedDate = LocalDate.parse(originalDate, stringMonthDateFormatter);
|
||||
return parsedDate.format(elasticSearchDateFormatter);
|
||||
} catch (Exception e5) {
|
||||
return "0000";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (StringUtils.isBlank(originalDate)) {
|
||||
return null;
|
||||
}
|
||||
String inputDate = originalDate.trim();
|
||||
// the library completes with 01-01 if we hae only the year: we do not want that.
|
||||
if(inputDate.length() == 4 && StringUtils.isNumeric(inputDate)){
|
||||
return inputDate;
|
||||
}
|
||||
try {
|
||||
final LocalDate date = DateParserUtils
|
||||
.parseDate(inputDate.trim())
|
||||
.toInstant()
|
||||
.atZone(ZoneId.systemDefault())
|
||||
.toLocalDate();
|
||||
return DateTimeFormatter.ofPattern(elasticSearchDateFormat).format(date);
|
||||
} catch (DateTimeParseException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static String parseBST(String BSTDate) {
|
||||
ZonedDateTime zd = ZonedDateTime.parse(BSTDate, BST_FORMATTER);
|
||||
return zd.format(elasticSearchDateFormatter);
|
||||
}
|
||||
|
||||
public static String bytesToHex(byte[] hash) {
|
||||
StringBuilder hexString = new StringBuilder(2 * hash.length);
|
||||
|
|
|
@ -32,13 +32,31 @@ public class GraphDbReaderAndESIndexTest {
|
|||
|
||||
private RunSPARQLQueryService runSPQRLQuery;
|
||||
|
||||
@Test
|
||||
public void testADS() throws Exception {
|
||||
@Test
|
||||
public void testADS1093Record() throws Exception {
|
||||
String id ="https://ariadne-infrastructure.eu/aocat/Resource/ADS/D182802E-592C-3999-9DB2-155F25E356E2";
|
||||
String datasource = "ads";
|
||||
String apiId = "1093";
|
||||
readAndIndexTest(true, id, datasource, apiId);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testADS1093Collection() throws Exception {
|
||||
String id ="https://ariadne-infrastructure.eu/aocat/Collection/ADS/5910411B-ED97-364E-8D28-6024558AA14B";
|
||||
String datasource = "ads";
|
||||
String apiId = "1093";
|
||||
readAndIndexTest(false, id, datasource, apiId);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testADS() throws Exception {
|
||||
String id = "https://ariadne-infrastructure.eu/aocat/Resource/ADS/90D1C95D-E249-3E74-92D9-B58FDF690CC7";
|
||||
String datasource = "ads";
|
||||
String apiId = "archives";
|
||||
readAndIndexTest(true, id, datasource, apiId);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
|
|
|
@ -11,43 +11,69 @@ public class ESUtilsTest {
|
|||
public void testParseDate(){
|
||||
String date = "2013-03-13";
|
||||
String parsed = ESUtils.getESFormatDate(date);
|
||||
Assert.assertEquals(date, parsed);
|
||||
Assert.assertEquals(parsed, date);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBSTDate(){
|
||||
String date = "Fri May 15 13:21:02 BST 2020";
|
||||
String parsed = ESUtils.getESFormatDate(date);
|
||||
Assert.assertEquals(parsed, "2020-05-15");
|
||||
Assert.assertEquals("2020-05-15", parsed);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testYearDate(){
|
||||
String date = "2012";
|
||||
String parsed = ESUtils.getESFormatDate(date);
|
||||
Assert.assertEquals(parsed, "2012");
|
||||
Assert.assertEquals("2012", parsed);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmtyDate(){
|
||||
String date = "";
|
||||
String parsed = ESUtils.getESFormatDate(date);
|
||||
Assert.assertEquals(parsed, "0000");
|
||||
Assert.assertEquals("0000", parsed);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testErrorDate(){
|
||||
String date = "????";
|
||||
String parsed = ESUtils.getESFormatDate(date);
|
||||
Assert.assertEquals(parsed, "0000");
|
||||
Assert.assertEquals("0000", parsed);
|
||||
}
|
||||
@Test
|
||||
public void testUnknownDate(){
|
||||
String date = "unknown";
|
||||
String parsed = ESUtils.getESFormatDate(date);
|
||||
Assert.assertEquals("0000", parsed);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDateMonthString(){
|
||||
String date = "27-Oct-2022";
|
||||
String parsed = ESUtils.getESFormatDate(date);
|
||||
System.out.println(parsed);
|
||||
Assert.assertEquals(parsed, "2022-10-27");
|
||||
Assert.assertEquals("2022-10-27", parsed);
|
||||
}
|
||||
@Test
|
||||
public void testDateCapitalMonthString(){
|
||||
String date = "27-OCT-2022";
|
||||
String parsed = ESUtils.getESFormatDate(date);
|
||||
System.out.println(parsed);
|
||||
Assert.assertEquals("2022-10-27", parsed);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDateMonthString2(){
|
||||
String date = "27 Oct 2022";
|
||||
String parsed = ESUtils.getESFormatDate(date);
|
||||
System.out.println(parsed);
|
||||
Assert.assertEquals("2022-10-27", parsed);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue