From 23c857906d72e808d86d31207830a1f636b037ea Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 17 Nov 2022 14:38:03 +0100 Subject: [PATCH] new library for parsing dates --- dnet-ariadneplus-graphdb-publisher/pom.xml | 5 ++ .../ariadneplus/reader/utils/ESUtils.java | 76 ++++++------------- .../GraphDbReaderAndESIndexTest.java | 32 ++++++-- .../ariadneplus/reader/utils/ESUtilsTest.java | 38 ++++++++-- 4 files changed, 86 insertions(+), 65 deletions(-) diff --git a/dnet-ariadneplus-graphdb-publisher/pom.xml b/dnet-ariadneplus-graphdb-publisher/pom.xml index 5d1d58b..a4a562a 100644 --- a/dnet-ariadneplus-graphdb-publisher/pom.xml +++ b/dnet-ariadneplus-graphdb-publisher/pom.xml @@ -138,6 +138,11 @@ jts-core 1.19.0 + + com.github.sisyphsu + dateparser + 1.0.7 + diff --git a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/utils/ESUtils.java b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/utils/ESUtils.java index 2e7e7f7..dba1fb5 100644 --- a/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/utils/ESUtils.java +++ b/dnet-ariadneplus-graphdb-publisher/src/main/java/eu/dnetlib/ariadneplus/reader/utils/ESUtils.java @@ -1,74 +1,46 @@ package eu.dnetlib.ariadneplus.reader.utils; +import com.github.sisyphsu.dateparser.DateParser; +import com.github.sisyphsu.dateparser.DateParserUtils; +import org.apache.commons.lang3.StringUtils; + import java.time.LocalDate; import java.time.Year; import java.time.ZoneId; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; +import java.time.format.DateTimeParseException; import java.time.format.TextStyle; import java.time.temporal.ChronoField; import java.util.Collections; import java.util.Locale; public class ESUtils { - private static DateTimeFormatter originalRecordDateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy"); - private static DateTimeFormatter 
elasticSearchDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); - private static DateTimeFormatter fastCatDateFormatter = DateTimeFormatter.ofPattern("dd/MM/yyyy"); + private static String elasticSearchDateFormat = "yyyy-MM-dd"; - private static ZoneId preferredZone = ZoneId.of("Europe/London"); - private static DateTimeFormatter BST_FORMATTER = new DateTimeFormatterBuilder() - .appendPattern("EEE MMM dd HH:mm:ss ") - .appendZoneText(TextStyle.SHORT, Collections.singleton(preferredZone)) - .appendPattern(" yyyy") - .toFormatter(Locale.ROOT); - private static DateTimeFormatter yearOnlyDateFormatter = new DateTimeFormatterBuilder() - .appendPattern("yyyy").toFormatter(); - private static DateTimeFormatter stringMonthDateFormatter = DateTimeFormatter.ofPattern("dd-MMM-yyyy"); - - public static String getESFormatDate(String originalDate) { - try{ - LocalDate parsedDate = LocalDate.parse(originalDate, elasticSearchDateFormatter); - return parsedDate.format(elasticSearchDateFormatter); - } catch(Exception e){ - try { - Year year = Year.parse(originalDate); - return year.format(yearOnlyDateFormatter); - } catch (Exception e0) { - try { - LocalDate parsedDate = LocalDate.parse(originalDate, originalRecordDateFormatter); - return parsedDate.format(elasticSearchDateFormatter); - } catch (Exception e1) { - try { - LocalDate parsedDate = LocalDate.parse(originalDate.substring(0, 10), elasticSearchDateFormatter); - return parsedDate.format(elasticSearchDateFormatter); - } catch (Exception e2) { - try { - return parseBST(originalDate); - } catch (Exception e3) { - try { - LocalDate parsedDate = LocalDate.parse(originalDate, fastCatDateFormatter); - return parsedDate.format(elasticSearchDateFormatter); - } catch (Exception e4) { - try { - LocalDate parsedDate = LocalDate.parse(originalDate, stringMonthDateFormatter); - return parsedDate.format(elasticSearchDateFormatter); - } catch (Exception e5) { - return "0000"; - } - } - } - } - } + public static String 
getESFormatDate(String originalDate) { + if (StringUtils.isBlank(originalDate)) { + return null; + } + String inputDate = originalDate.trim(); + // the library completes with 01-01 if we have only the year: we do not want that. + if(inputDate.length() == 4 && StringUtils.isNumeric(inputDate)){ + return inputDate; + } + try { + final LocalDate date = DateParserUtils + .parseDate(inputDate.trim()) + .toInstant() + .atZone(ZoneId.systemDefault()) + .toLocalDate(); + return DateTimeFormatter.ofPattern(elasticSearchDateFormat).format(date); + } catch (DateTimeParseException e) { + return null; } - } } - private static String parseBST(String BSTDate) { - ZonedDateTime zd = ZonedDateTime.parse(BSTDate, BST_FORMATTER); - return zd.format(elasticSearchDateFormatter); - } public static String bytesToHex(byte[] hash) { StringBuilder hexString = new StringBuilder(2 * hash.length); diff --git a/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java b/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java index 0b49b0b..390468b 100644 --- a/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java +++ b/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/GraphDbReaderAndESIndexTest.java @@ -32,13 +32,31 @@ public class GraphDbReaderAndESIndexTest { private RunSPARQLQueryService runSPQRLQuery; -@Test -public void testADS() throws Exception { - String id = "https://ariadne-infrastructure.eu/aocat/Resource/ADS/90D1C95D-E249-3E74-92D9-B58FDF690CC7"; - String datasource = "ads"; - String apiId = "archives"; - readAndIndexTest(true, id, datasource, apiId); -} + @Test + public void testADS1093Record() throws Exception { + String id ="https://ariadne-infrastructure.eu/aocat/Resource/ADS/D182802E-592C-3999-9DB2-155F25E356E2"; + String datasource = "ads"; + String apiId = "1093"; + readAndIndexTest(true, id, datasource, apiId); + } + + @Test 
+ public void testADS1093Collection() throws Exception { + String id ="https://ariadne-infrastructure.eu/aocat/Collection/ADS/5910411B-ED97-364E-8D28-6024558AA14B"; + String datasource = "ads"; + String apiId = "1093"; + readAndIndexTest(false, id, datasource, apiId); + } + + + + @Test + public void testADS() throws Exception { + String id = "https://ariadne-infrastructure.eu/aocat/Resource/ADS/90D1C95D-E249-3E74-92D9-B58FDF690CC7"; + String datasource = "ads"; + String apiId = "archives"; + readAndIndexTest(true, id, datasource, apiId); + } @Test diff --git a/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/reader/utils/ESUtilsTest.java b/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/reader/utils/ESUtilsTest.java index 2376445..5870a6a 100644 --- a/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/reader/utils/ESUtilsTest.java +++ b/dnet-ariadneplus-graphdb-publisher/test/java/eu/dnetlib/ariadneplus/reader/utils/ESUtilsTest.java @@ -11,43 +11,69 @@ public class ESUtilsTest { public void testParseDate(){ String date = "2013-03-13"; String parsed = ESUtils.getESFormatDate(date); - Assert.assertEquals(date, parsed); + Assert.assertEquals(parsed, date); } @Test public void testBSTDate(){ String date = "Fri May 15 13:21:02 BST 2020"; String parsed = ESUtils.getESFormatDate(date); - Assert.assertEquals(parsed, "2020-05-15"); + Assert.assertEquals("2020-05-15", parsed); } + @Test public void testYearDate(){ String date = "2012"; String parsed = ESUtils.getESFormatDate(date); - Assert.assertEquals(parsed, "2012"); + Assert.assertEquals("2012", parsed); } @Test public void testEmtyDate(){ String date = ""; String parsed = ESUtils.getESFormatDate(date); - Assert.assertEquals(parsed, "0000"); + Assert.assertEquals("0000", parsed); } @Test public void testErrorDate(){ String date = "????"; String parsed = ESUtils.getESFormatDate(date); - Assert.assertEquals(parsed, "0000"); + Assert.assertEquals("0000", parsed); } + 
@Test + public void testUnknownDate(){ + String date = "unknown"; + String parsed = ESUtils.getESFormatDate(date); + Assert.assertEquals("0000", parsed); + } + @Test public void testDateMonthString(){ String date = "27-Oct-2022"; String parsed = ESUtils.getESFormatDate(date); System.out.println(parsed); - Assert.assertEquals(parsed, "2022-10-27"); + Assert.assertEquals("2022-10-27", parsed); + } + @Test + public void testDateCapitalMonthString(){ + String date = "27-OCT-2022"; + String parsed = ESUtils.getESFormatDate(date); + System.out.println(parsed); + Assert.assertEquals("2022-10-27", parsed); } + @Test + public void testDateMonthString2(){ + String date = "27 Oct 2022"; + String parsed = ESUtils.getESFormatDate(date); + System.out.println(parsed); + Assert.assertEquals("2022-10-27", parsed); + } + + + + }