From a900bfb87434f41eac1530e833565a56351afe70 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 11 Jun 2021 16:53:01 +0200 Subject: [PATCH] delegating the date parsing to https://github.com/sisyphsu/dateparser --- dhp-common/pom.xml | 4 +- .../oaf/utils/GraphCleaningFunctions.java | 82 +++++++++++-- .../schema/oaf/utils/OafMapperUtilsTest.java | 108 +++++++++++++++++- .../dhp/transformation/xslt/DateCleaner.java | 83 +------------- .../transformation/TransformationJobTest.java | 10 +- .../clean/GraphCleaningFunctionsTest.java | 2 + .../eu/dnetlib/dhp/oa/graph/clean/result.json | 2 +- pom.xml | 12 +- 8 files changed, 200 insertions(+), 103 deletions(-) diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index b1494f649..74f31cf35 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -22,8 +22,8 @@ hadoop-common - commons-validator - commons-validator + com.github.sisyphsu + dateparser org.apache.spark diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index da253c681..999272113 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -1,15 +1,23 @@ package eu.dnetlib.dhp.schema.oaf.utils; +import java.time.LocalDate; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; import java.util.*; import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.validator.GenericValidator; +import org.jetbrains.annotations.NotNull; +import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.common.collect.Sets; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -119,14 +127,42 @@ public class GraphCleaningFunctions extends CleaningFunctions { } else if (value instanceof Relation) { Relation r = (Relation) value; - if (!isValidDate(r.getValidationDate())) { + Optional validationDate = doCleanDate(r.getValidationDate()); + if (validationDate.isPresent()) { + r.setValidationDate(validationDate.get()); + r.setValidated(true); + } else { r.setValidationDate(null); r.setValidated(false); } - } else if (value instanceof Result) { Result r = (Result) value; + + if (Objects.nonNull(r.getDateofacceptance())) { + Optional date = cleanDateField(r.getDateofacceptance()); + if (date.isPresent()) { + r.getDateofacceptance().setValue(date.get()); + } else { + r.setDateofacceptance(null); + } + } + if (Objects.nonNull(r.getRelevantdate())) { + r + .setRelevantdate( + r + .getRelevantdate() + .stream() + .filter(Objects::nonNull) + .filter(sp -> Objects.nonNull(sp.getQualifier())) + .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) + .map(sp -> { + sp.setValue(GraphCleaningFunctions.cleanDate(sp.getValue())); + return sp; + }) + .filter(sp -> StringUtils.isNotBlank(sp.getValue())) + .collect(Collectors.toList())); + } if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) { r.setPublisher(null); } @@ -222,6 +258,14 @@ public class GraphCleaningFunctions extends CleaningFunctions { if (Objects.isNull(i.getRefereed())) { i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS)); } + if (Objects.nonNull(i.getDateofacceptance())) { + Optional date = cleanDateField(i.getDateofacceptance()); + if (date.isPresent()) { + i.getDateofacceptance().setValue(date.get()); + } else { + i.setDateofacceptance(null); + } + } } } if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) { @@ -300,10 +344,34 @@ public class GraphCleaningFunctions extends CleaningFunctions { return value; } - protected static boolean isValidDate(String date) { - return Stream - .of(ModelSupport.DATE_TIME_FORMATS) - .anyMatch(format -> GenericValidator.isDate(date, format, false)); + private static Optional cleanDateField(Field dateofacceptance) { + return Optional + .ofNullable(dateofacceptance) + .map(Field::getValue) + .map(GraphCleaningFunctions::cleanDate) + .filter(Objects::nonNull); + } + + protected static Optional doCleanDate(String date) { + return Optional.ofNullable(cleanDate(date)); + } + + public static String cleanDate(final String inputDate) { + + if (StringUtils.isBlank(inputDate)) { + return null; + } + + try { + final LocalDate date = DateParserUtils + .parseDate(inputDate.trim()) + .toInstant() + .atZone(ZoneId.systemDefault()) + .toLocalDate(); + return DateTimeFormatter.ofPattern(ModelSupport.DATE_FORMAT).format(date); + } catch (DateTimeParseException e) { + return null; + } } // HELPERS diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index e8135f201..b2cc669fe 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -4,9 +4,12 @@ package eu.dnetlib.dhp.schema.oaf.utils; import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; -import java.time.format.DateTimeParseException; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; import java.util.HashSet; import java.util.List; +import java.util.Locale; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -26,10 +29,105 @@ public class OafMapperUtilsTest { @Test public void testDateValidation() { - assertTrue(GraphCleaningFunctions.isValidDate("2016-05-07T12:41:19.202Z")); - assertTrue(GraphCleaningFunctions.isValidDate("2020-09-10 11:08:52")); - assertTrue(GraphCleaningFunctions.isValidDate("2016-04-05")); - assertFalse(GraphCleaningFunctions.isValidDate("2016 April 05")); + assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent()); + assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent()); + assertTrue(GraphCleaningFunctions.doCleanDate(" 2016-04-05").isPresent()); + + assertEquals("2016-04-05", GraphCleaningFunctions.doCleanDate("2016 Apr 05").get()); + + assertEquals("2009-05-08", GraphCleaningFunctions.doCleanDate("May 8, 2009 5:57:51 PM").get()); + assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, 1970").get()); + assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, '70").get()); + assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 1970").get()); + assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 70").get()); + assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 2006").get()); + assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 MST 2006").get()); + assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 02 15:04:05 -0700 2006").get()); + assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Monday, 02-Jan-06 15:04:05 MST").get()); + assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 MST").get()); + assertEquals("2017-07-11", GraphCleaningFunctions.doCleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)").get()); + assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 -0700").get()); + assertEquals("2018-01-04", GraphCleaningFunctions.doCleanDate("Thu, 4 Jan 2018 17:53:36 +0000").get()); + assertEquals("2015-08-10", GraphCleaningFunctions.doCleanDate("Mon Aug 10 15:44:11 UTC+0100 2015").get()); + assertEquals( + "2015-07-03", + GraphCleaningFunctions.doCleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)").get()); + assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 10:09am").get()); + assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 at 10:09am PST-08").get()); + assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012, 10:10:09").get()); + assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7, 1970").get()); + assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7th, 1970").get()); + assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006, 19:17").get()); + assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006 19:17").get()); + assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 70").get()); + assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 1970").get()); + assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("03 February 2013").get()); + assertEquals("2013-07-01", GraphCleaningFunctions.doCleanDate("1 July 2013").get()); + assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("2013-Feb-03").get()); + assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3/31/2014").get()); + assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03/31/2014").get()); + assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08/21/71").get()); + assertEquals("1971-01-08", GraphCleaningFunctions.doCleanDate("8/1/71").get()); + assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/2014 22:05").get()); + assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("04/08/2014 22:05").get()); + assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/14 22:05").get()); + assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("04/2/2014 03:00:51").get()); + assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00:00 AM").get()); + assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00:01 PM").get()); + assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00 PM").get()); + assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 1:00 PM").get()); + assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00 AM").get()); + assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("4/02/2014 03:00:51").get()); + assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59").get()); + assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59.3186369").get()); + assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/3/31").get()); + assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/03/31").get()); + assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/4/8 22:05").get()); + assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/04/08 22:05").get()); + assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/04/2 03:00:51").get()); + assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/4/02 03:00:51").get()); + assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59").get()); + assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59.3186369").get()); + assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014年04月08日").get()); + assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get()); + assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get()); + assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get()); + assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get()); + assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get()); + assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get()); + assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get()); + assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43").get()); + assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43:22").get()); + assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 UTC").get()); + assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 GMT").get()); + assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 05:24:37 PM").get()); + assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800").get()); + assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800 +08").get()); + assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:44 +09:00").get()); + assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000 +0000 UTC").get()); + assertEquals("2015-09-30", GraphCleaningFunctions.doCleanDate("2015-09-30 18:48:56.35272715 +0000 UTC").get()); + assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 GMT").get()); + assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 UTC").get()); + assertEquals( + "2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001").get()); + assertEquals( + "2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001").get()); + assertEquals("2017-07-19", GraphCleaningFunctions.doCleanDate("2017-07-19 03:21:51+00:00").get()); + assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26").get()); + assertEquals("2014-04-01", GraphCleaningFunctions.doCleanDate("2014-04").get()); + assertEquals("2014-01-01", GraphCleaningFunctions.doCleanDate("2014").get()); + assertEquals("2014-05-11", GraphCleaningFunctions.doCleanDate("2014-05-11 08:20:13,787").get()); + assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3.31.2014").get()); + assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03.31.2014").get()); + assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08.21.71").get()); + assertEquals("2014-03-01", GraphCleaningFunctions.doCleanDate("2014.03").get()); + assertEquals("2014-03-30", GraphCleaningFunctions.doCleanDate("2014.03.30").get()); + assertEquals("2014-06-01", GraphCleaningFunctions.doCleanDate("20140601").get()); + assertEquals("2014-07-22", GraphCleaningFunctions.doCleanDate("20140722105203").get()); + assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("1332151919").get()); + assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367189").get()); + assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222").get()); + assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222333").get()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java index 6e337604f..9da0747e6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java @@ -10,87 +10,11 @@ import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import net.sf.saxon.s9api.*; public class DateCleaner implements ExtensionFunction, Serializable { - private final static List dateRegex = Arrays - .asList( - // Y-M-D - Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE), - // M-D-Y - Pattern - .compile( - "((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", - Pattern.MULTILINE), - // D-M-Y - Pattern - .compile( - "(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", - Pattern.MULTILINE), - // Y - Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE)); - - private final static Pattern incompleteDateRegex = Pattern - .compile("^((18|19|20)\\d\\d){1}([- \\\\ \\/](0?[1-9]|1[012]))?", Pattern.MULTILINE); - - private final static List dformats = Arrays - .asList( - DateTimeFormatter - .ofPattern( - "[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", - Locale.ENGLISH), - DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN)); - - public String clean(final String inputDate) { - - Optional cleanedDate = dateRegex - .stream() - .map( - p -> { - final Matcher matcher = p.matcher(inputDate); - if (matcher.find()) - return matcher.group(0); - else - return null; - }) - .filter(Objects::nonNull) - .map(m -> { - Optional cleanDate = dformats - .stream() - .map(f -> { - try { - LocalDate parsedDate = LocalDate.parse(m, f); - if (parsedDate != null) - return parsedDate.toString(); - else - return null; - } catch (Throwable e) { - return null; - } - } - - ) - .filter(Objects::nonNull) - .findAny(); - - return cleanDate.orElse(null); - }) - .filter(Objects::nonNull) - .findAny(); - - if (cleanedDate.isPresent()) - return cleanedDate.get(); - - final Matcher matcher = incompleteDateRegex.matcher(inputDate); - if (matcher.find()) { - final Integer year = Integer.parseInt(matcher.group(1)); - final Integer month = Integer.parseInt(matcher.group(4) == null ? "01" : matcher.group(4)); - return String.format("%d-%02d-01", year, month); - } - return null; - } - @Override public QName getName() { return new QName(QNAME_BASE_URI + "/dateISO", "dateISO"); @@ -117,4 +41,9 @@ public class DateCleaner implements ExtensionFunction, Serializable { final String currentValue = xdmValues[0].itemAt(0).getStringValue(); return new XdmAtomicValue(clean(currentValue)); } + + // for backward compatibility with the existing unit tests + public String clean(String date) { + return GraphCleaningFunctions.cleanDate(date); + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java index 0fdc89533..948a8f93b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java @@ -51,11 +51,11 @@ public class TransformationJobTest extends AbstractVocabularyTest { @DisplayName("Test Date cleaner") public void testDateCleaner() throws Exception { DateCleaner dc = new DateCleaner(); - assertEquals(dc.clean("20/09/1982"), "1982-09-20"); - assertEquals(dc.clean("20-09-2002"), "2002-09-20"); - assertEquals(dc.clean("2002-09-20"), "2002-09-20"); - assertEquals(dc.clean("2002-9"), "2002-09-01"); - assertEquals(dc.clean("2021"), "2021-01-01"); + assertEquals("1982-09-20", dc.clean("20/09/1982")); + assertEquals("2002-09-20", dc.clean("20-09-2002")); + assertEquals("2002-09-20", dc.clean("2002-09-20")); + assertEquals("2002-09-01", dc.clean("2002-9")); + assertEquals("2021-01-01", dc.clean("2021")); } @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index c23354e25..b196d1948 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -129,6 +129,8 @@ public class GraphCleaningFunctionsTest { assertEquals("CLOSED", p_cleaned.getBestaccessright().getClassid()); assertNull(p_out.getPublisher()); + assertEquals("1970-10-07", p_cleaned.getDateofacceptance().getValue()); + final List pci = p_cleaned.getInstance(); assertNotNull(pci); assertEquals(1, pci.size()); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index 8670c10f1..6795ccf1b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -301,7 +301,7 @@ }, "trust": "0.9" }, - "value": "2016-01-01" + "value": "7 oct 1970" }, "dateofcollection": "", "dateoftransformation": "2020-04-22T12:34:08.009Z", diff --git a/pom.xml b/pom.xml index 5b96816d9..4272acae0 100644 --- a/pom.xml +++ b/pom.xml @@ -200,11 +200,11 @@ ${dhp.commons.lang.version} - - commons-validator - commons-validator - 1.7 - + + com.github.sisyphsu + dateparser + 1.0.7 + com.google.guava @@ -736,7 +736,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.5.11] + [2.5.12-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6]