From 6208b04f1d8518e10bee49eff3d417aa6d58a336 Mon Sep 17 00:00:00 2001
From: Alessia Bardi
Date: Wed, 16 Jun 2021 14:56:26 +0200
Subject: [PATCH] smarter DatePicker for ISO dates on dateofacceptance

---
 .../eu/dnetlib/dhp/oa/dedup/DatePicker.java   |  3 +-
 .../dnetlib/dhp/oa/dedup/DatePickerTest.java  | 44 +++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java

diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java
index 70fb2cc5b..3a789c14c 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java
@@ -18,7 +18,7 @@ import eu.dnetlib.dhp.schema.oaf.Field;
 
 public class DatePicker {
 
-	private static final String DATE_PATTERN = "\\d{4}-\\d{2}-\\d{2}";
+	private static final String DATE_PATTERN = "^(\\d{4})-(\\d{2})-(\\d{2})";
 	private static final String DATE_DEFAULT_SUFFIX = "01-01";
 	private static final int YEAR_LB = 1300;
 	private static final int YEAR_UB = Year.now().getValue() + 5;
@@ -28,6 +28,7 @@ public class DatePicker {
 		final Map<String, Integer> frequencies = dateofacceptance
 			.parallelStream()
 			.filter(StringUtils::isNotBlank)
+			.map(d -> substringBefore(d, "T"))
 			.collect(Collectors.toConcurrentMap(w -> w, w -> 1, Integer::sum));
 
 		if (frequencies.isEmpty()) {
diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java
new file mode 100644
index 000000000..7c58c375a
--- /dev/null
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java
@@ -0,0 +1,44 @@
+
+package eu.dnetlib.dhp.oa.dedup;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Collection;
+
+import org.junit.jupiter.api.Test;
+
+import com.clearspring.analytics.util.Lists;
+
+public class DatePickerTest {
+
+	Collection<String> dates = Lists.newArrayList();
+
+	@Test
+	public void testPickISO() {
+		dates.add("2016-01-01T12:00:00Z");
+		dates.add("2016-06-16T12:00:00Z");
+		dates.add("2020-01-01T12:00:00Z");
+		dates.add("2020-10-01T12:00:00Z");
+		assertEquals("2020-10-01", DatePicker.pick(dates).getValue());
+	}
+
+	@Test
+	public void testPickSimple() {
+		dates.add("2016-01-01");
+		dates.add("2016-06-16");
+		dates.add("2020-01-01");
+		dates.add("2020-10-01");
+		assertEquals("2020-10-01", DatePicker.pick(dates).getValue());
+	}
+
+	@Test
+	public void testPickFrequent() {
+		dates.add("2016-02-01");
+		dates.add("2016-02-01");
+		dates.add("2016-02-01");
+		dates.add("2020-10-01");
+		assertEquals("2016-02-01", DatePicker.pick(dates).getValue());
+	}
+
+}