smarter DatePicker for ISO dates on dateofacceptance

This commit is contained in:
Alessia Bardi 2021-06-16 14:56:26 +02:00
parent 9ca438d9b1
commit 6208b04f1d
2 changed files with 46 additions and 1 deletion

View File

@ -18,7 +18,7 @@ import eu.dnetlib.dhp.schema.oaf.Field;
public class DatePicker {
private static final String DATE_PATTERN = "\\d{4}-\\d{2}-\\d{2}";
private static final String DATE_PATTERN = "^(\\d{4})-(\\d{2})-(\\d{2})";
private static final String DATE_DEFAULT_SUFFIX = "01-01";
private static final int YEAR_LB = 1300;
private static final int YEAR_UB = Year.now().getValue() + 5;
@ -28,6 +28,7 @@ public class DatePicker {
final Map<String, Integer> frequencies = dateofacceptance
.parallelStream()
.filter(StringUtils::isNotBlank)
.map(d -> substringBefore(d, "T"))
.collect(Collectors.toConcurrentMap(w -> w, w -> 1, Integer::sum));
if (frequencies.isEmpty()) {

View File

@ -0,0 +1,44 @@
package eu.dnetlib.dhp.oa.dedup;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Collection;
import org.junit.jupiter.api.Test;
import com.clearspring.analytics.util.Lists;
/**
 * Tests for {@code DatePicker.pick}: each case feeds a small collection of
 * date strings to the picker and checks the value of the returned field.
 */
public class DatePickerTest {

    /** Candidate dates handed to the picker; populated by each test before asserting. */
    Collection<String> dates = Lists.newArrayList();

    /** Date-time strings carrying a {@code T...Z} time part; the expected pick is a plain date. */
    @Test
    public void testPickISO() {
        assertPicked(
            "2020-10-01",
            "2016-01-01T12:00:00Z",
            "2016-06-16T12:00:00Z",
            "2020-01-01T12:00:00Z",
            "2020-10-01T12:00:00Z");
    }

    /** Same four dates as the ISO case, given as plain {@code yyyy-MM-dd} strings. */
    @Test
    public void testPickSimple() {
        assertPicked(
            "2020-10-01",
            "2016-01-01",
            "2016-06-16",
            "2020-01-01",
            "2020-10-01");
    }

    /** One date repeated three times next to a single different date; the repeated one is expected. */
    @Test
    public void testPickFrequent() {
        assertPicked(
            "2016-02-01",
            "2016-02-01",
            "2016-02-01",
            "2016-02-01",
            "2020-10-01");
    }

    /**
     * Appends the candidates to {@link #dates} in the given order and asserts
     * that the picker returns {@code expected} for the resulting collection.
     *
     * @param expected   the date string the picker is expected to return
     * @param candidates the date strings to offer to the picker
     */
    private void assertPicked(final String expected, final String... candidates) {
        for (final String candidate : candidates) {
            dates.add(candidate);
        }
        assertEquals(expected, DatePicker.pick(dates).getValue());
    }
}