forked from D-Net/dnet-hadoop
smarter DatePicker for ISO dates on dateofacceptance
This commit is contained in:
parent
9ca438d9b1
commit
6208b04f1d
|
@ -18,7 +18,7 @@ import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
|
||||||
public class DatePicker {
|
public class DatePicker {
|
||||||
|
|
||||||
private static final String DATE_PATTERN = "\\d{4}-\\d{2}-\\d{2}";
|
private static final String DATE_PATTERN = "^(\\d{4})-(\\d{2})-(\\d{2})";
|
||||||
private static final String DATE_DEFAULT_SUFFIX = "01-01";
|
private static final String DATE_DEFAULT_SUFFIX = "01-01";
|
||||||
private static final int YEAR_LB = 1300;
|
private static final int YEAR_LB = 1300;
|
||||||
private static final int YEAR_UB = Year.now().getValue() + 5;
|
private static final int YEAR_UB = Year.now().getValue() + 5;
|
||||||
|
@ -28,6 +28,7 @@ public class DatePicker {
|
||||||
final Map<String, Integer> frequencies = dateofacceptance
|
final Map<String, Integer> frequencies = dateofacceptance
|
||||||
.parallelStream()
|
.parallelStream()
|
||||||
.filter(StringUtils::isNotBlank)
|
.filter(StringUtils::isNotBlank)
|
||||||
|
.map(d -> substringBefore(d, "T"))
|
||||||
.collect(Collectors.toConcurrentMap(w -> w, w -> 1, Integer::sum));
|
.collect(Collectors.toConcurrentMap(w -> w, w -> 1, Integer::sum));
|
||||||
|
|
||||||
if (frequencies.isEmpty()) {
|
if (frequencies.isEmpty()) {
|
||||||
|
|
|
@ -0,0 +1,44 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.dedup;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import com.clearspring.analytics.util.Lists;
|
||||||
|
|
||||||
|
public class DatePickerTest {
|
||||||
|
|
||||||
|
Collection<String> dates = Lists.newArrayList();
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPickISO() {
|
||||||
|
dates.add("2016-01-01T12:00:00Z");
|
||||||
|
dates.add("2016-06-16T12:00:00Z");
|
||||||
|
dates.add("2020-01-01T12:00:00Z");
|
||||||
|
dates.add("2020-10-01T12:00:00Z");
|
||||||
|
assertEquals("2020-10-01", DatePicker.pick(dates).getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPickSimple() {
|
||||||
|
dates.add("2016-01-01");
|
||||||
|
dates.add("2016-06-16");
|
||||||
|
dates.add("2020-01-01");
|
||||||
|
dates.add("2020-10-01");
|
||||||
|
assertEquals("2020-10-01", DatePicker.pick(dates).getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPickFrequent() {
|
||||||
|
dates.add("2016-02-01");
|
||||||
|
dates.add("2016-02-01");
|
||||||
|
dates.add("2016-02-01");
|
||||||
|
dates.add("2020-10-01");
|
||||||
|
assertEquals("2016-02-01", DatePicker.pick(dates).getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue