diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml
index acac3594f..b1494f649 100644
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@@ -21,6 +21,10 @@
org.apache.hadoop
hadoop-common
+		<dependency>
+			<groupId>commons-validator</groupId>
+			<artifactId>commons-validator</artifactId>
+		</dependency>
org.apache.spark
spark-core_2.11
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
index 15fff07c0..da253c681 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@@ -7,11 +7,13 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.validator.GenericValidator;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
public class GraphCleaningFunctions extends CleaningFunctions {
@@ -115,7 +117,13 @@ public class GraphCleaningFunctions extends CleaningFunctions {
o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
}
} else if (value instanceof Relation) {
- // nothing to clean here
+ Relation r = (Relation) value;
+
+ if (!isValidDate(r.getValidationDate())) {
+ r.setValidationDate(null);
+ r.setValidated(false);
+ }
+
} else if (value instanceof Result) {
Result r = (Result) value;
@@ -292,6 +300,12 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return value;
}
+	/**
+	 * Tells whether the given string is accepted as a date by at least one of the
+	 * patterns listed in {@link ModelSupport#DATE_TIME_FORMATS}.
+	 * <p>
+	 * Every pattern is tried in turn through
+	 * {@link GenericValidator#isDate(String, String, boolean)} with the strict flag
+	 * turned off; the search stops at the first pattern that accepts the value.
+	 *
+	 * @param date the candidate date string
+	 * @return {@code true} if some pattern accepts the value, {@code false} if none does
+	 */
+	protected static boolean isValidDate(String date) {
+		for (String pattern : ModelSupport.DATE_TIME_FORMATS) {
+			if (GenericValidator.isDate(date, pattern, false)) {
+				return true;
+			}
+		}
+		return false;
+	}
+
// HELPERS
private static boolean isValidAuthorName(Author a) {
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
index 7256d6489..e8135f201 100644
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
@@ -4,6 +4,7 @@ package eu.dnetlib.dhp.schema.oaf.utils;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
+import java.time.format.DateTimeParseException;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
@@ -15,16 +16,23 @@ import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.Dataset;
-import eu.dnetlib.dhp.schema.oaf.KeyValue;
-import eu.dnetlib.dhp.schema.oaf.Publication;
-import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.*;
public class OafMapperUtilsTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+	/**
+	 * Checks {@code GraphCleaningFunctions.isValidDate} against three samples that
+	 * must be accepted and one sample that must be rejected.
+	 */
+	@Test
+	public void testDateValidation() {
+		// Samples that must be recognised as valid dates.
+		final String[] accepted = {
+			"2016-05-07T12:41:19.202Z",
+			"2020-09-10 11:08:52",
+			"2016-04-05"
+		};
+		for (String sample : accepted) {
+			// The sample doubles as the failure message so a failing entry is identifiable.
+			assertTrue(GraphCleaningFunctions.isValidDate(sample), sample);
+		}
+
+		// A date spelled with a textual month must be rejected.
+		final String rejected = "2016 April 05";
+		assertFalse(GraphCleaningFunctions.isValidDate(rejected), rejected);
+	}
+
@Test
public void testMergePubs() throws IOException {
Publication p1 = read("publication_1.json", Publication.class);
diff --git a/pom.xml b/pom.xml
index 5e5fec308..5b96816d9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -200,6 +200,12 @@
${dhp.commons.lang.version}
+			<dependency>
+				<groupId>commons-validator</groupId>
+				<artifactId>commons-validator</artifactId>
+				<version>1.7</version>
+			</dependency>
+
com.google.guava
guava
@@ -730,7 +736,7 @@
3.3.3
3.4.2
[2.12,3.0)
- [2.5.10]
+ [2.5.11]
[4.0.3]
[6.0.5]
[3.1.6]