forked from D-Net/dnet-hadoop
[cleaning] removing non parsable relation.validationDate(s)
This commit is contained in:
parent
6e3a4e9237
commit
eb6acfbabc
|
@ -21,6 +21,10 @@
|
|||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-validator</groupId>
|
||||
<artifactId>commons-validator</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
|
|
|
@ -7,11 +7,13 @@ import java.util.stream.Collectors;
|
|||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.validator.GenericValidator;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
public class GraphCleaningFunctions extends CleaningFunctions {
|
||||
|
@ -115,7 +117,13 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
|||
o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
|
||||
}
|
||||
} else if (value instanceof Relation) {
|
||||
// nothing to clean here
|
||||
Relation r = (Relation) value;
|
||||
|
||||
if (!isValidDate(r.getValidationDate())) {
|
||||
r.setValidationDate(null);
|
||||
r.setValidated(false);
|
||||
}
|
||||
|
||||
} else if (value instanceof Result) {
|
||||
|
||||
Result r = (Result) value;
|
||||
|
@ -292,6 +300,12 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
|||
return value;
|
||||
}
|
||||
|
||||
protected static boolean isValidDate(String date) {
|
||||
return Stream
|
||||
.of(ModelSupport.DATE_TIME_FORMATS)
|
||||
.anyMatch(format -> GenericValidator.isDate(date, format, false));
|
||||
}
|
||||
|
||||
// HELPERS
|
||||
|
||||
private static boolean isValidAuthorName(Author a) {
|
||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.schema.oaf.utils;
|
|||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -15,16 +16,23 @@ import com.fasterxml.jackson.databind.DeserializationFeature;
|
|||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
public class OafMapperUtilsTest {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
|
||||
@Test
|
||||
public void testDateValidation() {
|
||||
|
||||
assertTrue(GraphCleaningFunctions.isValidDate("2016-05-07T12:41:19.202Z"));
|
||||
assertTrue(GraphCleaningFunctions.isValidDate("2020-09-10 11:08:52"));
|
||||
assertTrue(GraphCleaningFunctions.isValidDate("2016-04-05"));
|
||||
assertFalse(GraphCleaningFunctions.isValidDate("2016 April 05"));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergePubs() throws IOException {
|
||||
Publication p1 = read("publication_1.json", Publication.class);
|
||||
|
|
8
pom.xml
8
pom.xml
|
@ -200,6 +200,12 @@
|
|||
<version>${dhp.commons.lang.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-validator</groupId>
|
||||
<artifactId>commons-validator</artifactId>
|
||||
<version>1.7</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
|
@ -730,7 +736,7 @@
|
|||
<mockito-core.version>3.3.3</mockito-core.version>
|
||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||
<vtd.version>[2.12,3.0)</vtd.version>
|
||||
<dhp-schemas.version>[2.5.10]</dhp-schemas.version>
|
||||
<dhp-schemas.version>[2.5.11]</dhp-schemas.version>
|
||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||
|
|
Loading…
Reference in New Issue