forked from D-Net/dnet-hadoop
[cleaning] removing non parsable relation.validationDate(s)
This commit is contained in:
parent
6e3a4e9237
commit
eb6acfbabc
|
@ -21,6 +21,10 @@
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-validator</groupId>
|
||||||
|
<artifactId>commons-validator</artifactId>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
|
|
|
@ -7,11 +7,13 @@ import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.validator.GenericValidator;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class GraphCleaningFunctions extends CleaningFunctions {
|
public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
|
@ -115,7 +117,13 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
|
o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
|
||||||
}
|
}
|
||||||
} else if (value instanceof Relation) {
|
} else if (value instanceof Relation) {
|
||||||
// nothing to clean here
|
Relation r = (Relation) value;
|
||||||
|
|
||||||
|
if (!isValidDate(r.getValidationDate())) {
|
||||||
|
r.setValidationDate(null);
|
||||||
|
r.setValidated(false);
|
||||||
|
}
|
||||||
|
|
||||||
} else if (value instanceof Result) {
|
} else if (value instanceof Result) {
|
||||||
|
|
||||||
Result r = (Result) value;
|
Result r = (Result) value;
|
||||||
|
@ -292,6 +300,12 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected static boolean isValidDate(String date) {
|
||||||
|
return Stream
|
||||||
|
.of(ModelSupport.DATE_TIME_FORMATS)
|
||||||
|
.anyMatch(format -> GenericValidator.isDate(date, format, false));
|
||||||
|
}
|
||||||
|
|
||||||
// HELPERS
|
// HELPERS
|
||||||
|
|
||||||
private static boolean isValidAuthorName(Author a) {
|
private static boolean isValidAuthorName(Author a) {
|
||||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.time.format.DateTimeParseException;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -15,16 +16,23 @@ import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
|
|
||||||
public class OafMapperUtilsTest {
|
public class OafMapperUtilsTest {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDateValidation() {
|
||||||
|
|
||||||
|
assertTrue(GraphCleaningFunctions.isValidDate("2016-05-07T12:41:19.202Z"));
|
||||||
|
assertTrue(GraphCleaningFunctions.isValidDate("2020-09-10 11:08:52"));
|
||||||
|
assertTrue(GraphCleaningFunctions.isValidDate("2016-04-05"));
|
||||||
|
assertFalse(GraphCleaningFunctions.isValidDate("2016 April 05"));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMergePubs() throws IOException {
|
public void testMergePubs() throws IOException {
|
||||||
Publication p1 = read("publication_1.json", Publication.class);
|
Publication p1 = read("publication_1.json", Publication.class);
|
||||||
|
|
8
pom.xml
8
pom.xml
|
@ -200,6 +200,12 @@
|
||||||
<version>${dhp.commons.lang.version}</version>
|
<version>${dhp.commons.lang.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-validator</groupId>
|
||||||
|
<artifactId>commons-validator</artifactId>
|
||||||
|
<version>1.7</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.guava</groupId>
|
<groupId>com.google.guava</groupId>
|
||||||
<artifactId>guava</artifactId>
|
<artifactId>guava</artifactId>
|
||||||
|
@ -730,7 +736,7 @@
|
||||||
<mockito-core.version>3.3.3</mockito-core.version>
|
<mockito-core.version>3.3.3</mockito-core.version>
|
||||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||||
<vtd.version>[2.12,3.0)</vtd.version>
|
<vtd.version>[2.12,3.0)</vtd.version>
|
||||||
<dhp-schemas.version>[2.5.10]</dhp-schemas.version>
|
<dhp-schemas.version>[2.5.11]</dhp-schemas.version>
|
||||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||||
|
|
Loading…
Reference in New Issue