forked from D-Net/dnet-hadoop
WIP: graph cleaner implementation
This commit is contained in:
parent
c4d9f1837f
commit
bed65a1be6
|
@ -2,16 +2,13 @@
|
|||
package eu.dnetlib.dhp.oa.graph.clean;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
@ -20,54 +17,47 @@ public class CleaningRule<T extends Oaf> implements MapFunction<T, T> {
|
|||
|
||||
private VocabularyGroup vocabularies;
|
||||
|
||||
private Map<Class, Function<Object, Object>> mapping = Maps.newHashMap();
|
||||
private Map<Class, Consumer<Object>> mapping = Maps.newHashMap();
|
||||
|
||||
public CleaningRule(VocabularyGroup vocabularies) {
|
||||
this.vocabularies = vocabularies;
|
||||
|
||||
mapping.put(Qualifier.class, o -> patchQualifier(o));
|
||||
mapping.put(StructuredProperty.class, o -> patchSp(o));
|
||||
mapping.put(Field.class, o -> patchStringField(o));
|
||||
setMappings(vocabularies);
|
||||
}
|
||||
|
||||
@Override
|
||||
public T call(T value) throws Exception {
|
||||
|
||||
OafNavigator2.apply(value, mapping);
|
||||
OafNavigator.apply(value, mapping);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
private Object patchQualifier(Object o) {
|
||||
Qualifier q = (Qualifier) o;
|
||||
if (vocabularies.vocabularyExists(q.getSchemeid())) {
|
||||
return vocabularies.lookup(q.getSchemeid(), q.getClassid());
|
||||
}
|
||||
return o;
|
||||
}
|
||||
|
||||
private Object patchSp(Object o) {
|
||||
StructuredProperty sp = (StructuredProperty) o;
|
||||
if (StringUtils.isBlank(sp.getValue())) {
|
||||
return null;
|
||||
}
|
||||
return o;
|
||||
}
|
||||
|
||||
private Object patchStringField(Object o) {
|
||||
Field f = (Field) o;
|
||||
try {
|
||||
if (StringUtils.isBlank((String) f.getValue())) {
|
||||
return null;
|
||||
/**
|
||||
* Populates the mapping for the Oaf types subject to cleaning
|
||||
*
|
||||
* @param vocabularies
|
||||
*/
|
||||
private void setMappings(VocabularyGroup vocabularies) {
|
||||
mapping.put(Qualifier.class, o -> {
|
||||
Qualifier q = (Qualifier) o;
|
||||
if (vocabularies.vocabularyExists(q.getSchemeid())) {
|
||||
Qualifier newValue = vocabularies.lookup(q.getSchemeid(), q.getClassid());
|
||||
q.setClassid(newValue.getClassid());
|
||||
q.setClassname(newValue.getClassname());
|
||||
}
|
||||
} catch (ClassCastException e) {
|
||||
// ignored on purpose
|
||||
}
|
||||
|
||||
return o;
|
||||
});
|
||||
mapping.put(StructuredProperty.class, o -> {
|
||||
StructuredProperty sp = (StructuredProperty) o;
|
||||
// TODO implement a policy
|
||||
/*
|
||||
* if (StringUtils.isBlank(sp.getValue())) { sp.setValue(null); sp.setQualifier(null); sp.setDataInfo(null);
|
||||
* }
|
||||
*/
|
||||
});
|
||||
}
|
||||
|
||||
public VocabularyGroup getVocabularies() {
|
||||
return vocabularies;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,132 +1,56 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.clean;
|
||||
|
||||
import java.beans.BeanInfo;
|
||||
import java.beans.IntrospectionException;
|
||||
import java.beans.Introspector;
|
||||
import java.beans.PropertyDescriptor;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Lists;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class OafNavigator {
|
||||
|
||||
public static <E extends Oaf> E apply(E oaf, Map<Class, Function<Object, Object>> mapping) {
|
||||
reflect(oaf, mapping);
|
||||
public static <E extends Oaf> E apply(E oaf, Map<Class, Consumer<Object>> mapping) {
|
||||
try {
|
||||
navigate(oaf, mapping);
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return oaf;
|
||||
}
|
||||
|
||||
public static void reflect(Object o, Map<Class, Function<Object, Object>> mapping) {
|
||||
visit(o, mapping);
|
||||
}
|
||||
|
||||
public static void visit(final Object thingy, Map<Class, Function<Object, Object>> mapping) {
|
||||
|
||||
try {
|
||||
final Class<?> clazz = thingy.getClass();
|
||||
|
||||
if (!isPrimitive(thingy) && clazz.getPackage().equals(Oaf.class.getPackage())) {
|
||||
|
||||
final BeanInfo beanInfo = Introspector.getBeanInfo(clazz);
|
||||
|
||||
for (final PropertyDescriptor descriptor : beanInfo.getPropertyDescriptors()) {
|
||||
try {
|
||||
final Object value = descriptor.getReadMethod().invoke(thingy);
|
||||
|
||||
if (value != null && !isPrimitive(value)) {
|
||||
|
||||
System.out.println("VISITING " + descriptor.getName() + " " + descriptor.getPropertyType());
|
||||
|
||||
if (Iterable.class.isAssignableFrom(descriptor.getPropertyType())) {
|
||||
for (Object vi : (Iterable) value) {
|
||||
|
||||
visit(vi, mapping);
|
||||
}
|
||||
} else {
|
||||
|
||||
if (mapping.keySet().contains(value.getClass())) {
|
||||
final Object newValue = mapping.get(value.getClass()).apply(value);
|
||||
System.out
|
||||
.println(
|
||||
"PATCHING " + descriptor.getName() + " " + descriptor.getPropertyType());
|
||||
System.out.println("OLD VALUE " + getObjectMapper().writeValueAsString(value));
|
||||
System.out.println("NEW VALUE " + getObjectMapper().writeValueAsString(newValue));
|
||||
descriptor.getWriteMethod().invoke(newValue);
|
||||
}
|
||||
|
||||
visit(value, mapping);
|
||||
}
|
||||
}
|
||||
|
||||
} catch (final IllegalArgumentException e) {
|
||||
// handle this please
|
||||
} catch (final IllegalAccessException e) {
|
||||
// and this also
|
||||
} catch (final InvocationTargetException e) {
|
||||
// and this, too
|
||||
} catch (JsonProcessingException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (final IntrospectionException e) {
|
||||
// do something sensible here
|
||||
}
|
||||
}
|
||||
|
||||
private static ObjectMapper getObjectMapper() {
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
return mapper;
|
||||
}
|
||||
|
||||
private static void navigate(Object o, Map<Class, Function<Object, Object>> mapping) {
|
||||
if (Objects.isNull(o) || isPrimitive(o)) {
|
||||
private static void navigate(Object o, Map<Class, Consumer<Object>> mapping) throws IllegalAccessException {
|
||||
if (isPrimitive(o)) {
|
||||
return;
|
||||
} else if (isIterable(o.getClass())) {
|
||||
for (final Object elem : (Iterable<?>) o) {
|
||||
navigate(elem, mapping);
|
||||
}
|
||||
} else if (hasMapping(o, mapping)) {
|
||||
mapping.get(o.getClass()).accept(o);
|
||||
} else {
|
||||
try {
|
||||
for (Field field : getAllFields(o.getClass())) {
|
||||
System.out.println(field.getName());
|
||||
field.setAccessible(true);
|
||||
Object value = field.get(o);
|
||||
|
||||
if (Objects.nonNull(value)) {
|
||||
final Class<?> fieldType = field.getType();
|
||||
if ((fieldType.isArray() && !fieldType.getComponentType().isPrimitive())) {
|
||||
Object[] fs = (Object[]) value;
|
||||
for (Object fi : fs) {
|
||||
navigate(fi, mapping);
|
||||
}
|
||||
}
|
||||
if (Iterable.class.isAssignableFrom(fieldType)) {
|
||||
Iterable fs = (Iterable) value;
|
||||
for (Object fi : fs) {
|
||||
navigate(fi, mapping);
|
||||
}
|
||||
} else {
|
||||
if (mapping.keySet().contains(value.getClass())) {
|
||||
System.out.println("PATCHING " + field.getName());
|
||||
field.set(o, mapping.get(value.getClass()).apply(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (final Field f : getAllFields(o.getClass())) {
|
||||
f.setAccessible(true);
|
||||
final Object val = f.get(o);
|
||||
if (!isPrimitive(val) && hasMapping(val, mapping)) {
|
||||
mapping.get(val.getClass()).accept(val);
|
||||
} else {
|
||||
navigate(f.get(o), mapping);
|
||||
}
|
||||
|
||||
} catch (IllegalAccessException | IllegalArgumentException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean hasMapping(Object o, Map<Class, Consumer<Object>> mapping) {
|
||||
return mapping.containsKey(o.getClass());
|
||||
}
|
||||
|
||||
private static boolean isIterable(final Class<?> cl) {
|
||||
return Iterable.class.isAssignableFrom(cl);
|
||||
}
|
||||
|
||||
private static boolean isPrimitive(Object o) {
|
||||
return o.getClass().isPrimitive()
|
||||
return Objects.isNull(o)
|
||||
|| o.getClass().isPrimitive()
|
||||
|| o instanceof Class
|
||||
|| o instanceof Integer
|
||||
|| o instanceof Double
|
||||
|
|
|
@ -1,96 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.clean;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
|
||||
public class OafNavigator2 {
|
||||
|
||||
public static <E extends Oaf> E apply(E oaf, Map<Class, Function<Object, Object>> mapping) {
|
||||
navigate(oaf, mapping);
|
||||
return oaf;
|
||||
}
|
||||
|
||||
private static void navigate(Object o, Map<Class, Function<Object, Object>> mapping) {
|
||||
if (Objects.isNull(o) || isPrimitive(o)) {
|
||||
return;
|
||||
} else {
|
||||
try {
|
||||
for (Field field : getAllFields(o.getClass())) {
|
||||
//System.out.println("VISITING " + field.getName() + " in " + o.getClass());
|
||||
field.setAccessible(true);
|
||||
Object value = field.get(o);
|
||||
|
||||
if (Objects.nonNull(value)) {
|
||||
final Class<?> fieldType = field.getType();
|
||||
if ((fieldType.isArray() && !fieldType.getComponentType().isPrimitive())) {
|
||||
Object[] fs = (Object[]) value;
|
||||
for (Object fi : fs) {
|
||||
navigate(fi, mapping);
|
||||
}
|
||||
}
|
||||
if (Iterable.class.isAssignableFrom(fieldType)) {
|
||||
Iterable fs = (Iterable) value;
|
||||
for (Object fi : fs) {
|
||||
navigate(fi, mapping);
|
||||
}
|
||||
} else {
|
||||
final Function<Object, Object> cleaningFn = mapping.get(value.getClass());
|
||||
if (Objects.nonNull(cleaningFn)) {
|
||||
final Object newValue = cleaningFn.apply(value);
|
||||
if (!Objects.equals(value, newValue)) {
|
||||
//System.out.println("PATCHING " + field.getName() + " " + value.getClass());
|
||||
//System.out.println("OLD VALUE " + getObjectMapper().writeValueAsString(value));
|
||||
//System.out.println("NEW VALUE " + getObjectMapper().writeValueAsString(newValue));
|
||||
field.set(o, newValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (IllegalAccessException | IllegalArgumentException /*| JsonProcessingException*/ e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static ObjectMapper getObjectMapper() {
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
return mapper;
|
||||
}
|
||||
|
||||
private static boolean isPrimitive(Object o) {
|
||||
return o.getClass().isPrimitive()
|
||||
|| o instanceof Class
|
||||
|| o instanceof Integer
|
||||
|| o instanceof Double
|
||||
|| o instanceof Float
|
||||
|| o instanceof Long
|
||||
|| o instanceof Boolean
|
||||
|| o instanceof String
|
||||
|| o instanceof Byte;
|
||||
}
|
||||
|
||||
private static List<Field> getAllFields(Class<?> clazz) {
|
||||
return getAllFields(new LinkedList<>(), clazz);
|
||||
}
|
||||
|
||||
private static List<Field> getAllFields(List<Field> fields, Class<?> clazz) {
|
||||
fields.addAll(Arrays.asList(clazz.getDeclaredFields()));
|
||||
|
||||
final Class<?> superclass = clazz.getSuperclass();
|
||||
if (Objects.nonNull(superclass) && superclass.getPackage().equals(Oaf.class.getPackage())) {
|
||||
getAllFields(fields, superclass);
|
||||
}
|
||||
|
||||
return fields;
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue