From bed65a1be68129d7e9ddc25675ca53d5b80425ce Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 12 Jun 2020 18:25:47 +0200 Subject: [PATCH] WIP: graph cleaner implementation --- .../dhp/oa/graph/clean/CleaningRule.java | 62 ++++---- .../dhp/oa/graph/clean/OafNavigator.java | 140 ++++-------------- .../dhp/oa/graph/clean/OafNavigator2.java | 96 ------------ 3 files changed, 58 insertions(+), 240 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafNavigator2.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRule.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRule.java index a7efbb16a..6d7a262be 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRule.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRule.java @@ -2,16 +2,13 @@ package eu.dnetlib.dhp.oa.graph.clean; import java.util.Map; -import java.util.Objects; -import java.util.function.Function; +import java.util.function.Consumer; -import org.apache.commons.lang3.StringUtils; import org.apache.spark.api.java.function.MapFunction; import com.google.common.collect.Maps; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; -import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; @@ -20,54 +17,47 @@ public class CleaningRule implements MapFunction { private VocabularyGroup vocabularies; - private Map> mapping = Maps.newHashMap(); + private Map> mapping = Maps.newHashMap(); public CleaningRule(VocabularyGroup vocabularies) { this.vocabularies = vocabularies; - - mapping.put(Qualifier.class, o -> patchQualifier(o)); - mapping.put(StructuredProperty.class, o -> patchSp(o)); - mapping.put(Field.class, o -> patchStringField(o)); + setMappings(vocabularies); } @Override public T call(T value) throws Exception { - OafNavigator2.apply(value, mapping); + OafNavigator.apply(value, mapping); return value; } - private Object patchQualifier(Object o) { - Qualifier q = (Qualifier) o; - if (vocabularies.vocabularyExists(q.getSchemeid())) { - return vocabularies.lookup(q.getSchemeid(), q.getClassid()); - } - return o; - } - - private Object patchSp(Object o) { - StructuredProperty sp = (StructuredProperty) o; - if (StringUtils.isBlank(sp.getValue())) { - return null; - } - return o; - } - - private Object patchStringField(Object o) { - Field f = (Field) o; - try { - if (StringUtils.isBlank((String) f.getValue())) { - return null; + /** + * Populates the mapping for the Oaf types subject to cleaning + * + * @param vocabularies + */ + private void setMappings(VocabularyGroup vocabularies) { + mapping.put(Qualifier.class, o -> { + Qualifier q = (Qualifier) o; + if (vocabularies.vocabularyExists(q.getSchemeid())) { + Qualifier newValue = vocabularies.lookup(q.getSchemeid(), q.getClassid()); + q.setClassid(newValue.getClassid()); + q.setClassname(newValue.getClassname()); } - } catch (ClassCastException e) { - // ignored on purpose - } - - return o; + }); + mapping.put(StructuredProperty.class, o -> { + StructuredProperty sp = (StructuredProperty) o; + // TODO implement a policy + /* + * if (StringUtils.isBlank(sp.getValue())) { sp.setValue(null); sp.setQualifier(null); sp.setDataInfo(null); + * } + */ + }); } public VocabularyGroup getVocabularies() { return vocabularies; } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafNavigator.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafNavigator.java index 8b27dabf6..2cc499577 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafNavigator.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafNavigator.java @@ -1,132 +1,56 @@ package eu.dnetlib.dhp.oa.graph.clean; -import java.beans.BeanInfo; -import java.beans.IntrospectionException; -import java.beans.Introspector; -import java.beans.PropertyDescriptor; import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; import java.util.*; -import java.util.function.Function; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Lists; +import java.util.function.Consumer; import eu.dnetlib.dhp.schema.oaf.Oaf; -import scala.Tuple2; public class OafNavigator { - public static E apply(E oaf, Map> mapping) { - reflect(oaf, mapping); + public static E apply(E oaf, Map> mapping) { + try { + navigate(oaf, mapping); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } return oaf; } - public static void reflect(Object o, Map> mapping) { - visit(o, mapping); - } - - public static void visit(final Object thingy, Map> mapping) { - - try { - final Class clazz = thingy.getClass(); - - if (!isPrimitive(thingy) && clazz.getPackage().equals(Oaf.class.getPackage())) { - - final BeanInfo beanInfo = Introspector.getBeanInfo(clazz); - - for (final PropertyDescriptor descriptor : beanInfo.getPropertyDescriptors()) { - try { - final Object value = descriptor.getReadMethod().invoke(thingy); - - if (value != null && !isPrimitive(value)) { - - System.out.println("VISITING " + descriptor.getName() + " " + descriptor.getPropertyType()); - - if (Iterable.class.isAssignableFrom(descriptor.getPropertyType())) { - for (Object vi : (Iterable) value) { - - visit(vi, mapping); - } - } else { - - if (mapping.keySet().contains(value.getClass())) { - final Object newValue = mapping.get(value.getClass()).apply(value); - System.out - .println( - "PATCHING " + descriptor.getName() + " " + descriptor.getPropertyType()); - System.out.println("OLD VALUE " + getObjectMapper().writeValueAsString(value)); - System.out.println("NEW VALUE " + getObjectMapper().writeValueAsString(newValue)); - descriptor.getWriteMethod().invoke(newValue); - } - - visit(value, mapping); - } - } - - } catch (final IllegalArgumentException e) { - // handle this please - } catch (final IllegalAccessException e) { - // and this also - } catch (final InvocationTargetException e) { - // and this, too - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } - } - } catch (final IntrospectionException e) { - // do something sensible here - } - } - - private static ObjectMapper getObjectMapper() { - final ObjectMapper mapper = new ObjectMapper(); - return mapper; - } - - private static void navigate(Object o, Map> mapping) { - if (Objects.isNull(o) || isPrimitive(o)) { + private static void navigate(Object o, Map> mapping) throws IllegalAccessException { + if (isPrimitive(o)) { return; + } else if (isIterable(o.getClass())) { + for (final Object elem : (Iterable) o) { + navigate(elem, mapping); + } + } else if (hasMapping(o, mapping)) { + mapping.get(o.getClass()).accept(o); } else { - try { - for (Field field : getAllFields(o.getClass())) { - System.out.println(field.getName()); - field.setAccessible(true); - Object value = field.get(o); - - if (Objects.nonNull(value)) { - final Class fieldType = field.getType(); - if ((fieldType.isArray() && !fieldType.getComponentType().isPrimitive())) { - Object[] fs = (Object[]) value; - for (Object fi : fs) { - navigate(fi, mapping); - } - } - if (Iterable.class.isAssignableFrom(fieldType)) { - Iterable fs = (Iterable) value; - for (Object fi : fs) { - navigate(fi, mapping); - } - } else { - if (mapping.keySet().contains(value.getClass())) { - System.out.println("PATCHING " + field.getName()); - field.set(o, mapping.get(value.getClass()).apply(value)); - } - } - } + for (final Field f : getAllFields(o.getClass())) { + f.setAccessible(true); + final Object val = f.get(o); + if (!isPrimitive(val) && hasMapping(val, mapping)) { + mapping.get(val.getClass()).accept(val); + } else { + navigate(f.get(o), mapping); } - - } catch (IllegalAccessException | IllegalArgumentException e) { - throw new RuntimeException(e); } } } + private static boolean hasMapping(Object o, Map> mapping) { + return mapping.containsKey(o.getClass()); + } + + private static boolean isIterable(final Class cl) { + return Iterable.class.isAssignableFrom(cl); + } + private static boolean isPrimitive(Object o) { - return o.getClass().isPrimitive() + return Objects.isNull(o) + || o.getClass().isPrimitive() || o instanceof Class || o instanceof Integer || o instanceof Double diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafNavigator2.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafNavigator2.java deleted file mode 100644 index 264c95daf..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafNavigator2.java +++ /dev/null @@ -1,96 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.clean; - -import java.lang.reflect.Field; -import java.util.*; -import java.util.function.Function; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.schema.oaf.Oaf; - -public class OafNavigator2 { - - public static E apply(E oaf, Map> mapping) { - navigate(oaf, mapping); - return oaf; - } - - private static void navigate(Object o, Map> mapping) { - if (Objects.isNull(o) || isPrimitive(o)) { - return; - } else { - try { - for (Field field : getAllFields(o.getClass())) { - //System.out.println("VISITING " + field.getName() + " in " + o.getClass()); - field.setAccessible(true); - Object value = field.get(o); - - if (Objects.nonNull(value)) { - final Class fieldType = field.getType(); - if ((fieldType.isArray() && !fieldType.getComponentType().isPrimitive())) { - Object[] fs = (Object[]) value; - for (Object fi : fs) { - navigate(fi, mapping); - } - } - if (Iterable.class.isAssignableFrom(fieldType)) { - Iterable fs = (Iterable) value; - for (Object fi : fs) { - navigate(fi, mapping); - } - } else { - final Function cleaningFn = mapping.get(value.getClass()); - if (Objects.nonNull(cleaningFn)) { - final Object newValue = cleaningFn.apply(value); - if (!Objects.equals(value, newValue)) { - //System.out.println("PATCHING " + field.getName() + " " + value.getClass()); - //System.out.println("OLD VALUE " + getObjectMapper().writeValueAsString(value)); - //System.out.println("NEW VALUE " + getObjectMapper().writeValueAsString(newValue)); - field.set(o, newValue); - } - } - } - } - } - - } catch (IllegalAccessException | IllegalArgumentException /*| JsonProcessingException*/ e) { - throw new RuntimeException(e); - } - } - } - - private static ObjectMapper getObjectMapper() { - final ObjectMapper mapper = new ObjectMapper(); - return mapper; - } - - private static boolean isPrimitive(Object o) { - return o.getClass().isPrimitive() - || o instanceof Class - || o instanceof Integer - || o instanceof Double - || o instanceof Float - || o instanceof Long - || o instanceof Boolean - || o instanceof String - || o instanceof Byte; - } - - private static List getAllFields(Class clazz) { - return getAllFields(new LinkedList<>(), clazz); - } - - private static List getAllFields(List fields, Class clazz) { - fields.addAll(Arrays.asList(clazz.getDeclaredFields())); - - final Class superclass = clazz.getSuperclass(); - if (Objects.nonNull(superclass) && superclass.getPackage().equals(Oaf.class.getPackage())) { - getAllFields(fields, superclass); - } - - return fields; - } - -}