From f1bce643910514f956fff7180a04631e7adf21e5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 10 Jun 2020 21:36:31 +0200 Subject: [PATCH] WIP: graph cleaner implementation --- .../oa/graph/clean/NormalizeEmptyFields.java | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/NormalizeEmptyFields.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/NormalizeEmptyFields.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/NormalizeEmptyFields.java new file mode 100644 index 000000000..77537801f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/NormalizeEmptyFields.java @@ -0,0 +1,88 @@ + +package eu.dnetlib.dhp.oa.graph.clean; + +import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.api.java.function.MapFunction; + +import java.lang.reflect.Field; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; + +public class NormalizeEmptyFields implements MapFunction { + + private VocabularyGroup vocabularies; + + public NormalizeEmptyFields(VocabularyGroup vocabularies) { + this.vocabularies = vocabularies; + } + + @Override + public T call(T value) throws Exception { + + doNormalize(value); + + return value; + } + + private void doNormalize(Object o) { + if (Objects.isNull(o)) { + return; + } + + if (o instanceof Iterable) { + for (Object oi : (Iterable) o) { + doNormalize(oi); + } + } else { + + Class clazz = o.getClass(); + + if (clazz.isPrimitive() + || o instanceof Integer + || o instanceof Double + || o instanceof Float + || o instanceof Long + || o instanceof Boolean + || o instanceof String) { + return; + } else { + try { + for (Field field : getAllFields(new LinkedList<>(), clazz)) { + field.setAccessible(true); + Object value = field.get(o); + if (value instanceof Qualifier && Objects.isNull(value)) { + field.set(o, OafMapperUtils.unknown("", "")); + } else if (value instanceof Field && Objects.isNull(value)) { + + } else { + doNormalize(value); + } + } + } catch (IllegalAccessException | IllegalArgumentException e) { + throw new RuntimeException(e); + } + } + } + } + + private static List getAllFields(List fields, Class clazz) { + fields.addAll(Arrays.asList(clazz.getDeclaredFields())); + + final Class superclass = clazz.getSuperclass(); + if (Objects.nonNull(superclass) && superclass.getPackage().equals(Oaf.class.getPackage())) { + getAllFields(fields, superclass); + } + + return fields; + } + + public VocabularyGroup getVocabularies() { + return vocabularies; + } +}