BrBETA_dnet-hadoop/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRule.java

74 lines
1.7 KiB
Java
Raw Normal View History

2020-06-09 17:20:40 +02:00
package eu.dnetlib.dhp.oa.graph.clean;
2020-06-12 10:45:18 +02:00
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import org.apache.commons.lang3.StringUtils;
2020-06-09 17:20:40 +02:00
import org.apache.spark.api.java.function.MapFunction;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
2020-06-12 10:45:18 +02:00
import java.util.Map;
import java.util.Objects;
import java.util.function.Function;
2020-06-09 17:20:40 +02:00
public class CleaningRule<T extends Oaf> implements MapFunction<T, T> {
private VocabularyGroup vocabularies;
2020-06-12 10:45:18 +02:00
private Map<Class, Function<Object, Object>> mapping = Maps.newHashMap();
2020-06-09 17:20:40 +02:00
public CleaningRule(VocabularyGroup vocabularies) {
this.vocabularies = vocabularies;
2020-06-12 10:45:18 +02:00
mapping.put(Qualifier.class, o -> patchQualifier(o));
mapping.put(StructuredProperty.class, o -> patchSp(o));
mapping.put(Field.class, o -> patchStringField(o));
2020-06-09 17:20:40 +02:00
}
@Override
public T call(T value) throws Exception {
2020-06-12 10:45:18 +02:00
OafNavigator.apply(value, mapping);
2020-06-09 17:20:40 +02:00
return value;
}
2020-06-12 10:45:18 +02:00
private Object patchQualifier(Object o) {
Qualifier q = (Qualifier) o;
if (vocabularies.vocabularyExists(q.getSchemeid())) {
return vocabularies.lookup(q.getSchemeid(), q.getClassid());
2020-06-09 17:20:40 +02:00
}
2020-06-12 10:45:18 +02:00
return o;
}
2020-06-09 17:20:40 +02:00
2020-06-12 10:45:18 +02:00
private Object patchSp(Object o) {
StructuredProperty sp = (StructuredProperty) o;
if (StringUtils.isBlank(sp.getValue())) {
return null;
2020-06-09 17:20:40 +02:00
}
2020-06-12 10:45:18 +02:00
return o;
2020-06-09 17:20:40 +02:00
}
2020-06-12 10:45:18 +02:00
private Object patchStringField(Object o) {
Field f = (Field) o;
try {
if (StringUtils.isBlank((String) f.getValue())) {
return null;
}
} catch (ClassCastException e) {
// ignored on purpose
2020-06-09 17:20:40 +02:00
}
2020-06-12 10:45:18 +02:00
return o;
2020-06-09 17:20:40 +02:00
}
2020-06-09 19:52:53 +02:00
public VocabularyGroup getVocabularies() {
return vocabularies;
}
2020-06-09 17:20:40 +02:00
}