2020-06-09 17:20:40 +02:00
|
|
|
|
|
|
|
package eu.dnetlib.dhp.oa.graph.clean;
|
|
|
|
|
2020-06-12 10:45:18 +02:00
|
|
|
import com.google.common.collect.Maps;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
2020-06-09 17:20:40 +02:00
|
|
|
import org.apache.spark.api.java.function.MapFunction;
|
|
|
|
|
|
|
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
|
|
|
|
2020-06-12 10:45:18 +02:00
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Objects;
|
|
|
|
import java.util.function.Function;
|
|
|
|
|
2020-06-09 17:20:40 +02:00
|
|
|
public class CleaningRule<T extends Oaf> implements MapFunction<T, T> {
|
|
|
|
|
|
|
|
private VocabularyGroup vocabularies;
|
|
|
|
|
2020-06-12 10:45:18 +02:00
|
|
|
private Map<Class, Function<Object, Object>> mapping = Maps.newHashMap();
|
|
|
|
|
|
|
|
|
2020-06-09 17:20:40 +02:00
|
|
|
public CleaningRule(VocabularyGroup vocabularies) {
|
|
|
|
this.vocabularies = vocabularies;
|
2020-06-12 10:45:18 +02:00
|
|
|
|
|
|
|
mapping.put(Qualifier.class, o -> patchQualifier(o));
|
|
|
|
mapping.put(StructuredProperty.class, o -> patchSp(o));
|
|
|
|
mapping.put(Field.class, o -> patchStringField(o));
|
2020-06-09 17:20:40 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public T call(T value) throws Exception {
|
|
|
|
|
2020-06-12 10:45:18 +02:00
|
|
|
OafNavigator.apply(value, mapping);
|
2020-06-09 17:20:40 +02:00
|
|
|
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
|
2020-06-12 10:45:18 +02:00
|
|
|
private Object patchQualifier(Object o) {
|
|
|
|
Qualifier q = (Qualifier) o;
|
|
|
|
if (vocabularies.vocabularyExists(q.getSchemeid())) {
|
|
|
|
return vocabularies.lookup(q.getSchemeid(), q.getClassid());
|
2020-06-09 17:20:40 +02:00
|
|
|
}
|
2020-06-12 10:45:18 +02:00
|
|
|
return o;
|
|
|
|
}
|
2020-06-09 17:20:40 +02:00
|
|
|
|
2020-06-12 10:45:18 +02:00
|
|
|
private Object patchSp(Object o) {
|
|
|
|
StructuredProperty sp = (StructuredProperty) o;
|
|
|
|
if (StringUtils.isBlank(sp.getValue())) {
|
|
|
|
return null;
|
2020-06-09 17:20:40 +02:00
|
|
|
}
|
2020-06-12 10:45:18 +02:00
|
|
|
return o;
|
2020-06-09 17:20:40 +02:00
|
|
|
}
|
|
|
|
|
2020-06-12 10:45:18 +02:00
|
|
|
private Object patchStringField(Object o) {
|
|
|
|
Field f = (Field) o;
|
|
|
|
try {
|
|
|
|
if (StringUtils.isBlank((String) f.getValue())) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
} catch (ClassCastException e) {
|
|
|
|
// ignored on purpose
|
2020-06-09 17:20:40 +02:00
|
|
|
}
|
|
|
|
|
2020-06-12 10:45:18 +02:00
|
|
|
return o;
|
2020-06-09 17:20:40 +02:00
|
|
|
}
|
2020-06-09 19:52:53 +02:00
|
|
|
|
|
|
|
public VocabularyGroup getVocabularies() {
|
|
|
|
return vocabularies;
|
|
|
|
}
|
2020-06-09 17:20:40 +02:00
|
|
|
}
|