dnet-hadoop/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRule.java

64 lines
1.6 KiB
Java
Raw Normal View History

2020-06-09 17:20:40 +02:00
package eu.dnetlib.dhp.oa.graph.clean;
2020-06-12 12:03:25 +02:00
import java.util.Map;
2020-06-12 18:25:47 +02:00
import java.util.function.Consumer;
2020-06-12 12:03:25 +02:00
2020-06-09 17:20:40 +02:00
import org.apache.spark.api.java.function.MapFunction;
2020-06-12 12:03:25 +02:00
import com.google.common.collect.Maps;
2020-06-09 17:20:40 +02:00
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
2020-06-12 12:03:25 +02:00
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
2020-06-12 10:45:18 +02:00
2020-06-09 17:20:40 +02:00
public class CleaningRule<T extends Oaf> implements MapFunction<T, T> {
private VocabularyGroup vocabularies;
2020-06-12 18:25:47 +02:00
private Map<Class, Consumer<Object>> mapping = Maps.newHashMap();
2020-06-12 10:45:18 +02:00
2020-06-09 17:20:40 +02:00
public CleaningRule(VocabularyGroup vocabularies) {
this.vocabularies = vocabularies;
2020-06-12 18:25:47 +02:00
setMappings(vocabularies);
2020-06-09 17:20:40 +02:00
}
@Override
public T call(T value) throws Exception {
2020-06-12 18:25:47 +02:00
OafNavigator.apply(value, mapping);
2020-06-09 17:20:40 +02:00
return value;
}
2020-06-12 18:25:47 +02:00
/**
* Populates the mapping for the Oaf types subject to cleaning
*
* @param vocabularies
*/
private void setMappings(VocabularyGroup vocabularies) {
mapping.put(Qualifier.class, o -> {
Qualifier q = (Qualifier) o;
if (vocabularies.vocabularyExists(q.getSchemeid())) {
Qualifier newValue = vocabularies.lookup(q.getSchemeid(), q.getClassid());
q.setClassid(newValue.getClassid());
q.setClassname(newValue.getClassname());
2020-06-12 10:45:18 +02:00
}
2020-06-12 18:25:47 +02:00
});
mapping.put(StructuredProperty.class, o -> {
StructuredProperty sp = (StructuredProperty) o;
// TODO implement a policy
/*
* if (StringUtils.isBlank(sp.getValue())) { sp.setValue(null); sp.setQualifier(null); sp.setDataInfo(null);
* }
*/
});
2020-06-09 17:20:40 +02:00
}
2020-06-09 19:52:53 +02:00
public VocabularyGroup getVocabularies() {
return vocabularies;
}
2020-06-12 18:25:47 +02:00
2020-06-09 17:20:40 +02:00
}