graph cleaning to implement ugly hardcoded rules
This commit is contained in:
parent
db5e18c784
commit
8e45c5baa8
|
@ -1003,4 +1003,30 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements bad and ugly things that we should get rid of ASAP.
|
||||||
|
*
|
||||||
|
* @param value
|
||||||
|
* @return
|
||||||
|
* @param <T>
|
||||||
|
*/
|
||||||
|
public static <T extends Oaf> T dedicatedUglyHacks(T value) {
|
||||||
|
if (value instanceof OafEntity) {
|
||||||
|
if (value instanceof Result) {
|
||||||
|
final Result r = (Result) value;
|
||||||
|
|
||||||
|
// Fix for AMS Acta
|
||||||
|
r.getInstance()
|
||||||
|
.stream()
|
||||||
|
.filter(i -> Optional.ofNullable(i.getHostedby()).map(KeyValue::getKey).map(dsId -> dsId.equals("10|re3data_____::4cc76bed7ce2fb95fd8e7a2dfde16016")).orElse(false))
|
||||||
|
.forEach(i -> {
|
||||||
|
if (Optional.ofNullable(i.getPid()).map(pid -> pid.stream().noneMatch(p -> p.getValue().startsWith("10.6092/unibo/amsacta"))).orElse(false)) {
|
||||||
|
i.setHostedby(UNKNOWN_REPOSITORY);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -147,6 +147,7 @@ public class CleanGraphSparkJob {
|
||||||
.map((MapFunction<T, T>) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz))
|
.map((MapFunction<T, T>) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz))
|
||||||
.map((MapFunction<T, T>) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz))
|
.map((MapFunction<T, T>) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz))
|
||||||
.map((MapFunction<T, T>) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz))
|
.map((MapFunction<T, T>) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz))
|
||||||
|
.map((MapFunction<T, T>) GraphCleaningFunctions::dedicatedUglyHacks, Encoders.bean(clazz))
|
||||||
.filter((FilterFunction<T>) GraphCleaningFunctions::filter);
|
.filter((FilterFunction<T>) GraphCleaningFunctions::filter);
|
||||||
|
|
||||||
// read the master-duplicate tuples
|
// read the master-duplicate tuples
|
||||||
|
|
Loading…
Reference in New Issue