graph cleaning to implement ugly hardcoded rules

This commit is contained in:
Claudio Atzori 2024-05-28 15:28:42 +02:00
parent db5e18c784
commit 8e45c5baa8
2 changed files with 27 additions and 0 deletions

View File

@ -1003,4 +1003,30 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.orElse(null); .orElse(null);
} }
/**
 * Applies hardcoded, datasource-specific corrections to a graph object. This is a stop-gap that
 * should be removed as soon as the underlying data issue is addressed at its origin.
 *
 * <p>Currently a single rule is implemented ("Fix for AMS Acta"): for every instance of a
 * {@link Result} whose {@code hostedby} key equals the hardcoded re3data datasource identifier,
 * the {@code hostedby} is replaced with {@code UNKNOWN_REPOSITORY} when the instance carries a
 * (non-null) pid list in which no pid value starts with {@code 10.6092/unibo/amsacta}.
 * Instances without a pid list are left untouched.
 *
 * <p>The method is null-tolerant: a result without instances, null instances, null pids and
 * pids with a null value are skipped instead of raising a {@link NullPointerException}.
 *
 * @param value the graph object to patch; it is modified in place
 * @return the same {@code value} instance that was passed in
 * @param <T> the concrete {@link Oaf} type being processed
 */
public static <T extends Oaf> T dedicatedUglyHacks(T value) {
	// Only results are affected; anything else is passed through unchanged.
	if (!(value instanceof OafEntity) || !(value instanceof Result)) {
		return value;
	}

	final Result r = (Result) value;
	if (r.getInstance() == null) {
		// Nothing to patch: the result has no instance list at all.
		return value;
	}

	// Fix for AMS Acta
	// NOTE(review): presumably this identifier is the AMS Acta datasource - confirm.
	final String amsActaDatasourceId = "10|re3data_____::4cc76bed7ce2fb95fd8e7a2dfde16016";
	final String amsActaDoiPrefix = "10.6092/unibo/amsacta";

	r
		.getInstance()
		.stream()
		.filter(i -> i != null)
		.filter(
			i -> Optional
				.ofNullable(i.getHostedby())
				.map(KeyValue::getKey)
				.map(dsId -> dsId.equals(amsActaDatasourceId))
				.orElse(false))
		.forEach(i -> {
			// A missing pid list means "cannot tell": keep the declared hostedby in that case.
			final boolean lacksAmsActaDoi = Optional
				.ofNullable(i.getPid())
				.map(
					pid -> pid
						.stream()
						.noneMatch(
							p -> p != null
								&& p.getValue() != null
								&& p.getValue().startsWith(amsActaDoiPrefix)))
				.orElse(false);
			if (lacksAmsActaDoi) {
				i.setHostedby(UNKNOWN_REPOSITORY);
			}
		});

	return value;
}
} }

View File

@ -147,6 +147,7 @@ public class CleanGraphSparkJob {
.map((MapFunction<T, T>) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz)) .map((MapFunction<T, T>) GraphCleaningFunctions::fixVocabularyNames, Encoders.bean(clazz))
.map((MapFunction<T, T>) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz)) .map((MapFunction<T, T>) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz))
.map((MapFunction<T, T>) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz)) .map((MapFunction<T, T>) value -> GraphCleaningFunctions.cleanup(value, vocs), Encoders.bean(clazz))
.map((MapFunction<T, T>) GraphCleaningFunctions::dedicatedUglyHacks, Encoders.bean(clazz))
.filter((FilterFunction<T>) GraphCleaningFunctions::filter); .filter((FilterFunction<T>) GraphCleaningFunctions::filter);
// read the master-duplicate tuples // read the master-duplicate tuples