forked from D-Net/dnet-hadoop
Removed the filter of Datacite items from the raw graph merging phase; Datacite is no longer an actionset in beta
This commit is contained in:
parent
4acfa8fa2e
commit
1be9aa0a5f
|
@ -127,13 +127,6 @@ public class MergeGraphTableSparkJob {
|
||||||
}
|
}
|
||||||
}, Encoders.bean(p_clazz))
|
}, Encoders.bean(p_clazz))
|
||||||
.filter((FilterFunction<P>) Objects::nonNull)
|
.filter((FilterFunction<P>) Objects::nonNull)
|
||||||
.filter((FilterFunction<P>) o -> {
|
|
||||||
HashSet<String> collectedFromNames = Optional
|
|
||||||
.ofNullable(o.getCollectedfrom())
|
|
||||||
.map(c -> c.stream().map(KeyValue::getValue).collect(Collectors.toCollection(HashSet::new)))
|
|
||||||
.orElse(new HashSet<>());
|
|
||||||
return !collectedFromNames.contains("Datacite");
|
|
||||||
})
|
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
|
Loading…
Reference in New Issue