forked from D-Net/dnet-hadoop
One result linked to more than one project is saved just once
This commit is contained in:
parent
320cf02d96
commit
886617afd0
|
@@ -97,6 +97,11 @@ public class SparkResultLinkedToProject implements Serializable {
|
||||||
"on rel.target = p.id " +
|
"on rel.target = p.id " +
|
||||||
"")
|
"")
|
||||||
.as(Encoders.bean(inputClazz))
|
.as(Encoders.bean(inputClazz))
|
||||||
|
.groupByKey(
|
||||||
|
(MapFunction< R, String>) value -> value
|
||||||
|
.getId(),
|
||||||
|
Encoders.STRING())
|
||||||
|
.mapGroups((MapGroupsFunction<String, R, R>) (k, it) -> it.next(), Encoders.bean(inputClazz))
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
|
Loading…
Reference in New Issue