forked from D-Net/dnet-hadoop
One result linked to more than one project is saved just once
This commit is contained in:
parent
320cf02d96
commit
886617afd0
|
@ -97,6 +97,11 @@ public class SparkResultLinkedToProject implements Serializable {
|
|||
"on rel.target = p.id " +
|
||||
"")
|
||||
.as(Encoders.bean(inputClazz))
|
||||
.groupByKey(
|
||||
(MapFunction< R, String>) value -> value
|
||||
.getId(),
|
||||
Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, R, R>) (k, it) -> it.next(), Encoders.bean(inputClazz))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
|
|
Loading…
Reference in New Issue