forked from D-Net/dnet-hadoop
Grouping of records will be performed by the dedup workflow.
This commit is contained in:
parent
1372a4d1bf
commit
c1b9a4045a
|
@ -103,9 +103,6 @@ public class GenerateEntitiesApplication {
|
||||||
}
|
}
|
||||||
|
|
||||||
inputRdd
|
inputRdd
|
||||||
.mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf))
|
|
||||||
.reduceByKey((o1, o2) -> OafMapperUtils.merge(o1, o2))
|
|
||||||
.map(Tuple2::_2)
|
|
||||||
.map(
|
.map(
|
||||||
oaf -> oaf.getClass().getSimpleName().toLowerCase()
|
oaf -> oaf.getClass().getSimpleName().toLowerCase()
|
||||||
+ "|"
|
+ "|"
|
||||||
|
|
Loading…
Reference in New Issue