forked from D-Net/dnet-hadoop
merged with master
This commit is contained in:
commit
fb405275f7
|
@ -71,20 +71,15 @@ public class SparkCountryPropagationJob3 {
|
||||||
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.registerKryoClasses(ModelSupport.getOafModelClasses());
|
|
||||||
|
|
||||||
runWithSparkSession(
|
runWithSparkSession(
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> execPropagation(
|
||||||
|
spark,
|
||||||
execPropagation(
|
inputPath,
|
||||||
spark,
|
outputPath,
|
||||||
inputPath,
|
resultClazz,
|
||||||
outputPath,
|
saveGraph));
|
||||||
resultClazz,
|
|
||||||
saveGraph);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <R extends Result> void execPropagation(
|
private static <R extends Result> void execPropagation(
|
||||||
|
@ -101,8 +96,8 @@ public class SparkCountryPropagationJob3 {
|
||||||
spark
|
spark
|
||||||
.read()
|
.read()
|
||||||
.json(inputPath)
|
.json(inputPath)
|
||||||
.as(Encoders.kryo(resultClazz))
|
.as(Encoders.bean(resultClazz))
|
||||||
.groupByKey((MapFunction<R, String>) result1 -> result1.getId(), Encoders.STRING())
|
.groupByKey((MapFunction<R, String>) r -> r.getId(), Encoders.STRING())
|
||||||
.mapGroups(getCountryMergeFn(resultClazz), Encoders.bean(resultClazz))
|
.mapGroups(getCountryMergeFn(resultClazz), Encoders.bean(resultClazz))
|
||||||
.write()
|
.write()
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
|
Loading…
Reference in New Issue