forked from D-Net/dnet-hadoop
[country propagation] fixes error 'cannot resolve countrySet given input columns: []' when there is no prepared information driving the propagation process for a given result type
This commit is contained in:
parent
699736addc
commit
3800361033
|
@ -14,6 +14,7 @@ import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.Row;
|
||||||
import org.apache.spark.sql.SaveMode;
|
import org.apache.spark.sql.SaveMode;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -84,19 +85,26 @@ public class SparkCountryPropagationJob {
|
||||||
Dataset<R> res = readPath(spark, sourcePath, resultClazz);
|
Dataset<R> res = readPath(spark, sourcePath, resultClazz);
|
||||||
|
|
||||||
log.info("Reading prepared info: {}", preparedInfoPath);
|
log.info("Reading prepared info: {}", preparedInfoPath);
|
||||||
Dataset<ResultCountrySet> prepared = spark
|
final Dataset<Row> preparedInfoRaw = spark
|
||||||
.read()
|
.read()
|
||||||
.json(preparedInfoPath)
|
.json(preparedInfoPath);
|
||||||
.as(Encoders.bean(ResultCountrySet.class));
|
|
||||||
|
|
||||||
res
|
|
||||||
.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")
|
|
||||||
.map(getCountryMergeFn(), Encoders.bean(resultClazz))
|
|
||||||
.write()
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.json(outputPath);
|
|
||||||
|
|
||||||
|
if (!preparedInfoRaw.isEmpty()) {
|
||||||
|
final Dataset<ResultCountrySet> prepared = preparedInfoRaw.as(Encoders.bean(ResultCountrySet.class));
|
||||||
|
res
|
||||||
|
.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")
|
||||||
|
.map(getCountryMergeFn(), Encoders.bean(resultClazz))
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath);
|
||||||
|
} else {
|
||||||
|
res
|
||||||
|
.write()
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(outputPath);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
|
private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
|
||||||
|
|
Loading…
Reference in New Issue