[stats wf] indicators across stats dbs & updates in the org ids #248

Closed
dimitris.pierrakos wants to merge 1742 commits from beta into beta2master_sept_2022
1 changed files with 11 additions and 3 deletions
Showing only changes of commit 389dd25430 - Show all commits

View File

@ -13,6 +13,7 @@ import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
@ -105,6 +106,7 @@ public class CleanCfHbSparkJob {
resolved
.joinWith(md, resolved.col("cfhb").equalTo(md.col("duplicateId")))
.map(asIdCfHbMapping(), Encoders.bean(IdCfHbMapping.class))
.filter((FilterFunction<IdCfHbMapping>) m -> Objects.nonNull(m.getMasterId()))
.write()
.mode(SaveMode.Overwrite)
.json(resolvedPath);
@ -134,9 +136,15 @@ public class CleanCfHbSparkJob {
private static MapFunction<Tuple2<IdCfHbMapping, MasterDuplicate>, IdCfHbMapping> asIdCfHbMapping() {
return t -> {
t._1().setMasterId(t._2().getMasterId());
t._1().setMasterName(t._2().getMasterName());
return t._1();
final IdCfHbMapping mapping = t._1();
Optional
.ofNullable(t._2())
.ifPresent(t2 -> {
mapping.setMasterId(t2.getMasterId());
mapping.setMasterName(t2.getMasterName());
});
return mapping;
};
}