merge upstream

This commit is contained in:
Miriam Baglioni 2021-04-23 11:55:49 +02:00
commit 7d1b8b7f64
2 changed files with 3 additions and 0 deletions

View File

@@ -13,6 +13,7 @@ import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
@@ -91,6 +92,7 @@ public class SparkUpdateEntity extends AbstractSparkAction {
final JavaPairRDD<String, String> mergedIds = rel final JavaPairRDD<String, String> mergedIds = rel
.where("relClass == 'merges'") .where("relClass == 'merges'")
.where("source != target")
.select(rel.col("target")) .select(rel.col("target"))
.distinct() .distinct()
.toJavaRDD() .toJavaRDD()

View File

@@ -10,6 +10,7 @@ export SOURCE=$1
export SHADOW=$2 export SHADOW=$2
echo "Updating shadow database" echo "Updating shadow database"
impala-shell -q "invalidate metadata"
impala-shell -d ${SOURCE} -q "invalidate metadata" impala-shell -d ${SOURCE} -q "invalidate metadata"
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${SOURCE}.\1;/" | impala-shell -c -f - impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${SOURCE}.\1;/" | impala-shell -c -f -
impala-shell -q "create database if not exists ${SHADOW}" impala-shell -q "create database if not exists ${SHADOW}"