From dcff9cecdf2267b2689bc71dbe67a15bb15b7f34 Mon Sep 17 00:00:00 2001 From: miconis Date: Mon, 12 Apr 2021 15:55:27 +0200 Subject: [PATCH 1/2] bug fix: ids in self mergerels are not marked deletedbyinference=true --- .../main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java index 779fb91d6..b65822635 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java @@ -13,6 +13,7 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -91,6 +92,7 @@ public class SparkUpdateEntity extends AbstractSparkAction { final JavaPairRDD mergedIds = rel .where("relClass == 'merges'") + .where("source != target") .select(rel.col("target")) .distinct() .toJavaRDD() From 03d36fadea1d301975fe929f780eda80918a10ac Mon Sep 17 00:00:00 2001 From: antleb Date: Thu, 15 Apr 2021 13:34:22 +0300 Subject: [PATCH 2/2] properly invalidating impala metadata --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh index 57acb2ee7..d04c5ccfd 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh @@ -10,6 +10,7 @@ export SOURCE=$1 export SHADOW=$2 echo "Updating shadow database" +impala-shell -q "invalidate metadata" impala-shell -d ${SOURCE} -q "invalidate metadata" impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${SOURCE}.\1;/" | impala-shell -c -f - impala-shell -q "create database if not exists ${SHADOW}"