From eed6f210255d4af36cf8881db9a212b18e251c66 Mon Sep 17 00:00:00 2001
From: Antonis Lempesis
Date: Thu, 10 Oct 2024 01:48:49 +0300
Subject: [PATCH] broadcasting moar

Add BROADCAST join hints to the result_projectcount, result_funder and
result_fundercount queries, and a COALESCE(100) hint to the
project_resultcount query.

Join hints name each relation by the alias it carries in the query
(rp, p, rf), because an aliased relation is matched by its alias rather
than by its qualified table name. The COALESCE hint is placed directly
after SELECT, which is the position where Spark SQL reads hints.
---
 NOTE(review): blank lines and the indentation of context lines were
 reconstructed; confirm against the target file, or apply with
 --ignore-whitespace if the context does not match.

 .../dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
index ffd158cba..c33d48dbd 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
@@ -6,7 +6,7 @@ set mapred.job.queue.name=analytics; /*EOS*/
 DROP TABLE IF EXISTS ${stats_db_name}.result_projectcount purge; /*EOS*/
 
 create table if not exists ${stats_db_name}.result_projectcount STORED AS PARQUET as
-select /*+ COALESCE(100) */ r.id, count(distinct rp.project) as count
+select /*+ COALESCE(100), BROADCAST(rp) */ r.id, count(distinct rp.project) as count
 from ${stats_db_name}.result r
 left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
 group by r.id; /*EOS*/
@@ -24,7 +24,7 @@ left outer join ${stats_db_name}.result r on r.id=rp.id; /*EOS*/
 
 ANALYZE TABLE ${stats_db_name}.project_res COMPUTE STATISTICS; /*EOS*/
 
 create table if not exists ${stats_db_name}.project_resultcount STORED AS PARQUET as
-select pid,
+select /*+ COALESCE(100) */ pid,
        sum(case when rp.type='publication' then 1 else 0 end) as publications,
        sum(case when rp.type='dataset' then 1 else 0 end) as datasets,
@@ -41,14 +41,14 @@ DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge; /*EOS*/
 drop table if exists ${stats_db_name}.result_funder purge; /*EOS*/
 
 create table if not exists ${stats_db_name}.result_funder stored as parquet as
-select distinct rp.id, p.funder
+select /*+ BROADCAST(rp), BROADCAST(p) */ distinct rp.id, p.funder
 from ${stats_db_name}.result_projects rp
 join ${stats_db_name}.project p on p.id=rp.project; /*EOS*/
 
 ANALYZE TABLE ${stats_db_name}.result_funder COMPUTE STATISTICS; /*EOS*/
 
 create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as
-select /*+ COALESCE(100) */ r.id, count(rf.funder) as count
+select /*+ COALESCE(100), BROADCAST(rf) */ r.id, count(rf.funder) as count
 from ${stats_db_name}.result r
 left outer join ${stats_db_name}.result_funder rf on rf.id=r.id
 group by r.id; /*EOS*/