broadcasting moar

This commit is contained in:
Antonis Lempesis 2024-10-10 01:48:49 +03:00
parent 6f426383e6
commit eed6f21025
1 changed file with 4 additions and 4 deletions

View File

@ -6,7 +6,7 @@ set mapred.job.queue.name=analytics; /*EOS*/
-- Rebuild result_projectcount: one row per result id with the number of
-- distinct projects linked to it. Results with no matching row in
-- result_projects are kept by the left outer join and get a count of 0.
--
-- Hints: COALESCE(100) asks Spark to reduce the output to 100 partitions.
-- BROADCAST names the alias (rp), not the qualified table name: Spark matches
-- join hints against the alias of an aliased relation, so a hint written
-- against ${stats_db_name}.result_projects would not be applied.
DROP TABLE IF EXISTS ${stats_db_name}.result_projectcount purge; /*EOS*/
create table if not exists ${stats_db_name}.result_projectcount STORED AS PARQUET as
select /*+ COALESCE(100), BROADCAST(rp) */ r.id, count(distinct rp.project) as count
from ${stats_db_name}.result r
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
group by r.id; /*EOS*/
@ -24,7 +24,7 @@ left outer join ${stats_db_name}.result r on r.id=rp.id; /*EOS*/
ANALYZE TABLE ${stats_db_name}.project_res COMPUTE STATISTICS; /*EOS*/ ANALYZE TABLE ${stats_db_name}.project_res COMPUTE STATISTICS; /*EOS*/
create table if not exists ${stats_db_name}.project_resultcount STORED AS PARQUET as create /*+ COALESCE(100) */ table if not exists ${stats_db_name}.project_resultcount STORED AS PARQUET as
select pid, select pid,
sum(case when rp.type='publication' then 1 else 0 end) as publications, sum(case when rp.type='publication' then 1 else 0 end) as publications,
sum(case when rp.type='dataset' then 1 else 0 end) as datasets, sum(case when rp.type='dataset' then 1 else 0 end) as datasets,
@ -41,14 +41,14 @@ DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge; /*EOS*/
-- Rebuild result_funder: the distinct (result id, funder) pairs obtained by
-- joining result_projects to project on the project id, then refresh the
-- table statistics for the statements that read it afterwards.
--
-- Both join inputs carry a BROADCAST hint, written against their aliases
-- (rp, p): Spark matches join hints against the alias of an aliased relation,
-- so hints naming the qualified tables would not be applied.
drop table if exists ${stats_db_name}.result_funder purge; /*EOS*/
create table if not exists ${stats_db_name}.result_funder stored as parquet as
select /*+ BROADCAST(rp), BROADCAST(p) */ distinct rp.id, p.funder
from ${stats_db_name}.result_projects rp
join ${stats_db_name}.project p on p.id=rp.project; /*EOS*/
ANALYZE TABLE ${stats_db_name}.result_funder COMPUTE STATISTICS; /*EOS*/
-- Build result_fundercount: one row per result id with the number of
-- non-null funder values joined from result_funder. Results without any
-- funder are kept by the left outer join and get a count of 0.
--
-- Hints: COALESCE(100) asks Spark to reduce the output to 100 partitions.
-- BROADCAST names the alias (rf), not the qualified table name: Spark matches
-- join hints against the alias of an aliased relation, so a hint written
-- against ${stats_db_name}.result_funder would not be applied.
create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as
select /*+ COALESCE(100), BROADCAST(rf) */ r.id, count(rf.funder) as count
from ${stats_db_name}.result r
left outer join ${stats_db_name}.result_funder rf on rf.id=r.id
group by r.id; /*EOS*/