forked from D-Net/dnet-hadoop
broadcasting moar
This commit is contained in:
parent
6f426383e6
commit
eed6f21025
|
@ -6,7 +6,7 @@ set mapred.job.queue.name=analytics; /*EOS*/
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.result_projectcount purge; /*EOS*/
|
DROP TABLE IF EXISTS ${stats_db_name}.result_projectcount purge; /*EOS*/
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.result_projectcount STORED AS PARQUET as
|
create table if not exists ${stats_db_name}.result_projectcount STORED AS PARQUET as
|
||||||
select /*+ COALESCE(100) */ r.id, count(distinct rp.project) as count
|
-- Per-result count of distinct linked projects; COALESCE(100) caps the number of output partitions.
-- The broadcast hint names the alias rp (declared on the join below): Spark resolves join hints
-- against the alias of an aliased relation, so a hint naming the qualified table would be ignored.
select /*+ COALESCE(100), BROADCAST(rp) */ r.id, count(distinct rp.project) as count
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||||
group by r.id; /*EOS*/
|
group by r.id; /*EOS*/
|
||||||
|
@ -24,7 +24,7 @@ left outer join ${stats_db_name}.result r on r.id=rp.id; /*EOS*/
|
||||||
ANALYZE TABLE ${stats_db_name}.project_res COMPUTE STATISTICS; /*EOS*/
|
ANALYZE TABLE ${stats_db_name}.project_res COMPUTE STATISTICS; /*EOS*/
|
||||||
|
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.project_resultcount STORED AS PARQUET as
|
create /*+ COALESCE(100) */ table if not exists ${stats_db_name}.project_resultcount STORED AS PARQUET as
|
||||||
select pid,
|
select pid,
|
||||||
sum(case when rp.type='publication' then 1 else 0 end) as publications,
|
sum(case when rp.type='publication' then 1 else 0 end) as publications,
|
||||||
sum(case when rp.type='dataset' then 1 else 0 end) as datasets,
|
sum(case when rp.type='dataset' then 1 else 0 end) as datasets,
|
||||||
|
@ -41,14 +41,14 @@ DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge; /*EOS*/
|
||||||
drop table if exists ${stats_db_name}.result_funder purge; /*EOS*/
|
drop table if exists ${stats_db_name}.result_funder purge; /*EOS*/
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.result_funder stored as parquet as
|
create table if not exists ${stats_db_name}.result_funder stored as parquet as
|
||||||
select distinct rp.id, p.funder
|
-- Distinct (result id, funder) pairs obtained by joining result_projects to project.
-- The broadcast hints name the aliases rp and p (declared in the from/join below): Spark resolves
-- join hints against the alias of an aliased relation, so hints naming the qualified tables would be ignored.
select /*+ BROADCAST(rp), BROADCAST(p) */ distinct rp.id, p.funder
|
||||||
from ${stats_db_name}.result_projects rp
|
from ${stats_db_name}.result_projects rp
|
||||||
join ${stats_db_name}.project p on p.id=rp.project; /*EOS*/
|
join ${stats_db_name}.project p on p.id=rp.project; /*EOS*/
|
||||||
|
|
||||||
ANALYZE TABLE ${stats_db_name}.result_funder COMPUTE STATISTICS; /*EOS*/
|
ANALYZE TABLE ${stats_db_name}.result_funder COMPUTE STATISTICS; /*EOS*/
|
||||||
|
|
||||||
create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as
|
create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as
|
||||||
select /*+ COALESCE(100) */ r.id, count(rf.funder) as count
|
-- Per-result count of funders; count(rf.funder) yields 0 for results with no match in the left outer join.
-- The broadcast hint names the alias rf (declared on the join below): Spark resolves join hints
-- against the alias of an aliased relation, so a hint naming the qualified table would be ignored.
select /*+ COALESCE(100), BROADCAST(rf) */ r.id, count(rf.funder) as count
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_funder rf on rf.id=r.id
|
left outer join ${stats_db_name}.result_funder rf on rf.id=r.id
|
||||||
group by r.id; /*EOS*/
|
group by r.id; /*EOS*/
|
||||||
|
|
Loading…
Reference in New Issue