forked from D-Net/dnet-hadoop
Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta
This commit is contained in:
commit
d51275a965
|
@ -42,7 +42,7 @@ join ${stats_db_name}.result res on res.id=r.id;
|
||||||
create table ${stats_db_name}.result_apc as
|
create table ${stats_db_name}.result_apc as
|
||||||
select r.id, r.amount, r.currency
|
select r.id, r.amount, r.currency
|
||||||
from (
|
from (
|
||||||
select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency
|
select substr(r.id, 4) as id, cast(inst.processingchargeamount.value as float) as amount, inst.processingchargecurrency.value as currency
|
||||||
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
|
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
|
||||||
join ${stats_db_name}.result res on res.id=r.id
|
join ${stats_db_name}.result res on res.id=r.id
|
||||||
where r.amount is not null;
|
where r.amount is not null;
|
||||||
|
|
|
@ -454,16 +454,16 @@ FROM publication_datasources pd
|
||||||
compute stats indi_pub_hybrid_oa_with_cc;
|
compute stats indi_pub_hybrid_oa_with_cc;
|
||||||
|
|
||||||
create table indi_pub_downloads stored as parquet as
|
create table indi_pub_downloads stored as parquet as
|
||||||
SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
|
SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
|
||||||
join publication on result_id=id
|
join publication on result_id=id
|
||||||
where downloads>0
|
where downloads>0
|
||||||
GROUP BY result_id
|
GROUP BY result_id
|
||||||
order by no_dowloads desc;
|
order by no_downloads desc;
|
||||||
|
|
||||||
compute stats indi_pub_downloads;
|
compute stats indi_pub_downloads;
|
||||||
|
|
||||||
create table indi_pub_downloads_datasource stored as parquet as
|
create table indi_pub_downloads_datasource stored as parquet as
|
||||||
SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
|
SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
|
||||||
join publication on result_id=id
|
join publication on result_id=id
|
||||||
where downloads>0
|
where downloads>0
|
||||||
GROUP BY result_id, repository_id
|
GROUP BY result_id, repository_id
|
||||||
|
@ -472,7 +472,7 @@ order by result_id;
|
||||||
compute stats indi_pub_downloads_datasource;
|
compute stats indi_pub_downloads_datasource;
|
||||||
|
|
||||||
create table indi_pub_downloads_year stored as parquet as
|
create table indi_pub_downloads_year stored as parquet as
|
||||||
SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
|
SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us
|
||||||
join publication on result_id=id where downloads>0
|
join publication on result_id=id where downloads>0
|
||||||
GROUP BY result_id, `year`
|
GROUP BY result_id, `year`
|
||||||
order by `year` asc;
|
order by `year` asc;
|
||||||
|
@ -480,7 +480,7 @@ order by `year` asc;
|
||||||
compute stats indi_pub_downloads_year;
|
compute stats indi_pub_downloads_year;
|
||||||
|
|
||||||
create table indi_pub_downloads_datasource_year stored as parquet as
|
create table indi_pub_downloads_datasource_year stored as parquet as
|
||||||
SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
|
SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us
|
||||||
join publication on result_id=id
|
join publication on result_id=id
|
||||||
where downloads>0
|
where downloads>0
|
||||||
GROUP BY result_id, repository_id, `year`
|
GROUP BY result_id, repository_id, `year`
|
||||||
|
|
|
@ -39,7 +39,6 @@ create table TARGET.result stored as parquet as
|
||||||
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
|
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
|
||||||
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
|
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
|
||||||
'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
|
'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
|
||||||
|
|
||||||
'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
|
'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
|
||||||
'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- University of Warsaw
|
'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- University of Warsaw
|
||||||
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
|
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
|
||||||
|
@ -224,18 +223,3 @@ create table TARGET.indi_result_with_pid stored as parquet as select * from SOUR
|
||||||
--create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
--create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||||
--compute stats TARGET.indi_software_gold_oa;
|
--compute stats TARGET.indi_software_gold_oa;
|
||||||
|
|
||||||
--denorm
|
|
||||||
alter table TARGET.result rename to TARGET.res_tmp;
|
|
||||||
|
|
||||||
create table TARGET.result_denorm stored as parquet as
|
|
||||||
select distinct r.*, rp.project, p.acronym as pacronym, p.title as ptitle, p.funder as pfunder, p.funding_lvl0 as pfunding_lvl0, rd.datasource, d.name as dname, d.type as dtype
|
|
||||||
from TARGET.res_tmp r
|
|
||||||
left outer join TARGET.result_projects rp on rp.id=r.id
|
|
||||||
left outer join TARGET.result_datasources rd on rd.id=r.id
|
|
||||||
left outer join TARGET.project p on p.id=rp.project
|
|
||||||
left outer join TARGET.datasource d on d.id=rd.datasource;
|
|
||||||
compute stats TARGET.result_denorm;
|
|
||||||
|
|
||||||
alter table TARGET.result_denorm rename to TARGET.result;
|
|
||||||
drop table TARGET.res_tmp;
|
|
||||||
--- done!
|
|
Loading…
Reference in New Issue