Update createIndicatorsTablesSprint2.sql

This commit is contained in:
dimitrispie 2021-07-23 14:23:04 +03:00
parent dd1098255e
commit 264939a01a
1 changed file with 40 additions and 16 deletions

View File

@ -1,6 +1,26 @@
-- Per-project publication count: one row per project_results.id with the
-- number of joined publication rows. The inner join means ids with no
-- matching publication produce no row (there is no zero-count row).
-- NOTE(review): SOURCE / TARGET look like schema placeholders substituted by
-- the runner; confirm before executing the script directly.
create table TARGET.indi_project_pubs_count stored as parquet as
select pr.id as id, count(p.id) as total_pubs
from SOURCE.project_results pr
join SOURCE.publication p on p.id = pr.result
group by pr.id;
-- Per-project dataset count: one row per project_results.id with the number
-- of joined dataset rows. The inner join means ids with no matching dataset
-- produce no row (there is no zero-count row).
-- NOTE(review): SOURCE / TARGET look like schema placeholders substituted by
-- the runner; confirm before executing the script directly.
create table TARGET.indi_project_datasets_count stored as parquet as
select pr.id as id, count(d.id) as total_datasets
from SOURCE.project_results pr
join SOURCE.dataset d on d.id = pr.result
group by pr.id;
-- Per-project software count: one row per project_results.id with the number
-- of joined software rows. The inner join means ids with no matching software
-- produce no row (there is no zero-count row).
-- NOTE(review): SOURCE / TARGET look like schema placeholders substituted by
-- the runner; confirm before executing the script directly.
create table TARGET.indi_project_software_count stored as parquet as
select pr.id as id, count(s.id) as total_software
from SOURCE.project_results pr
join SOURCE.software s on s.id = pr.result
group by pr.id;
-- Per-project "other research product" count: one row per project_results.id
-- with the number of joined otherresearchproduct rows. The inner join means
-- ids with no matching row produce no output row (there is no zero-count row).
-- NOTE(review): SOURCE / TARGET look like schema placeholders substituted by
-- the runner; confirm before executing the script directly.
create table TARGET.indi_project_otherresearch_count stored as parquet as
select pr.id as id, count(o.id) as total_other
from SOURCE.project_results pr
join SOURCE.otherresearchproduct o on o.id = pr.result
group by pr.id;
create table TARGET.indi_pub_avg_year_country_oa stored as parquet as
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
from
(SELECT year, country, SUM(CASE
WHEN bestlicence='Open Access' THEN 1
@ -16,8 +36,8 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
group by year, country) tmp
create table TARGET.indi_dataset_avg_year_country_oa stored as parquet as
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
from
(SELECT year, country, SUM(CASE
WHEN bestlicence='Open Access' THEN 1
@ -33,8 +53,8 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
group by year, country) tmp
create table TARGET.indi_software_avg_year_country_oa stored as parquet as
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
from
(SELECT year, country, SUM(CASE
WHEN bestlicence='Open Access' THEN 1
@ -51,8 +71,8 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
create table TARGET.indi_other_avg_year_country_oa stored as parquet as
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
from
(SELECT year, country, SUM(CASE
WHEN bestlicence='Open Access' THEN 1
@ -74,7 +94,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%')
join SOURCE.publication p on p.id=pc.id
where cast(year as int)>=2003 and cast(year as int)<=2021
group by c.name, year )
select year, round(no_of_pubs/total*100,3) percentageofpubs, name
select year, name, round(no_of_pubs/total*100,3) averageofpubs
from total
create table TARGET.indi_dataset_avg_year_context_oa stored as parquet as
@ -84,7 +104,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%')
join SOURCE.dataset p on p.id=pc.id
where cast(year as int)>=2003 and cast(year as int)<=2021
group by c.name, year )
select year, round(no_of_pubs/total*100,3) percentageofdataset, name
select year, name, round(no_of_pubs/total*100,3) averageofdataset
from total
create table TARGET.indi_software_avg_year_context_oa stored as parquet as
@ -94,7 +114,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%')
join SOURCE.software p on p.id=pc.id
where cast(year as int)>=2003 and cast(year as int)<=2021
group by c.name, year )
select year, round(no_of_pubs/total*100,3) percentageofsoftware, name
select year, name, round(no_of_pubs/total*100,3) averageofsoftware
from total
create table TARGET.indi_other_avg_year_context_oa stored as parquet as
@ -104,7 +124,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%')
join SOURCE.otherresearchproduct p on p.id=pc.id
where cast(year as int)>=2003 and cast(year as int)<=2021
group by c.name, year )
select year, round(no_of_pubs/total*100,3) percentageofother, name
select year, name, round(no_of_pubs/total*100,3) averageofother
from total
create table TARGET.indi_other_avg_year_content_oa stored as parquet as
@ -115,7 +135,7 @@ join SOURCE.datasource d on datasource=d.id
join SOURCE.otherresearchproduct p on p.id=pd.id
where cast(year as int)>=2003 and cast(year as int)<=2021
group by d.type, year)
select year, round(no_of_pubs/total*100,3) percentageOfOtherresearchproduct, type
select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct
from total
create table TARGET.indi_software_avg_year_content_oa stored as parquet as
@ -126,7 +146,7 @@ join SOURCE.datasource d on datasource=d.id
join SOURCE.software p on p.id=pd.id
where cast(year as int)>=2003 and cast(year as int)<=2021
group by d.type, year)
select year, round(no_of_pubs/total*100,3) percentageOfSoftware, type
select year, type, round(no_of_pubs/total*100,3) averageOfSoftware
from total
create table TARGET.indi_dataset_avg_year_content_oa stored as parquet as
@ -137,7 +157,7 @@ join SOURCE.datasource d on datasource=d.id
join SOURCE.dataset p on p.id=pd.id
where cast(year as int)>=2003 and cast(year as int)<=2021
group by d.type, year)
select year, round(no_of_pubs/total*100,3) percentageOfDatasets, type
select year, type, round(no_of_pubs/total*100,3) averageOfDatasets
from total
create table TARGET.indi_pub_avg_year_content_oa stored as parquet as
@ -148,7 +168,7 @@ join SOURCE.datasource d on datasource=d.id
join SOURCE.publication p on p.id=pd.id
where cast(year as int)>=2003 and cast(year as int)<=2021
group by d.type, year)
select year, round(no_of_pubs/total*100,3) percentageOfPubs, type
select year, type, round(no_of_pubs/total*100,3) averageOfPubs
from total
create table TARGET.indi_pub_has_cc_licence_tr stored as parquet as
@ -180,6 +200,10 @@ create table TARGET.indi_pub_has_abstract stored as parquet as
select distinct publication.id, coalesce(abstract, 1) has_abstract
from SOURCE.publication
-- Gather table and column statistics for the indicator tables listed below.
-- Each statement is independent of the others.
COMPUTE STATS TARGET.indi_project_pubs_count;
COMPUTE STATS TARGET.indi_project_datasets_count;
COMPUTE STATS TARGET.indi_project_software_count;
COMPUTE STATS TARGET.indi_project_otherresearch_count;
COMPUTE STATS TARGET.indi_pub_avg_year_country_oa;
COMPUTE STATS TARGET.indi_dataset_avg_year_country_oa;
COMPUTE STATS TARGET.indi_software_avg_year_country_oa;