forked from D-Net/dnet-hadoop
Update createIndicatorsTablesSprint2.sql
This commit is contained in:
parent
dd1098255e
commit
264939a01a
|
@ -1,6 +1,26 @@
|
|||
-- Publication count per project_results.id (presumably the project id;
-- confirm against the SOURCE schema).
-- Inner join: a project_results row is counted only when its `result` matches
-- a row in SOURCE.publication, so ids with no matching publication produce no
-- row in this table (rather than a row with total_pubs = 0).
-- The statement is terminated with ';' so that the following statement in the
-- script is parsed on its own.
create table TARGET.indi_project_pubs_count stored as parquet as
select
    pr.id as id,
    count(p.id) as total_pubs
from SOURCE.project_results pr
join SOURCE.publication p on p.id = pr.result
group by pr.id;
|
||||
|
||||
-- Dataset count per project_results.id (presumably the project id;
-- confirm against the SOURCE schema).
-- Inner join: a project_results row is counted only when its `result` matches
-- a row in SOURCE.dataset, so ids with no matching dataset produce no row in
-- this table (rather than a row with total_datasets = 0).
-- The statement is terminated with ';' so that the following statement in the
-- script is parsed on its own.
create table TARGET.indi_project_datasets_count stored as parquet as
select
    pr.id as id,
    count(d.id) as total_datasets
from SOURCE.project_results pr
join SOURCE.dataset d on d.id = pr.result
group by pr.id;
|
||||
|
||||
-- Software count per project_results.id (presumably the project id;
-- confirm against the SOURCE schema).
-- Inner join: a project_results row is counted only when its `result` matches
-- a row in SOURCE.software, so ids with no matching software produce no row in
-- this table (rather than a row with total_software = 0).
-- The statement is terminated with ';' so that the following statement in the
-- script is parsed on its own.
create table TARGET.indi_project_software_count stored as parquet as
select
    pr.id as id,
    count(s.id) as total_software
from SOURCE.project_results pr
join SOURCE.software s on s.id = pr.result
group by pr.id;
|
||||
|
||||
-- Other-research-product count per project_results.id (presumably the project
-- id; confirm against the SOURCE schema).
-- Inner join: a project_results row is counted only when its `result` matches
-- a row in SOURCE.otherresearchproduct, so ids with no matching product
-- produce no row in this table (rather than a row with total_other = 0).
-- The statement is terminated with ';' so that the following statement in the
-- script is parsed on its own.
create table TARGET.indi_project_otherresearch_count stored as parquet as
select
    pr.id as id,
    count(o.id) as total_other
from SOURCE.project_results pr
join SOURCE.otherresearchproduct o on o.id = pr.result
group by pr.id;
|
||||
|
||||
create table TARGET.indi_pub_avg_year_country_oa stored as parquet as
|
||||
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA,
|
||||
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
|
||||
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
|
||||
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
|
||||
from
|
||||
(SELECT year, country, SUM(CASE
|
||||
WHEN bestlicence='Open Access' THEN 1
|
||||
|
@ -16,8 +36,8 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
|
|||
group by year, country) tmp
|
||||
|
||||
create table TARGET.indi_dataset_avg_year_country_oa stored as parquet as
|
||||
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA,
|
||||
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
|
||||
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
|
||||
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
|
||||
from
|
||||
(SELECT year, country, SUM(CASE
|
||||
WHEN bestlicence='Open Access' THEN 1
|
||||
|
@ -33,8 +53,8 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
|
|||
group by year, country) tmp
|
||||
|
||||
create table TARGET.indi_software_avg_year_country_oa stored as parquet as
|
||||
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA,
|
||||
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
|
||||
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
|
||||
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
|
||||
from
|
||||
(SELECT year, country, SUM(CASE
|
||||
WHEN bestlicence='Open Access' THEN 1
|
||||
|
@ -51,8 +71,8 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
|
|||
|
||||
|
||||
create table TARGET.indi_other_avg_year_country_oa stored as parquet as
|
||||
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA,
|
||||
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA
|
||||
select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA,
|
||||
round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA
|
||||
from
|
||||
(SELECT year, country, SUM(CASE
|
||||
WHEN bestlicence='Open Access' THEN 1
|
||||
|
@ -74,7 +94,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%')
|
|||
join SOURCE.publication p on p.id=pc.id
|
||||
where cast(year as int)>=2003 and cast(year as int)<=2021
|
||||
group by c.name, year )
|
||||
select year, round(no_of_pubs/total*100,3) percentageofpubs, name
|
||||
select year, name, round(no_of_pubs/total*100,3) averageofpubs
|
||||
from total
|
||||
|
||||
create table TARGET.indi_dataset_avg_year_context_oa stored as parquet as
|
||||
|
@ -84,7 +104,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%')
|
|||
join SOURCE.dataset p on p.id=pc.id
|
||||
where cast(year as int)>=2003 and cast(year as int)<=2021
|
||||
group by c.name, year )
|
||||
select year, round(no_of_pubs/total*100,3) percentageofdataset, name
|
||||
select year, name, round(no_of_pubs/total*100,3) averageofdataset
|
||||
from total
|
||||
|
||||
create table TARGET.indi_software_avg_year_context_oa stored as parquet as
|
||||
|
@ -94,7 +114,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%')
|
|||
join SOURCE.software p on p.id=pc.id
|
||||
where cast(year as int)>=2003 and cast(year as int)<=2021
|
||||
group by c.name, year )
|
||||
select year, round(no_of_pubs/total*100,3) percentageofsoftware, name
|
||||
select year, name, round(no_of_pubs/total*100,3) averageofsoftware
|
||||
from total
|
||||
|
||||
create table TARGET.indi_other_avg_year_context_oa stored as parquet as
|
||||
|
@ -104,7 +124,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%')
|
|||
join SOURCE.otherresearchproduct p on p.id=pc.id
|
||||
where cast(year as int)>=2003 and cast(year as int)<=2021
|
||||
group by c.name, year )
|
||||
select year, round(no_of_pubs/total*100,3) percentageofother, name
|
||||
select year, name, round(no_of_pubs/total*100,3) averageofother
|
||||
from total
|
||||
|
||||
create table TARGET.indi_other_avg_year_content_oa stored as parquet as
|
||||
|
@ -115,7 +135,7 @@ join SOURCE.datasource d on datasource=d.id
|
|||
join SOURCE.otherresearchproduct p on p.id=pd.id
|
||||
where cast(year as int)>=2003 and cast(year as int)<=2021
|
||||
group by d.type, year)
|
||||
select year, round(no_of_pubs/total*100,3) percentageOfOtherresearchproduct, type
|
||||
select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct
|
||||
from total
|
||||
|
||||
create table TARGET.indi_software_avg_year_content_oa stored as parquet as
|
||||
|
@ -126,7 +146,7 @@ join SOURCE.datasource d on datasource=d.id
|
|||
join SOURCE.software p on p.id=pd.id
|
||||
where cast(year as int)>=2003 and cast(year as int)<=2021
|
||||
group by d.type, year)
|
||||
select year, round(no_of_pubs/total*100,3) percentageOfSoftware, type
|
||||
select year, type, round(no_of_pubs/total*100,3) averageOfSoftware
|
||||
from total
|
||||
|
||||
create table TARGET.indi_dataset_avg_year_content_oa stored as parquet as
|
||||
|
@ -137,7 +157,7 @@ join SOURCE.datasource d on datasource=d.id
|
|||
join SOURCE.dataset p on p.id=pd.id
|
||||
where cast(year as int)>=2003 and cast(year as int)<=2021
|
||||
group by d.type, year)
|
||||
select year, round(no_of_pubs/total*100,3) percentageOfDatasets, type
|
||||
select year, type, round(no_of_pubs/total*100,3) averageOfDatasets
|
||||
from total
|
||||
|
||||
create table TARGET.indi_pub_avg_year_content_oa stored as parquet as
|
||||
|
@ -148,7 +168,7 @@ join SOURCE.datasource d on datasource=d.id
|
|||
join SOURCE.publication p on p.id=pd.id
|
||||
where cast(year as int)>=2003 and cast(year as int)<=2021
|
||||
group by d.type, year)
|
||||
select year, round(no_of_pubs/total*100,3) percentageOfPubs, type
|
||||
select year, type, round(no_of_pubs/total*100,3) averageOfPubs
|
||||
from total
|
||||
|
||||
create table TARGET.indi_pub_has_cc_licence_tr stored as parquet as
|
||||
|
@ -180,6 +200,10 @@ create table TARGET.indi_pub_has_abstract stored as parquet as
|
|||
select distinct publication.id, coalesce(abstract, 1) has_abstract
|
||||
from SOURCE.publication
|
||||
|
||||
-- Gather statistics for the indicator tables created by this script.
-- `compute stats` is run once per table, after the table has been populated.

-- Per-project result counts.
compute stats TARGET.indi_project_pubs_count;
compute stats TARGET.indi_project_datasets_count;
compute stats TARGET.indi_project_software_count;
compute stats TARGET.indi_project_otherresearch_count;

-- Open-access share per year and country.
compute stats TARGET.indi_pub_avg_year_country_oa;
compute stats TARGET.indi_dataset_avg_year_country_oa;
compute stats TARGET.indi_software_avg_year_country_oa;
|
||||
|
|
Loading…
Reference in New Issue