From 264939a01acf7f405a53267179bc555c909c04dc Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 23 Jul 2021 14:23:04 +0300 Subject: [PATCH] Update createIndicatorsTablesSprint2.sql --- .../scripts/createIndicatorsTablesSprint2.sql | 56 +++++++++++++------ 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/scripts/createIndicatorsTablesSprint2.sql b/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/scripts/createIndicatorsTablesSprint2.sql index 29555b147..f8a47e274 100644 --- a/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/scripts/createIndicatorsTablesSprint2.sql +++ b/dhp-workflows/dhp-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/indicators/oozie_app/scripts/createIndicatorsTablesSprint2.sql @@ -1,6 +1,26 @@ +create table TARGET.indi_project_pubs_count stored as parquet as +select pr.id id, count(p.id) total_pubs from SOURCE.project_results pr +join SOURCE.publication p on p.id=pr.result +group by pr.id + +create table TARGET.indi_project_datasets_count stored as parquet as +select pr.id id, count(d.id) total_datasets from SOURCE.project_results pr +join SOURCE.dataset d on d.id=pr.result +group by pr.id + +create table TARGET.indi_project_software_count stored as parquet as +select pr.id id, count(s.id) total_software from SOURCE.project_results pr +join SOURCE.software s on s.id=pr.result +group by pr.id + +create table TARGET.indi_project_otherresearch_count stored as parquet as +select pr.id id, count(o.id) total_other from SOURCE.project_results pr +join SOURCE.otherresearchproduct o on o.id=pr.result +group by pr.id + create table TARGET.indi_pub_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA from (SELECT year, country, SUM(CASE WHEN bestlicence='Open Access' THEN 1 @@ -16,8 +36,8 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA group by year, country) tmp create table TARGET.indi_dataset_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA from (SELECT year, country, SUM(CASE WHEN bestlicence='Open Access' THEN 1 @@ -33,8 +53,8 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA group by year, country) tmp create table TARGET.indi_software_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA from (SELECT year, country, SUM(CASE WHEN bestlicence='Open Access' THEN 1 @@ -51,8 +71,8 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA create table TARGET.indi_other_avg_year_country_oa stored as parquet as -select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentOA, -round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as percentNonOA +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA from (SELECT year, country, SUM(CASE WHEN bestlicence='Open Access' THEN 1 @@ -74,7 +94,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%') join SOURCE.publication p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) -select year, round(no_of_pubs/total*100,3) percentageofpubs, name +select year, name, round(no_of_pubs/total*100,3) averageofpubs from total create table TARGET.indi_dataset_avg_year_context_oa stored as parquet as @@ -84,7 +104,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%') join SOURCE.dataset p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) -select year, round(no_of_pubs/total*100,3) percentageofdataset, name +select year, name, round(no_of_pubs/total*100,3) averageofdataset from total create table TARGET.indi_software_avg_year_context_oa stored as parquet as @@ -94,7 +114,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%') join SOURCE.software p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) -select year, round(no_of_pubs/total*100,3) percentageofsoftware, name +select year, name, round(no_of_pubs/total*100,3) averageofsoftware from total create table TARGET.indi_other_avg_year_context_oa stored as parquet as @@ -104,7 +124,7 @@ join SOURCE.context c on pc.concept like concat('%',c.id,'%') join SOURCE.otherresearchproduct p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) -select year, round(no_of_pubs/total*100,3) percentageofother, name +select year, name, round(no_of_pubs/total*100,3) averageofother from total create table TARGET.indi_other_avg_year_content_oa stored as parquet as @@ -115,7 +135,7 @@ join SOURCE.datasource d on datasource=d.id join SOURCE.otherresearchproduct p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) -select year, round(no_of_pubs/total*100,3) percentageOfOtherresearchproduct, type +select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct from total create table TARGET.indi_software_avg_year_content_oa stored as parquet as @@ -126,7 +146,7 @@ join SOURCE.datasource d on datasource=d.id join SOURCE.software p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) -select year, round(no_of_pubs/total*100,3) percentageOfSoftware, type +select year, type, round(no_of_pubs/total*100,3) averageOfSoftware from total create table TARGET.indi_dataset_avg_year_content_oa stored as parquet as @@ -137,7 +157,7 @@ join SOURCE.datasource d on datasource=d.id join SOURCE.dataset p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) -select year, round(no_of_pubs/total*100,3) percentageOfDatasets, type +select year, type, round(no_of_pubs/total*100,3) averageOfDatasets from total create table TARGET.indi_pub_avg_year_content_oa stored as parquet as @@ -148,7 +168,7 @@ join SOURCE.datasource d on datasource=d.id join SOURCE.publication p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) -select year, round(no_of_pubs/total*100,3) percentageOfPubs, type +select year, type, round(no_of_pubs/total*100,3) averageOfPubs from total create table TARGET.indi_pub_has_cc_licence_tr stored as parquet as @@ -180,6 +200,10 @@ create table TARGET.indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract from SOURCE.publication +compute stats TARGET.indi_project_pubs_count; +compute stats TARGET.indi_project_datasets_count; +compute stats TARGET.indi_project_software_count; +compute stats TARGET.indi_project_otherresearch_count; compute stats TARGET.indi_pub_avg_year_country_oa; compute stats TARGET.indi_dataset_avg_year_country_oa; compute stats TARGET.indi_software_avg_year_country_oa;