-- dnet-hadoop/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/impala-stats.sql
--
-- 78 lines
-- 6.0 KiB
-- SQL

-- Refresh Impala's cached catalog before gathering statistics. With no table
-- name given, the statement applies to every table rather than a single one.
-- NOTE(review): presumably needed because the tables are (re)built outside
-- Impala before this script runs -- confirm against the calling workflow.
INVALIDATE METADATA;

-- ---------------------------------------------------------------------------
-- DISABLED: creation of the pre-aggregated chart_* tables.
-- Every statement below is commented out and therefore not executed.
-- NOTE(review): SOURCE does not say why these were disabled.
-- ---------------------------------------------------------------------------
-- CREATE TABLE chart_country_year AS SELECT org.country AS country, r.year AS year, count(distinct r.id) AS publications FROM result r, result_datasources rd, datasource d, datasource_organizations dor, organization org WHERE r.id=rd.id AND rd.datasource=d.id AND rd.datasource=dor.id AND dor.organization=org.id AND r.type='publication' AND r.bestlicence='Open Access' and r.year>='1990' AND r.year<=CAST(date_part('year',now()) AS STRING) group by org.country, r.year ORDER BY org.country, r.year;
-- CREATE TABLE chart_country_datasources AS SELECT org.country AS country, d.name AS datasource, count(distinct r.id) AS publications FROM result r, result_datasources rd, datasource d, datasource_organizations dor, organization org WHERE r.id=rd.id AND rd.datasource=d.id AND d.id=dor.id AND dor.organization=org.id AND r.type='publication' AND r.bestlicence='Open Access' GROUP BY org.country, d.name ORDER BY org.country, publications DESC;
-- CREATE TABLE chart_country_type AS SELECT org.country AS country, rc.type AS type, count(distinct r.id) AS publications FROM result r, result_datasources rd, datasource d, result_classifications rc, datasource_organizations dor, organization org WHERE r.id=rd.id AND r.id=rc.id AND rd.datasource=d.id AND rd.datasource=dor.id AND dor.organization=org.id AND r.type='publication' AND r.bestlicence='Open Access' GROUP BY org.country, rc.type;
-- CREATE TABLE chart_country_fp7 AS SELECT org.country AS country, r.year AS year, count(distinct r.id) AS publications FROM result r, result_datasources rd, datasource d, datasource_organizations dor, organization org, project_results pr, project p WHERE r.id=rd.id AND rd.datasource=d.id AND rd.datasource=dor.id AND dor.organization=org.id AND r.id=pr.result AND pr.id=p.id AND p.funding_lvl0='FP7' AND r.type='publication' AND r.year>='1990' AND r.year<=CAST(date_part('year',now()) AS STRING) GROUP BY org.country, r.year ORDER BY org.country, r.year;
-- CREATE TABLE chart_datasource_type AS SELECT rd.datasource, rc.type, count(distinct rd.id) FROM result_datasources rd, result_classifications rc WHERE rd.id=rc.id GROUP BY rd.datasource, rc.type;
-- CREATE TABLE chart_datasource_year AS SELECT rd.datasource, r.year, count(distinct rd.id) FROM result r, result_datasources rd WHERE rd.id=r.id GROUP By rd.datasource, r.year;
-- CREATE TABLE chart_datasource_funders AS SELECT rd.datasource, p.funder, count(distinct rd.id) FROM result_datasources rd, project p, project_results pr WHERE p.id=pr.id AND pr.result=rd.id GROUP BY rd.datasource, p.funder;
-- CREATE TABLE chart_datasource_projects_pubs AS SELECT rd.datasource, p.title, count(distinct rd.id) FROM result_datasources rd, project p, project_results pr, result r WHERE p.id=pr.id AND pr.result=rd.id AND pr.result=r.id AND r.type='publication' GROUP BY rd.datasource, p.title;
-- CREATE TABLE chart_datasource_projects_data AS SELECT rd.datasource, p.title, count(distinct rd.id) FROM result_datasources rd, project p, project_results pr, result r WHERE p.id=pr.id AND pr.result=rd.id AND pr.result=r.id and r.type='dataset' GROUP BY rd.datasource, p.title;
-- CREATE TABLE chart_project_year AS SELECT p.id, r.year, count( distinct r.id) FROM result r, project_results pr, project p WHERE r.id=pr.result AND p.id=pr.id AND r.year>=p.start_year GROUP BY p.id, r.year;
-- CREATE TABLE chart_project_license AS SELECT pr.id, r.bestlicence, count(distinct r.id) FROM result r, project_results pr WHERE r.id=pr.result AND r.type='publication' GROUP BY pr.id, r.bestlicence;
-- CREATE TABLE chart_project_repos AS SELECT pr.id, d.name, count (distinct r.id) FROM result r, project_results pr, datasource d, datasource_results dr WHERE r.id=dr.result AND d.id=dr.id AND r.id=pr.result AND r.type='publication' GROUP BY pr.id, d.name;
-- DISABLED: replace the result_datasources view with a de-duplicated table
-- (materialise the DISTINCT rows, drop the view, rename the new table over it).
-- CREATE TABLE rd_distinct AS SELECT DISTINCT * FROM result_datasources;
-- DROP VIEW result_datasources;
-- ALTER TABLE rd_distinct RENAME TO result_datasources;
-- DISABLED: statistics for the chart_* tables whose creation is disabled above.
-- COMPUTE STATS chart_country_datasources;
-- COMPUTE STATS chart_country_fp7;
-- COMPUTE STATS chart_country_type;
-- COMPUTE STATS chart_country_year;
-- COMPUTE STATS chart_datasource_funders;
-- COMPUTE STATS chart_datasource_projects_data;
-- COMPUTE STATS chart_datasource_projects_pubs;
-- COMPUTE STATS chart_datasource_type;
-- COMPUTE STATS chart_datasource_year;
-- COMPUTE STATS chart_project_license;
-- COMPUTE STATS chart_project_repos;
-- COMPUTE STATS chart_project_year;
-- ---------------------------------------------------------------------------
-- Gather table and column statistics for the stats tables, one COMPUTE STATS
-- statement per table. Impala's planner uses these statistics when optimising
-- queries. The statements are independent of one another; they are grouped by
-- entity purely for readability.
-- ---------------------------------------------------------------------------

-- dataset
COMPUTE STATS dataset;
COMPUTE STATS dataset_citations;
COMPUTE STATS dataset_classifications;
COMPUTE STATS dataset_concepts;
COMPUTE STATS dataset_datasources;
COMPUTE STATS dataset_languages;
COMPUTE STATS dataset_oids;
COMPUTE STATS dataset_pids;
COMPUTE STATS dataset_topics;

-- datasource
COMPUTE STATS datasource;
COMPUTE STATS datasource_languages;
COMPUTE STATS datasource_oids;
COMPUTE STATS datasource_organizations;

-- country numbers and organizations
COMPUTE STATS numbers_country;
COMPUTE STATS organization;

-- otherresearchproduct
COMPUTE STATS otherresearchproduct;
COMPUTE STATS otherresearchproduct_citations;
COMPUTE STATS otherresearchproduct_classifications;
COMPUTE STATS otherresearchproduct_concepts;
COMPUTE STATS otherresearchproduct_datasources;
COMPUTE STATS otherresearchproduct_languages;
COMPUTE STATS otherresearchproduct_oids;
COMPUTE STATS otherresearchproduct_pids;
COMPUTE STATS otherresearchproduct_topics;

-- project
COMPUTE STATS project;
COMPUTE STATS project_oids;
COMPUTE STATS project_organizations;
COMPUTE STATS project_results;

-- publication
COMPUTE STATS publication;
COMPUTE STATS publication_citations;
COMPUTE STATS publication_classifications;
COMPUTE STATS publication_concepts;
COMPUTE STATS publication_datasources;
COMPUTE STATS publication_languages;
COMPUTE STATS publication_oids;
COMPUTE STATS publication_pids;
COMPUTE STATS publication_topics;

-- result link tables
COMPUTE STATS result_organization;
COMPUTE STATS result_projects;

-- software
COMPUTE STATS software;
COMPUTE STATS software_citations;
COMPUTE STATS software_classifications;
COMPUTE STATS software_concepts;
COMPUTE STATS software_datasources;
COMPUTE STATS software_languages;
COMPUTE STATS software_oids;
COMPUTE STATS software_pids;
-- NOTE(review): plural name, matching dataset_topics, publication_topics and
-- otherresearchproduct_topics -- confirm the table exists in the stats DB.
COMPUTE STATS software_topics;