dnet-hadoop/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step16.sql

132 lines
5.4 KiB
SQL

-- Finalize usage stats.
-- Drop the final tables left by a previous run so that the
-- CREATE TABLE IF NOT EXISTS statements further down rebuild them from the
-- current source tables instead of keeping (or appending to) old contents.
DROP TABLE IF EXISTS ${usageStatsDB}.views_stats;
DROP TABLE IF EXISTS ${usageStatsDB}.downloads_stats;
DROP TABLE IF EXISTS ${usageStatsDB}.pageviews_stats;
DROP TABLE IF EXISTS ${usageStatsDB}.usage_stats;
DROP TABLE IF EXISTS ${usageStatsDB}.project_stats;
-- datasource_stats is built with CREATE TABLE IF NOT EXISTS ... AS in this
-- script; without this drop a re-run would silently keep the old table.
DROP TABLE IF EXISTS ${usageStatsDB}.datasource_stats;
-- NOTE(review): no visible statement in this script creates download_stats;
-- presumably a legacy table name. The drop is kept for backward compatibility.
DROP TABLE IF EXISTS ${usageStatsDB}.download_stats;
-- views_stats: create an empty Parquet table with the column layout of
-- openaire_views_stats_tmp, then append the rows of six per-source views tables.
-- Every load uses SELECT *, so columns are matched by position: each source
-- table must expose its columns in the same order as views_stats.
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.views_stats
    LIKE ${usageStatsDB}.openaire_views_stats_tmp
    STORED AS PARQUET;

-- Source: openaire_views_stats_tmp
INSERT INTO ${usageStatsDB}.views_stats
    SELECT *
    FROM ${usageStatsDB}.openaire_views_stats_tmp;

-- Source: episciencesviews
INSERT INTO ${usageStatsDB}.views_stats
    SELECT *
    FROM ${usageStatsDB}.episciencesviews;

-- Source: pedocs_views_stats_tmp
INSERT INTO ${usageStatsDB}.views_stats
    SELECT *
    FROM ${usageStatsDB}.pedocs_views_stats_tmp;

-- Source: tudelft_views_stats_tmp
INSERT INTO ${usageStatsDB}.views_stats
    SELECT *
    FROM ${usageStatsDB}.tudelft_views_stats_tmp;

-- Source: la_views_stats_tmp
INSERT INTO ${usageStatsDB}.views_stats
    SELECT *
    FROM ${usageStatsDB}.la_views_stats_tmp;

-- Source: b2share_views_stats_tmp
INSERT INTO ${usageStatsDB}.views_stats
    SELECT *
    FROM ${usageStatsDB}.b2share_views_stats_tmp;
-- downloads_stats: create an empty Parquet table with the column layout of
-- openaire_downloads_stats_tmp, then append the rows of seven per-source
-- downloads tables. SELECT * loads are positional, so every source table must
-- share the column order of downloads_stats.
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.downloads_stats
    LIKE ${usageStatsDB}.openaire_downloads_stats_tmp
    STORED AS PARQUET;

-- Source: openaire_downloads_stats_tmp
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT *
    FROM ${usageStatsDB}.openaire_downloads_stats_tmp;

-- Source: episciencesdownloads
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT *
    FROM ${usageStatsDB}.episciencesdownloads;

-- Source: pedocs_downloads_stats_tmp
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT *
    FROM ${usageStatsDB}.pedocs_downloads_stats_tmp;

-- Source: tudelft_downloads_stats_tmp
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT *
    FROM ${usageStatsDB}.tudelft_downloads_stats_tmp;

-- Source: b2share_downloads_stats_tmp
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT *
    FROM ${usageStatsDB}.b2share_downloads_stats_tmp;

-- Source: la_downloads_stats_tmp
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT *
    FROM ${usageStatsDB}.la_downloads_stats_tmp;

-- Source: irus_downloads_stats_tmp
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT *
    FROM ${usageStatsDB}.irus_downloads_stats_tmp;
-- Additional loads into views_stats / downloads_stats.

-- irus_R5_stats_tmp carries both a views and a downloads column in one table;
-- it is read twice, once per target table, picking the matching counter as
-- the fifth column.
INSERT INTO ${usageStatsDB}.views_stats
    SELECT source, repository_id, result_id, `date`, views, openaire
    FROM ${usageStatsDB}.irus_R5_stats_tmp;

INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT source, repository_id, result_id, `date`, downloads, openaire
    FROM ${usageStatsDB}.irus_R5_stats_tmp;

-- Source: sarc_downloads_stats_tmp (positional SELECT * load)
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT *
    FROM ${usageStatsDB}.sarc_downloads_stats_tmp;

-- Sources: datacite_views / datacite_downloads (positional SELECT * loads)
INSERT INTO ${usageStatsDB}.views_stats
    SELECT *
    FROM ${usageStatsDB}.datacite_views;

INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT *
    FROM ${usageStatsDB}.datacite_downloads;
-- pageviews_stats: a Parquet copy of openaire_pageviews_stats_tmp
-- (same column layout, all rows).
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.pageviews_stats
    LIKE ${usageStatsDB}.openaire_pageviews_stats_tmp
    STORED AS PARQUET;

INSERT INTO ${usageStatsDB}.pageviews_stats
    SELECT *
    FROM ${usageStatsDB}.openaire_pageviews_stats_tmp;
-- usage_stats: combine downloads_stats and views_stats into one table keyed by
-- (source, repository_id, result_id, `date`).
-- The FULL OUTER JOIN keeps keys that exist on only one side; the counters of
-- the missing side are reported as 0 through coalesce.
-- `date` is backtick-quoted because it is a reserved word and to match the
-- quoting used by the other statements in this script.
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.usage_stats AS
SELECT
    coalesce(ds.source, vs.source) AS source,
    coalesce(ds.repository_id, vs.repository_id) AS repository_id,
    coalesce(ds.result_id, vs.result_id) AS result_id,
    coalesce(ds.`date`, vs.`date`) AS `date`,
    coalesce(ds.count, 0) AS downloads,
    coalesce(vs.count, 0) AS views,
    coalesce(ds.openaire, 0) AS openaire_downloads,
    coalesce(vs.openaire, 0) AS openaire_views
FROM ${usageStatsDB}.downloads_stats AS ds
FULL OUTER JOIN ${usageStatsDB}.views_stats AS vs
    ON ds.source = vs.source
    AND ds.repository_id = vs.repository_id
    AND ds.result_id = vs.result_id
    AND ds.`date` = vs.`date`;
-- project_stats: views and downloads from usage_stats, summed per project id
-- and `date`. usage_stats rows are linked to projects through
-- ${statsDB}.project_results (result_id = result).
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.project_stats AS
WITH project_views AS (
    -- views per (id, `date`)
    SELECT id, sum(views) AS views, sum(openaire_views) AS openaire_views, `date`
    FROM ${usageStatsDB}.usage_stats
    JOIN ${statsDB}.project_results ON result_id = result
    GROUP BY id, `date`
),
project_downloads AS (
    -- downloads per (id, `date`)
    SELECT id, sum(downloads) AS downloads, sum(openaire_downloads) AS openaire_downloads, `date`
    FROM ${usageStatsDB}.usage_stats
    JOIN ${statsDB}.project_results ON result_id = result
    GROUP BY id, `date`
)
SELECT
    coalesce(pv.id, pd.id) AS id,
    coalesce(pd.`date`, pv.`date`) AS `date`,
    coalesce(pv.views, 0) AS views,
    coalesce(pd.downloads, 0) AS downloads,
    -- Fallback 0 added: the single-argument coalesce was a no-op.
    coalesce(pv.openaire_views, 0) AS openaire_views,
    coalesce(pd.openaire_downloads, 0) AS openaire_downloads
FROM project_downloads pd
FULL OUTER JOIN project_views pv
    -- The `date` match belongs to the join condition. As a WHERE filter it
    -- discarded every row where one side was NULL, silently turning the
    -- FULL OUTER JOIN into an inner join.
    ON pd.id = pv.id
    AND pd.`date` = pv.`date`;
-- datasource_stats: views and downloads from usage_stats, summed per
-- repository_id and `date`.
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.datasource_stats AS
WITH datasource_views AS (
    -- views per (repository_id, `date`)
    SELECT repository_id, sum(views) AS views, sum(openaire_views) AS openaire_views, `date`
    FROM ${usageStatsDB}.usage_stats
    GROUP BY repository_id, `date`
),
datasource_downloads AS (
    -- downloads per (repository_id, `date`)
    SELECT repository_id, sum(downloads) AS downloads, sum(openaire_downloads) AS openaire_downloads, `date`
    FROM ${usageStatsDB}.usage_stats
    GROUP BY repository_id, `date`
)
SELECT
    -- NOTE(review): the output column is spelled `repositor_id` (sic). It is
    -- kept unchanged because readers of this table may reference that name;
    -- confirm with consumers before renaming it to repository_id.
    coalesce(dv.repository_id, dd.repository_id) AS repositor_id,
    coalesce(dd.`date`, dv.`date`) AS `date`,
    coalesce(dv.views, 0) AS views,
    coalesce(dd.downloads, 0) AS downloads,
    -- Fallback 0 added: the single-argument coalesce was a no-op.
    coalesce(dv.openaire_views, 0) AS openaire_views,
    coalesce(dd.openaire_downloads, 0) AS openaire_downloads
FROM datasource_downloads dd
FULL OUTER JOIN datasource_views dv
    -- The `date` match belongs to the join condition. As a WHERE filter it
    -- discarded every row where one side was NULL, silently turning the
    -- FULL OUTER JOIN into an inner join.
    ON dd.repository_id = dv.repository_id
    AND dd.`date` = dv.`date`;
-- Append R5 metrics to tbl_all_r5_metrics.
-- NOTE(review): tbl_all_r5_metrics is neither created nor emptied in the
-- visible part of this script, so these INSERTs append on every run; confirm
-- that the table is reset elsewhere in the workflow.

-- 1) All rows of lr_tbl_all_r5_metrics (positional SELECT * load).
INSERT INTO ${usageStatsDB}.tbl_all_r5_metrics
SELECT * FROM ${usageStatsDB}.lr_tbl_all_r5_metrics;

-- 2) sushilog_cop_r5 rows with source 'IRUS-UK', with repository and result
--    identifiers translated through the datasource_oids / result_oids lookup
--    tables (oid -> id). Explicit INNER JOINs replace the former comma joins;
--    the selected rows are the same.
INSERT INTO ${usageStatsDB}.tbl_all_r5_metrics
SELECT
    s.source,
    d.id AS repository_id,
    ro.id AS result_id,
    -- Month key formatted as 'YYYY/MM'. `date` is left unqualified as in the
    -- original; presumably a column of sushilog_cop_r5 (TODO confirm).
    CONCAT(YEAR(`date`), '/', LPAD(MONTH(`date`), 2, '0')) AS `date`,
    s.unique_item_investigations,
    s.total_item_investigations,
    s.unique_item_requests,
    s.total_item_requests
FROM ${usageStatsDB}.sushilog_cop_r5 s
INNER JOIN ${statsDB}.datasource_oids d
    ON s.repository = d.oid
INNER JOIN ${statsDB}.result_oids ro
    ON s.rid = ro.oid
WHERE s.source = 'IRUS-UK';