forked from D-Net/dnet-hadoop
132 lines
5.4 KiB
SQL
132 lines
5.4 KiB
SQL
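-- Finalize usage stats: rebuild the consolidated views/downloads/pageviews tables,
-- derive the usage, project and datasource aggregates, and load the R5 metrics table.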
|
|
|
|
-- Drop every table this script rebuilds. All CREATE statements in this script
-- use IF NOT EXISTS, so a table that is not dropped here keeps its previous
-- contents and its CREATE silently becomes a no-op on later runs.
DROP TABLE IF EXISTS ${usageStatsDB}.views_stats;
DROP TABLE IF EXISTS ${usageStatsDB}.downloads_stats;
DROP TABLE IF EXISTS ${usageStatsDB}.pageviews_stats;
DROP TABLE IF EXISTS ${usageStatsDB}.usage_stats;
DROP TABLE IF EXISTS ${usageStatsDB}.project_stats;
-- datasource_stats is built by this script with CREATE TABLE IF NOT EXISTS ... AS,
-- so it must be dropped like the other derived tables or it is never refreshed.
DROP TABLE IF EXISTS ${usageStatsDB}.datasource_stats;
-- NOTE(review): nothing in this script creates download_stats (singular); this
-- looks like a typo for another table. Kept because IF EXISTS makes it harmless
-- and it may clean up a legacy table -- confirm and remove.
DROP TABLE IF EXISTS ${usageStatsDB}.download_stats;
|
|
|
|
-- views_stats: consolidated view counts.
-- The table copies its column layout from openaire_views_stats_tmp and is then
-- filled by appending each per-source table. Every load is a positional
-- INSERT ... SELECT *, so each source must expose its columns in the same order
-- as openaire_views_stats_tmp.
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.views_stats
    LIKE ${usageStatsDB}.openaire_views_stats_tmp
    STORED AS PARQUET;

-- OpenAIRE
INSERT INTO ${usageStatsDB}.views_stats
    SELECT * FROM ${usageStatsDB}.openaire_views_stats_tmp;

-- Episciences
INSERT INTO ${usageStatsDB}.views_stats
    SELECT * FROM ${usageStatsDB}.episciencesviews;

-- peDOCS
INSERT INTO ${usageStatsDB}.views_stats
    SELECT * FROM ${usageStatsDB}.pedocs_views_stats_tmp;

-- TU Delft
INSERT INTO ${usageStatsDB}.views_stats
    SELECT * FROM ${usageStatsDB}.tudelft_views_stats_tmp;

-- LA
INSERT INTO ${usageStatsDB}.views_stats
    SELECT * FROM ${usageStatsDB}.la_views_stats_tmp;

-- B2SHARE
INSERT INTO ${usageStatsDB}.views_stats
    SELECT * FROM ${usageStatsDB}.b2share_views_stats_tmp;
|
|
|
|
-- downloads_stats: consolidated download counts.
-- The table copies its column layout from openaire_downloads_stats_tmp and is
-- then filled by appending each per-source table. Every load is a positional
-- INSERT ... SELECT *, so each source must expose its columns in the same order
-- as openaire_downloads_stats_tmp.
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.downloads_stats
    LIKE ${usageStatsDB}.openaire_downloads_stats_tmp
    STORED AS PARQUET;

-- OpenAIRE
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT * FROM ${usageStatsDB}.openaire_downloads_stats_tmp;

-- Episciences
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT * FROM ${usageStatsDB}.episciencesdownloads;

-- peDOCS
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT * FROM ${usageStatsDB}.pedocs_downloads_stats_tmp;

-- TU Delft
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT * FROM ${usageStatsDB}.tudelft_downloads_stats_tmp;

-- B2SHARE
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT * FROM ${usageStatsDB}.b2share_downloads_stats_tmp;

-- LA
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT * FROM ${usageStatsDB}.la_downloads_stats_tmp;

-- IRUS
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT * FROM ${usageStatsDB}.irus_downloads_stats_tmp;
|
|
|
|
-- irus_R5_stats_tmp carries both counters in one table: its `views` column is
-- appended to views_stats and its `downloads` column to downloads_stats, each
-- alongside the same source/repository/result/date/openaire columns.
INSERT INTO ${usageStatsDB}.views_stats
    SELECT
        source,
        repository_id,
        result_id,
        `date`,
        views,
        openaire
    FROM ${usageStatsDB}.irus_R5_stats_tmp;

INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT
        source,
        repository_id,
        result_id,
        `date`,
        downloads,
        openaire
    FROM ${usageStatsDB}.irus_R5_stats_tmp;

-- SARC (downloads only)
INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT * FROM ${usageStatsDB}.sarc_downloads_stats_tmp;

-- Datacite (views and downloads come from separate tables)
INSERT INTO ${usageStatsDB}.views_stats
    SELECT * FROM ${usageStatsDB}.datacite_views;

INSERT INTO ${usageStatsDB}.downloads_stats
    SELECT * FROM ${usageStatsDB}.datacite_downloads;
|
|
|
|
-- pageviews_stats: same column layout as openaire_pageviews_stats_tmp, filled
-- with a straight copy of that table (its only source in this script).
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.pageviews_stats
    LIKE ${usageStatsDB}.openaire_pageviews_stats_tmp
    STORED AS PARQUET;

INSERT INTO ${usageStatsDB}.pageviews_stats
    SELECT * FROM ${usageStatsDB}.openaire_pageviews_stats_tmp;
|
|
|
|
-- usage_stats: one row per (source, repository_id, result_id, date) key found
-- in downloads_stats and/or views_stats. The FULL OUTER JOIN keeps keys that
-- exist on only one side; the missing side's counters default to 0.
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.usage_stats AS
SELECT
    COALESCE(ds.source, vs.source)               AS source,
    COALESCE(ds.repository_id, vs.repository_id) AS repository_id,
    COALESCE(ds.result_id, vs.result_id)         AS result_id,
    COALESCE(ds.date, vs.date)                   AS date,
    COALESCE(ds.count, 0)                        AS downloads,
    COALESCE(vs.count, 0)                        AS views,
    COALESCE(ds.openaire, 0)                     AS openaire_downloads,
    COALESCE(vs.openaire, 0)                     AS openaire_views
FROM ${usageStatsDB}.downloads_stats AS ds
FULL OUTER JOIN ${usageStatsDB}.views_stats AS vs
    ON  ds.source = vs.source
    AND ds.repository_id = vs.repository_id
    AND ds.result_id = vs.result_id
    AND ds.date = vs.date;
|
|
|
|
-- project_stats: usage counters summed per project id and `date`.
-- usage_stats rows are linked to projects through ${statsDB}.project_results
-- (result_id = result); views and downloads are aggregated in separate CTEs
-- and then merged on (id, date).
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.project_stats AS
WITH project_views AS (
    SELECT
        id,
        SUM(views) AS views,
        SUM(openaire_views) AS openaire_views,
        `date`
    FROM ${usageStatsDB}.usage_stats
    JOIN ${statsDB}.project_results ON result_id = result
    GROUP BY id, `date`
),
project_downloads AS (
    SELECT
        id,
        SUM(downloads) AS downloads,
        SUM(openaire_downloads) AS openaire_downloads,
        `date`
    FROM ${usageStatsDB}.usage_stats
    JOIN ${statsDB}.project_results ON result_id = result
    GROUP BY id, `date`
)
SELECT
    COALESCE(pv.id, pd.id) AS id,
    COALESCE(pd.`date`, pv.`date`) AS `date`,
    COALESCE(pv.views, 0) AS views,
    COALESCE(pd.downloads, 0) AS downloads,
    -- Defaults to 0 like the other counters (the default was previously missing).
    COALESCE(pv.openaire_views, 0) AS openaire_views,
    COALESCE(pd.openaire_downloads, 0) AS openaire_downloads
FROM project_downloads AS pd
FULL OUTER JOIN project_views AS pv
    -- The date match belongs in ON: as a WHERE filter it rejected every row
    -- where one side is NULL, silently turning the outer join into an inner
    -- join. Rows whose id or date is NULL never satisfy "=" and are therefore
    -- emitted unmatched.
    ON  pd.id = pv.id
    AND pd.`date` = pv.`date`;
|
|
|
|
-- datasource_stats: usage counters summed per repository_id and `date`.
-- Views and downloads are aggregated from usage_stats in separate CTEs and
-- then merged on (repository_id, date).
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.datasource_stats AS
WITH datasource_views AS (
    SELECT
        repository_id,
        SUM(views) AS views,
        SUM(openaire_views) AS openaire_views,
        `date`
    FROM ${usageStatsDB}.usage_stats
    GROUP BY repository_id, `date`
),
datasource_downloads AS (
    SELECT
        repository_id,
        SUM(downloads) AS downloads,
        SUM(openaire_downloads) AS openaire_downloads,
        `date`
    FROM ${usageStatsDB}.usage_stats
    GROUP BY repository_id, `date`
)
SELECT
    -- NOTE(review): the output column is spelled "repositor_id" (missing "y").
    -- Left unchanged because renaming it alters the table schema for any
    -- downstream reader -- confirm consumers before fixing.
    COALESCE(dv.repository_id, dd.repository_id) AS repositor_id,
    COALESCE(dd.`date`, dv.`date`) AS `date`,
    COALESCE(dv.views, 0) AS views,
    COALESCE(dd.downloads, 0) AS downloads,
    -- Defaults to 0 like the other counters (the default was previously missing).
    COALESCE(dv.openaire_views, 0) AS openaire_views,
    COALESCE(dd.openaire_downloads, 0) AS openaire_downloads
FROM datasource_downloads AS dd
FULL OUTER JOIN datasource_views AS dv
    -- The date match belongs in ON: as a WHERE filter it rejected every row
    -- where one side is NULL, silently turning the outer join into an inner
    -- join. Rows whose repository_id or date is NULL never satisfy "=" and are
    -- therefore emitted unmatched.
    ON  dd.repository_id = dv.repository_id
    AND dd.`date` = dv.`date`;
|
|
|
|
|
|
-- Append the rows of lr_tbl_all_r5_metrics to tbl_all_r5_metrics unchanged
-- (positional copy, so both tables must share the same column order).
INSERT INTO ${usageStatsDB}.tbl_all_r5_metrics
    SELECT * FROM ${usageStatsDB}.lr_tbl_all_r5_metrics;

-- Append the IRUS-UK rows of sushilog_cop_r5. The repository and item
-- identifiers are translated through the *_oids lookup tables (oid -> id), and
-- the date is rendered as 'YYYY/MM' with a zero-padded month.
-- NOTE(review): `date` is unqualified; presumably it resolves to a column of
-- sushilog_cop_r5 -- confirm.
INSERT INTO ${usageStatsDB}.tbl_all_r5_metrics
    SELECT
        s.source,
        d.id AS repository_id,
        ro.id AS result_id,
        CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) AS date,
        s.unique_item_investigations,
        s.total_item_investigations,
        s.unique_item_requests,
        s.total_item_requests
    FROM ${usageStatsDB}.sushilog_cop_r5 AS s
    INNER JOIN ${statsDB}.datasource_oids AS d
        ON s.repository = d.oid
    INNER JOIN ${statsDB}.result_oids AS ro
        ON s.rid = ro.oid
    WHERE s.source = 'IRUS-UK';
|
|
|
|
|