dnet-hadoop/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step5.sql

109 lines
6.6 KiB
SQL

--CoP R5
-- View with one row per (id_visit, entity_id, month, source) built from the
-- piwiklogdistinct rows whose source_item_type is oaItem or repItem and whose
-- entity_id is not null.
--   id                         : entity_id passed through java.net.URLDecoder.decode
--   unique_item_investigations : constant 1, so a group counts once however many
--                                log rows it contains (the former CASE returned 1
--                                in both of its branches)
--   openaire_referrer          : number of rows in the group whose referrer_name
--                                contains the text openaire
--   month                      : year, a slash and the zero-padded month of
--                                timestamp, for example 2021/03
CREATE OR REPLACE VIEW ${usageStatsDB}.view_unique_item_investigations AS
SELECT
    id_visit,
    entity_id,
    reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
    1 AS unique_item_investigations,
    SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month,
    source
FROM ${usageStatsDB}.piwiklogdistinct
WHERE source_item_type IN ('oaItem', 'repItem')
    AND entity_id IS NOT NULL
GROUP BY
    id_visit,
    entity_id,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')),
    source;
-- Rebuild the unique item investigation totals per repository, result and month.
-- View rows are matched to a datasource through piwik_id and to a result through
-- the URL-decoded entity id (p.id = ro.oid).
-- The five excluded oid values look like HTTP status codes, presumably logged in
-- place of a real identifier (TODO confirm). One datasource id is excluded
-- explicitly and the reason is not visible in this script.
-- The DROP statement previously carried a stray double quote before its
-- terminator, which made it invalid.
DROP TABLE IF EXISTS ${usageStatsDB}.tbl_unique_item_investigations;
CREATE TABLE ${usageStatsDB}.tbl_unique_item_investigations AS
SELECT
    'OpenAIRE' AS source,
    d.id AS repository_id,
    ro.id AS result_id,
    p.month AS date,
    SUM(p.unique_item_investigations) AS unique_item_investigations,
    SUM(p.openaire_referrer) AS openaire
FROM ${usageStatsDB}.view_unique_item_investigations AS p
INNER JOIN ${statsDB}.datasource AS d
    ON p.source = d.piwik_id
INNER JOIN ${statsDB}.result_oids AS ro
    ON p.id = ro.oid
WHERE ro.oid NOT IN ('200', '204', '404', '400', '503')
    AND d.id != 're3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
GROUP BY
    d.id,
    ro.id,
    p.month;
-- View with one row per (id_visit, entity_id, month, source) built from the
-- piwiklogdistinct rows whose source_item_type is oaItem or repItem and whose
-- entity_id is not null.
--   id                        : entity_id passed through java.net.URLDecoder.decode
--   total_item_investigations : number of log rows in the group
--   openaire_referrer         : number of rows in the group whose referrer_name
--                               contains the text openaire
--   month                     : year, a slash and the zero-padded month of
--                               timestamp, for example 2021/03
CREATE OR REPLACE VIEW ${usageStatsDB}.view_total_item_investigations AS
SELECT
    id_visit,
    entity_id,
    reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
    COUNT(entity_id) AS total_item_investigations,
    SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month,
    source
FROM ${usageStatsDB}.piwiklogdistinct
WHERE source_item_type IN ('oaItem', 'repItem')
    AND entity_id IS NOT NULL
GROUP BY
    id_visit,
    entity_id,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')),
    source;
-- Rebuild the total item investigation counts per repository, result and month.
-- View rows are matched to a datasource through piwik_id and to a result through
-- the URL-decoded entity id (p.id = ro.oid).
-- The five excluded oid values look like HTTP status codes, presumably logged in
-- place of a real identifier (TODO confirm). One datasource id is excluded
-- explicitly and the reason is not visible in this script.
DROP TABLE IF EXISTS ${usageStatsDB}.tbl_total_item_investigations;
CREATE TABLE ${usageStatsDB}.tbl_total_item_investigations AS
SELECT
    'OpenAIRE' AS source,
    d.id AS repository_id,
    ro.id AS result_id,
    p.month AS date,
    SUM(p.total_item_investigations) AS total_item_investigations,
    SUM(p.openaire_referrer) AS openaire
FROM ${usageStatsDB}.view_total_item_investigations AS p
INNER JOIN ${statsDB}.datasource AS d
    ON p.source = d.piwik_id
INNER JOIN ${statsDB}.result_oids AS ro
    ON p.id = ro.oid
WHERE ro.oid NOT IN ('200', '204', '404', '400', '503')
    AND d.id != 're3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
GROUP BY
    d.id,
    ro.id,
    p.month;
-- View with one row per (id_visit, entity_id, month, source) built from the
-- piwiklogdistinct rows whose action is download, whose source_item_type is
-- oaItem or repItem and whose entity_id is not null.
--   id                   : entity_id passed through java.net.URLDecoder.decode
--   unique_item_requests : constant 1, so a group counts once however many log
--                          rows it contains (the former CASE returned 1 in both
--                          of its branches)
--   openaire_referrer    : number of rows in the group whose referrer_name
--                          contains the text openaire
--   month                : year, a slash and the zero-padded month of timestamp,
--                          for example 2021/03
CREATE OR REPLACE VIEW ${usageStatsDB}.view_unique_item_requests AS
SELECT
    id_visit,
    entity_id,
    reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
    1 AS unique_item_requests,
    SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month,
    source
FROM ${usageStatsDB}.piwiklogdistinct
WHERE action = 'download'
    AND source_item_type IN ('oaItem', 'repItem')
    AND entity_id IS NOT NULL
GROUP BY
    id_visit,
    entity_id,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')),
    source;
-- Rebuild the unique item request totals per repository, result and month.
-- View rows are matched to a datasource through piwik_id and to a result through
-- the URL-decoded entity id (p.id = ro.oid).
-- The five excluded oid values look like HTTP status codes, presumably logged in
-- place of a real identifier (TODO confirm). One datasource id is excluded
-- explicitly and the reason is not visible in this script.
-- The DROP statement previously carried a stray double quote after the table
-- name, which made it invalid.
DROP TABLE IF EXISTS ${usageStatsDB}.tbl_unique_item_requests;
CREATE TABLE ${usageStatsDB}.tbl_unique_item_requests AS
SELECT
    'OpenAIRE' AS source,
    d.id AS repository_id,
    ro.id AS result_id,
    p.month AS date,
    SUM(p.unique_item_requests) AS unique_item_requests,
    SUM(p.openaire_referrer) AS openaire
FROM ${usageStatsDB}.view_unique_item_requests AS p
INNER JOIN ${statsDB}.datasource AS d
    ON p.source = d.piwik_id
INNER JOIN ${statsDB}.result_oids AS ro
    ON p.id = ro.oid
WHERE ro.oid NOT IN ('200', '204', '404', '400', '503')
    AND d.id != 're3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
GROUP BY
    d.id,
    ro.id,
    p.month;
-- View with one row per (id_visit, entity_id, month, source) built from the
-- piwiklogdistinct rows whose action is download, whose source_item_type is
-- oaItem or repItem and whose entity_id is not null.
--   id                  : entity_id passed through java.net.URLDecoder.decode
--   total_item_requests : number of log rows in the group
--   openaire_referrer   : number of rows in the group whose referrer_name
--                         contains the text openaire
--   month               : year, a slash and the zero-padded month of timestamp,
--                         for example 2021/03
CREATE OR REPLACE VIEW ${usageStatsDB}.view_total_item_requests AS
SELECT
    id_visit,
    entity_id,
    reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
    COUNT(entity_id) AS total_item_requests,
    SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month,
    source
FROM ${usageStatsDB}.piwiklogdistinct
WHERE action = 'download'
    AND source_item_type IN ('oaItem', 'repItem')
    AND entity_id IS NOT NULL
GROUP BY
    id_visit,
    entity_id,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')),
    source;
-- Rebuild the total item request counts per repository, result and month.
-- View rows are matched to a datasource through piwik_id and to a result through
-- the URL-decoded entity id (p.id = ro.oid).
-- The five excluded oid values look like HTTP status codes, presumably logged in
-- place of a real identifier (TODO confirm). One datasource id is excluded
-- explicitly and the reason is not visible in this script.
-- The DROP statement previously wrote the database variable without braces,
-- unlike every other reference in this script, so it did not address the same
-- table that the CREATE below builds.
DROP TABLE IF EXISTS ${usageStatsDB}.tbl_total_item_requests;
CREATE TABLE ${usageStatsDB}.tbl_total_item_requests AS
SELECT
    'OpenAIRE' AS source,
    d.id AS repository_id,
    ro.id AS result_id,
    p.month AS date,
    SUM(p.total_item_requests) AS total_item_requests,
    SUM(p.openaire_referrer) AS openaire
FROM ${usageStatsDB}.view_total_item_requests AS p
INNER JOIN ${statsDB}.datasource AS d
    ON p.source = d.piwik_id
INNER JOIN ${statsDB}.result_oids AS ro
    ON p.id = ro.oid
WHERE ro.oid NOT IN ('200', '204', '404', '400', '503')
    AND d.id != 're3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
GROUP BY
    d.id,
    ro.id,
    p.month;
-- Merge the four per-metric tables into one wide table with a row per
-- (repository_id, result_id, date). The tables are combined pairwise with
-- FULL OUTER JOINs so that a key present in any input is kept, and a metric
-- missing for a key is reported as 0. The openaire referrer counts of the
-- input tables are not carried over.
--
-- Two fixes compared with the previous version of this statement:
--   * The table is now dropped and created in the same database. Before, the
--     DROP addressed the usage stats database while CREATE IF NOT EXISTS
--     addressed the stats database, so an existing table was never rebuilt.
--     NOTE(review): the usage stats database was chosen because every other
--     table of this script lives there - confirm against downstream readers.
--   * The first join now matches on repository_id instead of source. The
--     source column is the constant OpenAIRE in both inputs, so the old
--     predicate paired rows of different repositories that shared a result
--     and a month.
DROP TABLE IF EXISTS ${usageStatsDB}.tbl_all_r5_metrics;
CREATE TABLE ${usageStatsDB}.tbl_all_r5_metrics AS
WITH investigations AS (
    SELECT
        COALESCE(ti.repository_id, ui.repository_id) AS repository_id,
        COALESCE(ti.result_id, ui.result_id) AS result_id,
        COALESCE(ti.date, ui.date) AS date,
        COALESCE(ui.unique_item_investigations, 0) AS unique_item_investigations,
        COALESCE(ti.total_item_investigations, 0) AS total_item_investigations
    FROM ${usageStatsDB}.tbl_unique_item_investigations AS ui
    FULL OUTER JOIN ${usageStatsDB}.tbl_total_item_investigations AS ti
        ON ti.repository_id = ui.repository_id
        AND ti.result_id = ui.result_id
        AND ti.date = ui.date
),
investigations_and_unique_requests AS (
    SELECT
        COALESCE(inv.repository_id, ur.repository_id) AS repository_id,
        COALESCE(inv.result_id, ur.result_id) AS result_id,
        COALESCE(inv.date, ur.date) AS date,
        COALESCE(inv.total_item_investigations, 0) AS total_item_investigations,
        COALESCE(inv.unique_item_investigations, 0) AS unique_item_investigations,
        COALESCE(ur.unique_item_requests, 0) AS unique_item_requests
    FROM investigations AS inv
    FULL OUTER JOIN ${usageStatsDB}.tbl_unique_item_requests AS ur
        ON inv.repository_id = ur.repository_id
        AND inv.result_id = ur.result_id
        AND inv.date = ur.date
)
SELECT
    'OpenAIRE' AS source,
    COALESCE(m.repository_id, tr.repository_id) AS repository_id,
    COALESCE(m.result_id, tr.result_id) AS result_id,
    COALESCE(m.date, tr.date) AS date,
    COALESCE(m.unique_item_investigations, 0) AS unique_item_investigations,
    COALESCE(m.total_item_investigations, 0) AS total_item_investigations,
    COALESCE(m.unique_item_requests, 0) AS unique_item_requests,
    COALESCE(tr.total_item_requests, 0) AS total_item_requests
FROM investigations_and_unique_requests AS m
FULL OUTER JOIN ${usageStatsDB}.tbl_total_item_requests AS tr
    ON m.repository_id = tr.repository_id
    AND m.result_id = tr.result_id
    AND m.date = tr.date;