-- Source: forked from D-Net/dnet-hadoop (SQL, 109 lines, 6.6 KiB)
-- COUNTER Code of Practice Release 5 (CoP R5) usage metrics
|
|
|
|
-- Unique item investigations per visit.
-- Reads piwiklogdistinct rows whose source_item_type is 'oaItem' or 'repItem'
-- and whose entity_id is not null, and emits one row per
-- (id_visit, entity_id, month, source) group:
--   id                          entity_id passed through java.net.URLDecoder.decode
--   unique_item_investigations  constant 1 for every group (see note below)
--   openaire_referrer           number of grouped rows whose referrer_name
--                               contains 'openaire'
--   month                       year of timestamp, '/', zero-padded month
CREATE OR REPLACE VIEW ${usageStatsDB}.view_unique_item_investigations AS
SELECT
    id_visit,
    entity_id,
    reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
    -- The previous expression was
    --   CASE WHEN COUNT(entity_id) > 1 THEN 1 ELSE 1 END
    -- whose two branches are identical, so it always evaluated to 1.
    -- The literal states that directly: one investigation per group.
    1 AS unique_item_investigations,
    SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month,
    source
FROM ${usageStatsDB}.piwiklogdistinct
WHERE source_item_type IN ('oaItem', 'repItem')
    AND entity_id IS NOT NULL
GROUP BY
    id_visit,
    entity_id,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')),
    source;
|
|
|
|
-- Rebuild tbl_unique_item_investigations from scratch.
-- Sums view_unique_item_investigations per (datasource, result, month) after
-- resolving the view's source to a datasource via piwik_id and the decoded
-- item id to a result via result_oids.oid.
-- Fix: the DROP statement previously ended with a stray double quote before
-- the semicolon, which made it unparseable.
DROP TABLE IF EXISTS ${usageStatsDB}.tbl_unique_item_investigations;

CREATE TABLE ${usageStatsDB}.tbl_unique_item_investigations AS
SELECT
    'OpenAIRE' AS source,
    d.id AS repository_id,
    ro.id AS result_id,
    p.month AS date,
    SUM(p.unique_item_investigations) AS unique_item_investigations,
    SUM(p.openaire_referrer) AS openaire
FROM ${usageStatsDB}.view_unique_item_investigations AS p
INNER JOIN ${statsDB}.datasource AS d
    ON p.source = d.piwik_id
INNER JOIN ${statsDB}.result_oids AS ro
    ON p.id = ro.oid
-- NOTE(review): the excluded oids look like HTTP status codes that ended up
-- in the oid column -- confirm. The reason for excluding the single re3data
-- datasource is not visible in this file.
WHERE ro.oid NOT IN ('200', '204', '404', '400', '503')
    AND d.id != 're3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
GROUP BY
    d.id,
    ro.id,
    p.month;
|
|
|
|
-- Total item investigations per visit.
-- Reads piwiklogdistinct rows whose source_item_type is 'oaItem' or 'repItem'
-- and whose entity_id is not null, and emits one row per
-- (id_visit, entity_id, month, source) group:
--   id                         entity_id passed through java.net.URLDecoder.decode
--   total_item_investigations  number of grouped rows
--   openaire_referrer          number of grouped rows whose referrer_name
--                              contains 'openaire'
--   month                      year of timestamp, '/', zero-padded month
CREATE OR REPLACE VIEW ${usageStatsDB}.view_total_item_investigations AS
SELECT
    id_visit,
    entity_id,
    reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
    COUNT(entity_id) AS total_item_investigations,
    SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month,
    source
FROM ${usageStatsDB}.piwiklogdistinct
WHERE source_item_type IN ('oaItem', 'repItem')
    AND entity_id IS NOT NULL
GROUP BY
    id_visit,
    entity_id,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')),
    source;
|
|
|
|
-- Rebuild tbl_total_item_investigations from scratch.
-- Sums view_total_item_investigations per (datasource, result, month) after
-- resolving the view's source to a datasource via piwik_id and the decoded
-- item id to a result via result_oids.oid.
DROP TABLE IF EXISTS ${usageStatsDB}.tbl_total_item_investigations;

CREATE TABLE ${usageStatsDB}.tbl_total_item_investigations AS
SELECT
    'OpenAIRE' AS source,
    d.id AS repository_id,
    ro.id AS result_id,
    p.month AS date,
    SUM(p.total_item_investigations) AS total_item_investigations,
    SUM(p.openaire_referrer) AS openaire
FROM ${usageStatsDB}.view_total_item_investigations AS p
INNER JOIN ${statsDB}.datasource AS d
    ON p.source = d.piwik_id
INNER JOIN ${statsDB}.result_oids AS ro
    ON p.id = ro.oid
-- NOTE(review): the excluded oids look like HTTP status codes that ended up
-- in the oid column -- confirm. The reason for excluding the single re3data
-- datasource is not visible in this file.
WHERE ro.oid NOT IN ('200', '204', '404', '400', '503')
    AND d.id != 're3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
GROUP BY
    d.id,
    ro.id,
    p.month;
|
|
|
|
|
|
-- Unique item requests (downloads) per visit.
-- Reads piwiklogdistinct rows with action = 'download' whose source_item_type
-- is 'oaItem' or 'repItem' and whose entity_id is not null, and emits one row
-- per (id_visit, entity_id, month, source) group:
--   id                    entity_id passed through java.net.URLDecoder.decode
--   unique_item_requests  constant 1 for every group (see note below)
--   openaire_referrer     number of grouped rows whose referrer_name
--                         contains 'openaire'
--   month                 year of timestamp, '/', zero-padded month
CREATE OR REPLACE VIEW ${usageStatsDB}.view_unique_item_requests AS
SELECT
    id_visit,
    entity_id,
    reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
    -- The previous expression was
    --   CASE WHEN COUNT(entity_id) > 1 THEN 1 ELSE 1 END
    -- whose two branches are identical, so it always evaluated to 1.
    -- The literal states that directly: one request per group.
    1 AS unique_item_requests,
    SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month,
    source
FROM ${usageStatsDB}.piwiklogdistinct
WHERE action = 'download'
    AND source_item_type IN ('oaItem', 'repItem')
    AND entity_id IS NOT NULL
GROUP BY
    id_visit,
    entity_id,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')),
    source;
|
|
|
|
-- Rebuild tbl_unique_item_requests from scratch.
-- Sums view_unique_item_requests per (datasource, result, month) after
-- resolving the view's source to a datasource via piwik_id and the decoded
-- item id to a result via result_oids.oid.
-- Fix: the DROP statement previously had a stray double quote glued to the
-- table name, which made it unparseable.
DROP TABLE IF EXISTS ${usageStatsDB}.tbl_unique_item_requests;

CREATE TABLE ${usageStatsDB}.tbl_unique_item_requests AS
SELECT
    'OpenAIRE' AS source,
    d.id AS repository_id,
    ro.id AS result_id,
    p.month AS date,
    SUM(p.unique_item_requests) AS unique_item_requests,
    SUM(p.openaire_referrer) AS openaire
FROM ${usageStatsDB}.view_unique_item_requests AS p
INNER JOIN ${statsDB}.datasource AS d
    ON p.source = d.piwik_id
INNER JOIN ${statsDB}.result_oids AS ro
    ON p.id = ro.oid
-- NOTE(review): the excluded oids look like HTTP status codes that ended up
-- in the oid column -- confirm. The reason for excluding the single re3data
-- datasource is not visible in this file.
WHERE ro.oid NOT IN ('200', '204', '404', '400', '503')
    AND d.id != 're3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
GROUP BY
    d.id,
    ro.id,
    p.month;
|
|
|
|
-- Total item requests (downloads) per visit.
-- Reads piwiklogdistinct rows with action = 'download' whose source_item_type
-- is 'oaItem' or 'repItem' and whose entity_id is not null, and emits one row
-- per (id_visit, entity_id, month, source) group:
--   id                   entity_id passed through java.net.URLDecoder.decode
--   total_item_requests  number of grouped rows
--   openaire_referrer    number of grouped rows whose referrer_name
--                        contains 'openaire'
--   month                year of timestamp, '/', zero-padded month
CREATE OR REPLACE VIEW ${usageStatsDB}.view_total_item_requests AS
SELECT
    id_visit,
    entity_id,
    reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
    COUNT(entity_id) AS total_item_requests,
    SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month,
    source
FROM ${usageStatsDB}.piwiklogdistinct
WHERE action = 'download'
    AND source_item_type IN ('oaItem', 'repItem')
    AND entity_id IS NOT NULL
GROUP BY
    id_visit,
    entity_id,
    CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')),
    source;
|
|
|
|
-- Rebuild tbl_total_item_requests from scratch.
-- Sums view_total_item_requests per (datasource, result, month) after
-- resolving the view's source to a datasource via piwik_id and the decoded
-- item id to a result via result_oids.oid.
-- Fix: the DROP statement previously wrote the schema variable as
-- $usageStatsDB (no braces), unlike every other statement in this script, so
-- it was not substituted and the drop did not target the table created below.
DROP TABLE IF EXISTS ${usageStatsDB}.tbl_total_item_requests;

CREATE TABLE ${usageStatsDB}.tbl_total_item_requests AS
SELECT
    'OpenAIRE' AS source,
    d.id AS repository_id,
    ro.id AS result_id,
    p.month AS date,
    SUM(p.total_item_requests) AS total_item_requests,
    SUM(p.openaire_referrer) AS openaire
FROM ${usageStatsDB}.view_total_item_requests AS p
INNER JOIN ${statsDB}.datasource AS d
    ON p.source = d.piwik_id
INNER JOIN ${statsDB}.result_oids AS ro
    ON p.id = ro.oid
-- NOTE(review): the excluded oids look like HTTP status codes that ended up
-- in the oid column -- confirm. The reason for excluding the single re3data
-- datasource is not visible in this file.
WHERE ro.oid NOT IN ('200', '204', '404', '400', '503')
    AND d.id != 're3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
GROUP BY
    d.id,
    ro.id,
    p.month;
|
|
|
|
-- Rebuild tbl_all_r5_metrics: one row per (repository_id, result_id, date)
-- carrying all four metrics, with 0 where a metric has no row for that key.
-- The four per-metric tables are combined with a chain of FULL OUTER JOINs so
-- that a key present in any one of them survives.
--
-- Fixes relative to the previous version:
--   * The table is now created in ${usageStatsDB}, the schema the DROP
--     targets. Previously CREATE TABLE IF NOT EXISTS pointed at ${statsDB},
--     so the DROP never removed the table being created and reruns silently
--     kept the old contents.
--     NOTE(review): confirm no consumer reads ${statsDB}.tbl_all_r5_metrics.
--   * tmp1 now joins on repository_id instead of source. source is the
--     constant 'OpenAIRE' in both inputs, so the old condition matched rows
--     on (result_id, date) only and paired rows of different repositories.
DROP TABLE IF EXISTS ${usageStatsDB}.tbl_all_r5_metrics;

CREATE TABLE IF NOT EXISTS ${usageStatsDB}.tbl_all_r5_metrics AS
WITH
-- tmp1: unique (vs) + total (ds) item investigations.
tmp1 AS (
    SELECT
        COALESCE(ds.repository_id, vs.repository_id) AS repository_id,
        COALESCE(ds.result_id, vs.result_id) AS result_id,
        COALESCE(ds.date, vs.date) AS date,
        COALESCE(vs.unique_item_investigations, 0) AS unique_item_investigations,
        COALESCE(ds.total_item_investigations, 0) AS total_item_investigations
    FROM ${usageStatsDB}.tbl_unique_item_investigations AS vs
    FULL OUTER JOIN ${usageStatsDB}.tbl_total_item_investigations AS ds
        ON ds.repository_id = vs.repository_id
        AND ds.result_id = vs.result_id
        AND ds.date = vs.date
),
-- tmp2: tmp1 (ds) + unique item requests (vs).
tmp2 AS (
    SELECT
        COALESCE(ds.repository_id, vs.repository_id) AS repository_id,
        COALESCE(ds.result_id, vs.result_id) AS result_id,
        COALESCE(ds.date, vs.date) AS date,
        COALESCE(ds.total_item_investigations, 0) AS total_item_investigations,
        COALESCE(ds.unique_item_investigations, 0) AS unique_item_investigations,
        COALESCE(vs.unique_item_requests, 0) AS unique_item_requests
    FROM tmp1 AS ds
    FULL OUTER JOIN ${usageStatsDB}.tbl_unique_item_requests AS vs
        ON ds.repository_id = vs.repository_id
        AND ds.result_id = vs.result_id
        AND ds.date = vs.date
)
-- Final step: tmp2 (ds) + total item requests (vs).
SELECT
    'OpenAIRE' AS source,
    COALESCE(ds.repository_id, vs.repository_id) AS repository_id,
    COALESCE(ds.result_id, vs.result_id) AS result_id,
    COALESCE(ds.date, vs.date) AS date,
    COALESCE(ds.unique_item_investigations, 0) AS unique_item_investigations,
    COALESCE(ds.total_item_investigations, 0) AS total_item_investigations,
    COALESCE(ds.unique_item_requests, 0) AS unique_item_requests,
    COALESCE(vs.total_item_requests, 0) AS total_item_requests
FROM tmp2 AS ds
FULL OUTER JOIN ${usageStatsDB}.tbl_total_item_requests AS vs
    ON ds.repository_id = vs.repository_id
    AND ds.result_id = vs.result_id
    AND ds.date = vs.date;
|