-- dnet-hadoop/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step10.sql
--
-- 11 lines
-- 609 B
-- SQL

-- LaReferencia Distinct
-- Rebuilds the lareferencialogdistinct table from scratch on every run.
-- The table is dropped, recreated as a bucketed transactional ORC table,
-- and then filled with the de-duplicated rows of the raw lareferencialog table.
DROP TABLE IF EXISTS ${usageStatsDB}.lareferencialogdistinct;

CREATE TABLE IF NOT EXISTS ${usageStatsDB}.lareferencialogdistinct (
    matomoid         INT,
    source           INT,
    id_visit         STRING,
    country          STRING,
    action           STRING,
    url              STRING,
    entity_id        STRING,
    source_item_type STRING,
    timestamp        STRING,
    referrer_name    STRING,
    agent            STRING
)
CLUSTERED BY (source, id_visit, action, timestamp, entity_id)
INTO 100 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true');

-- Only rows that carry an entity_id are copied. DISTINCT is applied over all columns.
-- NOTE(review): the INSERT has no column list and the SELECT uses a star, so the
-- raw table is presumably expected to expose the same columns in the same order
-- as the table created above. Confirm against the lareferencialog definition.
INSERT INTO ${usageStatsDB}.lareferencialogdistinct
SELECT DISTINCT *
FROM ${usageRawDataDB}.lareferencialog
WHERE entity_id IS NOT NULL;