-- Forked from D-Net/dnet-hadoop (file listing metadata: SQL, 14 lines, 583 B).
-- Episciences log: drop any previous de-duplicated log table so that the
-- statements that follow can recreate and reload it.
-- ${usageStatsDB} is a placeholder for the target database name
-- (presumably substituted by whatever runs this script - verify with the caller).
DROP TABLE IF EXISTS ${usageStatsDB}.episcienceslogdistinct;
-- De-duplicated copy of the Episciences log, stored as a bucketed,
-- transactional ORC table (100 buckets over the clustering columns).
-- The load into this table uses SELECT *, so this column order is matched
-- positionally against ${usageStatsDB}.episcienceslog
-- (NOTE(review): confirm it mirrors that table's DDL).
-- NOTE(review): `timestamp` is also a type keyword; it is left unquoted
-- exactly as in the original - confirm the target engine accepts it.
CREATE TABLE IF NOT EXISTS ${usageStatsDB}.episcienceslogdistinct (
    source           INT,
    id_visit         STRING,
    country          STRING,
    action           STRING,
    url              STRING,
    entity_id        STRING,
    source_item_type STRING,
    timestamp        STRING,
    referrer_name    STRING,
    agent            STRING
)
CLUSTERED BY (source, id_visit, action, timestamp, entity_id)
INTO 100 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true');
-- Load every distinct row of the raw Episciences log that has an entity_id.
-- SELECT * is kept as in the original: values are inserted by position, so
-- this presumably relies on episcienceslog having the same column layout as
-- episcienceslogdistinct (NOTE(review): verify against the episcienceslog DDL
-- before replacing * with an explicit column list).
INSERT INTO ${usageStatsDB}.episcienceslogdistinct
SELECT DISTINCT *
FROM ${usageStatsDB}.episcienceslog
WHERE entity_id IS NOT NULL;