From 968d53f1199ee0756fcd47c28f497fd340110cf7 Mon Sep 17 00:00:00 2001 From: Spyros Zoupanos Date: Fri, 11 Sep 2020 20:10:37 +0300 Subject: [PATCH] Finished downloadsStats --- .../graph/usagestats/export/PiwikStatsDB.java | 122 ++++++++++-------- 1 file changed, 70 insertions(+), 52 deletions(-) diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java index 1b7035fee5..2d3f7b08b8 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java @@ -186,7 +186,9 @@ public class PiwikStatsDB { viewsStats(); System.out.println("====> ViewsStats processing ends"); + System.out.println("====> DownloadsStats processing starts"); downloadsStats(); + System.out.println("====> DownloadsStats processing starts"); processPortalLog(); log.info("portal process done"); @@ -219,7 +221,7 @@ public class PiwikStatsDB { Statement stmt = ConnectDB.getConnection().createStatement(); ConnectDB.getConnection().setAutoCommit(false); - System.out.println("====> Droping piwiklogtmp_json table"); + System.out.println("====> Dropping piwiklogtmp_json table"); String drop_piwiklogtmp_json = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp_json"; @@ -331,7 +333,7 @@ public class PiwikStatsDB { Statement stmt = ConnectDB.getConnection().createStatement(); ConnectDB.getConnection().setAutoCommit(false); - System.out.println("====> Droping result_views_monthly_tmp table"); + System.out.println("====> Dropping result_views_monthly_tmp table"); String drop_result_views_monthly_tmp = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp"; @@ -354,7 +356,7 @@ public class PiwikStatsDB { - System.out.println("====> Droping views_stats_tmp table"); + System.out.println("====> Dropping views_stats_tmp table"); String drop_views_stats_tmp = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp"; @@ -375,7 +377,7 @@ public class PiwikStatsDB { System.out.println("====> Created views_stats_tmp table"); - System.out.println("====> Droping views_stats table"); + System.out.println("====> Dropping views_stats table"); String drop_views_stats = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats"; @@ -384,13 +386,13 @@ public class PiwikStatsDB { System.out.println("====> Creating views_stats table"); String create_view_stats = - "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + "views_stats " + - "STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + "views_stats_tmp"; + "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " + + "STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp"; stmt.executeUpdate(create_view_stats); System.out.println("====> Created views_stats table"); - System.out.println("====> Droping pageviews_stats_tmp table"); + System.out.println("====> Dropping pageviews_stats_tmp table"); String drop_pageviews_stats_tmp = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp"; @@ -419,69 +421,85 @@ public class PiwikStatsDB { System.out.println("====> Creating pageviews_stats table"); String create_pageviews_stats = - "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + "pageviews_stats " + - "STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + "pageviews_stats_tmp"; + "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " + + "STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp"; stmt.executeUpdate(create_pageviews_stats); System.out.println("====> Created pageviews_stats table"); stmt.close(); ConnectDB.getConnection().close(); - - System.exit(0); } -// public void viewsStats(String piwikid) throws Exception { -// stmt = ConnectDB.getConnection().createStatement(); -// ConnectDB.getConnection().setAutoCommit(false); -// -// //String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as views, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;"; -// String sql = "CREATE OR REPLACE VIEW result_views_monthly" + piwikid + " AS SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog" + piwikid + " where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;"; -// stmt.executeUpdate(sql); -// -// // sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire INTO views_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; -// sql = "CREATE TABLE IF NOT EXISTS views_stats" + piwikid + " AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire FROM result_views_monthly" + piwikid + " p, datasource d, result_oids ro where p.source!='109' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; -// stmt.executeUpdate(sql); -// -//// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count INTO pageviews_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; -// sql = "CREATE TABLE IF NOT EXISTS pageviews_stats" + piwikid + " AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count FROM result_views_monthly" + piwikid + " p, datasource d, result_oids ro where p.source='109' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; -// stmt.executeUpdate(sql); -// -// sql = "DROP VIEW IF EXISTS result_views_monthly" + piwikid + ";"; -// stmt.executeUpdate(sql); -// -// stmt.close(); -// ConnectDB.getConnection().commit(); -// ConnectDB.getConnection().close(); -// } - private void downloadsStats() throws Exception { Statement stmt = ConnectDB.getConnection().createStatement(); ConnectDB.getConnection().setAutoCommit(false); - // String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as - // downloads, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS - // VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or - // source_item_type='repItem') group by id, month, source order by source, id, month;"; - String sql = "CREATE OR REPLACE VIEW result_downloads_monthly_tmp as select entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklogtmp where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;"; + System.out.println("====> Dropping result_downloads_monthly_tmp view"); + String drop_result_views_monthly_tmp = "DROP VIEW IF EXISTS " + + ConnectDB.getUsageStatsDBSchema() + + ".result_views_monthly_tmp"; + stmt.executeUpdate(drop_result_views_monthly_tmp); + System.out.println("====> Dropped result_downloads_monthly_tmp view"); + + System.out.println("====> Creating result_views_monthly_tmp view"); + String sql = + "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp " + + "AS SELECT entity_id AS id, COUNT(entity_id) as downloads, " + + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, " + + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " + + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp where action='download' " + + "AND (source_item_type='oaItem' OR source_item_type='repItem') " + + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source " + + "ORDER BY source, entity_id, month"; + stmt.executeUpdate(sql); + System.out.println("====> Created result_views_monthly_tmp view"); + + + System.out.println("====> Dropping downloads_stats_tmp table"); + String drop_views_stats = "DROP TABLE IF EXISTS " + + ConnectDB.getUsageStatsDBSchema() + + ".downloads_stats_tmp"; + stmt.executeUpdate(drop_views_stats); + System.out.println("====> Dropped downloads_stats_tmp table"); + + System.out.println("====> Creating downloads_stats_tmp view"); + sql = + "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " + + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " + + "max(downloads) AS count, max(openaire_referrer) AS openaire " + + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp p, " + + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " + + "WHERE p.source=d.piwik_id and p.id=ro.oid " + + "GROUP BY d.id, ro.id, month " + + "ORDER BY d.id, ro.id, month"; + System.out.println("====> Created downloads_stats_tmp view"); stmt.executeUpdate(sql); - // sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, - // max(downloads) AS count INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro - // where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY - // repository_id, result_id, date;"; -// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; - sql = "CREATE TABLE IF NOT EXISTS downloads_stats_tmp AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire FROM result_downloads_monthly_tmp p, public.datasource d, public.result_oids ro where p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; - stmt.executeUpdate(sql); + + + System.out.println("====> Dropping downloads_stats table"); + String drop_pageviews_stats = "DROP TABLE IF EXISTS " + + ConnectDB.getUsageStatsDBSchema() + + ".downloads_stats"; + stmt.executeUpdate(drop_pageviews_stats); + System.out.println("====> Dropped downloads_stats table"); + + System.out.println("====> Creating downloads_stats table"); + String create_pageviews_stats = + "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " + + "STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp"; + stmt.executeUpdate(create_pageviews_stats); + System.out.println("====> Created downloads_stats table"); - sql = "CREATE TABLE IF NOT EXISTS downloads_stats (like downloads_stats_tmp including all)"; - stmt.executeUpdate(sql); - - sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp;"; + System.out.println("====> Dropping pageviews_stats table"); + sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp"; + System.out.println("====> Dropped pageviews_stats table"); stmt.executeUpdate(sql); stmt.close(); - ConnectDB.getConnection().commit(); ConnectDB.getConnection().close(); + + System.exit(0); } public void finalizeStats() throws Exception {