From 2d2d1b9694f26da41ab687e6c9b9171c22d53c55 Mon Sep 17 00:00:00 2001 From: Spyros Zoupanos Date: Thu, 10 Sep 2020 22:27:19 +0300 Subject: [PATCH] More progress on viewsStats --- .../graph/usagestats/export/PiwikStatsDB.java | 118 ++++++++++++++---- 1 file changed, 93 insertions(+), 25 deletions(-) diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java index 3ec7d39493..63264d8faa 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java @@ -178,11 +178,14 @@ public class PiwikStatsDB { log.info("removing double clicks done"); System.out.println("====> Cleaning oai"); - cleanOAI(); +// cleanOAI(); System.out.println("====> Cleaning oai done"); log.info("cleaning oai done"); + System.out.println("====> ViewsStats processing starts"); viewsStats(); + System.out.println("====> ViewsStats processing ends"); + downloadsStats(); processPortalLog(); @@ -328,33 +331,101 @@ public class PiwikStatsDB { Statement stmt = ConnectDB.getConnection().createStatement(); ConnectDB.getConnection().setAutoCommit(false); - // String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as - // views, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS - // VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or - // source_item_type='repItem') group by id, month, source order by source, id, month;"; - String sql = "CREATE OR REPLACE VIEW result_views_monthly_tmp AS SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklogtmp where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;"; - stmt.executeUpdate(sql); + System.out.println("====> Droping result_views_monthly_tmp table"); + String drop_result_views_monthly_tmp = "DROP TABLE IF EXISTS " + + ConnectDB.getUsageStatsDBSchema() + + ".result_views_monthly_tmp"; + stmt.executeUpdate(drop_result_views_monthly_tmp); + System.out.println("====> Dropped result_views_monthly_tmp table"); + + System.out.println("====> Creating result_views_monthly_tmp table"); + String create_result_views_monthly_tmp = + "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp " + + "AS SELECT entity_id AS id, " + + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) " + + "AS openaire_referrer, " + + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " + + "FROM `usagestats_13`.piwiklogtmp where action='action' and (source_item_type='oaItem' or " + + "source_item_type='repItem') " + + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " + + "source ORDER BY source, entity_id"; + stmt.executeUpdate(create_result_views_monthly_tmp); + System.out.println("====> Created result_views_monthly_tmp table"); + - // sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, - // max(views) AS count, max(openaire_referrer) AS openaire INTO views_stats FROM result_views_monthly p, - // datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by - // repository_id, result_id, date ORDER BY repository_id, result_id, date;"; - sql = "CREATE TABLE IF NOT EXISTS views_stats_tmp AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire FROM result_views_monthly_tmp p, public.datasource d, public.result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; - stmt.executeUpdate(sql); + + System.out.println("====> Droping views_stats_tmp table"); + String drop_views_stats_tmp = "DROP TABLE IF EXISTS " + + ConnectDB.getUsageStatsDBSchema() + + ".views_stats_tmp"; + stmt.executeUpdate(drop_views_stats_tmp); + System.out.println("====> Dropped views_stats_tmp table"); + + System.out.println("====> Creating views_stats_tmp table"); + String create_views_stats_tmp = + "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp " + + "AS SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " + + "max(views) AS count, max(openaire_referrer) AS openaire " + + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp p, " + + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " + + "WHERE p.source!='5' AND p.source=d.piwik_id AND p.id=ro.oid " + + "GROUP BY d.id, ro.id, month " + + "ORDER BY d.id, ro.id, month"; + stmt.executeUpdate(create_views_stats_tmp); + System.out.println("====> Created views_stats_tmp table"); + + + System.out.println("====> Droping views_stats table"); + String drop_views_stats = "DROP TABLE IF EXISTS " + + ConnectDB.getUsageStatsDBSchema() + + ".views_stats"; + stmt.executeUpdate(drop_views_stats); + System.out.println("====> Dropped views_stats table"); - sql = "CREATE TABLE IF NOT EXISTS views_stats (like views_stats_tmp including all)"; - stmt.executeUpdate(sql); + System.out.println("====> Creating views_stats table"); + String create_view_stats = + "CREATE TABLE IF NOT EXISTS views_stats STORED AS PARQUET AS SELECT * FROM views_stats_tmp"; + stmt.executeUpdate(create_view_stats); + System.out.println("====> Created views_stats table"); + + + System.out.println("====> Droping pageviews_stats_tmp table"); + String drop_pageviews_stats_tmp = "DROP TABLE IF EXISTS " + + ConnectDB.getUsageStatsDBSchema() + + ".pageviews_stats_tmp"; + stmt.executeUpdate(drop_pageviews_stats_tmp); + System.out.println("====> Dropped pageviews_stats_tmp table"); + + System.out.println("====> Creating pageviews_stats_tmp table"); + String create_pageviews_stats_tmp = + "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp AS SELECT " + + "'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count " + + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp p, " + + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " + + "WHERE p.source='23' AND p.source=d.piwik_id and p.id=ro.oid \n" + + "GROUP BY d.id, ro.id, month " + + "ORDER BY d.id, ro.id, month"; + stmt.executeUpdate(create_pageviews_stats_tmp); + System.out.println("====> Created pageviews_stats_tmp table"); -// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count INTO pageviews_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; - sql = "CREATE TABLE IF NOT EXISTS pageviews_stats_tmp AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count FROM result_views_monthly_tmp p, public.datasource d, public.result_oids ro where p.source='109' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;"; - stmt.executeUpdate(sql); - - sql = "CREATE TABLE IF NOT EXISTS pageviews_stats (like pageviews_stats_tmp including all)"; - stmt.executeUpdate(sql); + + System.out.println("====> Droping pageviews_stats table"); + String drop_pageviews_stats = "DROP TABLE IF EXISTS " + + ConnectDB.getUsageStatsDBSchema() + + ".pageviews_stats"; + stmt.executeUpdate(drop_pageviews_stats); + System.out.println("====> Dropped pageviews_stats table"); + + System.out.println("====> Creating pageviews_stats table"); + String create_pageviews_stats = + "CREATE TABLE IF NOT EXISTS pageviews_stats STORED AS PARQUET AS SELECT * FROM pageviews_stats_tmp"; + stmt.executeUpdate(create_pageviews_stats); + System.out.println("====> Created pageviews_stats table"); stmt.close(); - ConnectDB.getConnection().commit(); ConnectDB.getConnection().close(); + + System.exit(0); } // public void viewsStats(String piwikid) throws Exception { @@ -1016,10 +1087,7 @@ public class PiwikStatsDB { System.out.println("====> Cleaning oai - Done, closing connection"); - ConnectDB.getConnection().close(); - - System.exit(0); } private String processPortalURL(String url) {