More progress on viewsStats

This commit is contained in:
Spyros Zoupanos 2020-09-10 22:27:19 +03:00
parent 1d9f8f79a8
commit 2d2d1b9694
1 changed file with 93 additions and 25 deletions

View File

@ -178,11 +178,14 @@ public class PiwikStatsDB {
log.info("removing double clicks done");
System.out.println("====> Cleaning oai");
cleanOAI();
// cleanOAI();
System.out.println("====> Cleaning oai done");
log.info("cleaning oai done");
System.out.println("====> ViewsStats processing starts");
viewsStats();
System.out.println("====> ViewsStats processing ends");
downloadsStats();
processPortalLog();
@ -328,33 +331,101 @@ public class PiwikStatsDB {
Statement stmt = ConnectDB.getConnection().createStatement();
ConnectDB.getConnection().setAutoCommit(false);
// String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as
// views, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS
// VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or
// source_item_type='repItem') group by id, month, source order by source, id, month;";
String sql = "CREATE OR REPLACE VIEW result_views_monthly_tmp AS SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklogtmp where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
stmt.executeUpdate(sql);
System.out.println("====> Droping result_views_monthly_tmp table");
String drop_result_views_monthly_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".result_views_monthly_tmp";
stmt.executeUpdate(drop_result_views_monthly_tmp);
System.out.println("====> Dropped result_views_monthly_tmp table");
System.out.println("====> Creating result_views_monthly_tmp table");
String create_result_views_monthly_tmp =
"CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp " +
"AS SELECT entity_id AS id, " +
"COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) " +
"AS openaire_referrer, " +
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
"FROM `usagestats_13`.piwiklogtmp where action='action' and (source_item_type='oaItem' or " +
"source_item_type='repItem') " +
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
"source ORDER BY source, entity_id";
stmt.executeUpdate(create_result_views_monthly_tmp);
System.out.println("====> Created result_views_monthly_tmp table");
// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date,
// max(views) AS count, max(openaire_referrer) AS openaire INTO views_stats FROM result_views_monthly p,
// datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by
// repository_id, result_id, date ORDER BY repository_id, result_id, date;";
sql = "CREATE TABLE IF NOT EXISTS views_stats_tmp AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire FROM result_views_monthly_tmp p, public.datasource d, public.result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
stmt.executeUpdate(sql);
System.out.println("====> Droping views_stats_tmp table");
String drop_views_stats_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".views_stats_tmp";
stmt.executeUpdate(drop_views_stats_tmp);
System.out.println("====> Dropped views_stats_tmp table");
System.out.println("====> Creating views_stats_tmp table");
String create_views_stats_tmp =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp " +
"AS SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
"max(views) AS count, max(openaire_referrer) AS openaire " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp p, " +
ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
"WHERE p.source!='5' AND p.source=d.piwik_id AND p.id=ro.oid " +
"GROUP BY d.id, ro.id, month " +
"ORDER BY d.id, ro.id, month";
stmt.executeUpdate(create_views_stats_tmp);
System.out.println("====> Created views_stats_tmp table");
System.out.println("====> Droping views_stats table");
String drop_views_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".views_stats";
stmt.executeUpdate(drop_views_stats);
System.out.println("====> Dropped views_stats table");
sql = "CREATE TABLE IF NOT EXISTS views_stats (like views_stats_tmp including all)";
stmt.executeUpdate(sql);
System.out.println("====> Creating views_stats table");
String create_view_stats =
"CREATE TABLE IF NOT EXISTS views_stats STORED AS PARQUET AS SELECT * FROM views_stats_tmp";
stmt.executeUpdate(create_view_stats);
System.out.println("====> Created views_stats table");
System.out.println("====> Droping pageviews_stats_tmp table");
String drop_pageviews_stats_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".pageviews_stats_tmp";
stmt.executeUpdate(drop_pageviews_stats_tmp);
System.out.println("====> Dropped pageviews_stats_tmp table");
System.out.println("====> Creating pageviews_stats_tmp table");
String create_pageviews_stats_tmp =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp AS SELECT " +
"'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp p, " +
ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
"WHERE p.source='23' AND p.source=d.piwik_id and p.id=ro.oid \n" +
"GROUP BY d.id, ro.id, month " +
"ORDER BY d.id, ro.id, month";
stmt.executeUpdate(create_pageviews_stats_tmp);
System.out.println("====> Created pageviews_stats_tmp table");
// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count INTO pageviews_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
sql = "CREATE TABLE IF NOT EXISTS pageviews_stats_tmp AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count FROM result_views_monthly_tmp p, public.datasource d, public.result_oids ro where p.source='109' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
stmt.executeUpdate(sql);
sql = "CREATE TABLE IF NOT EXISTS pageviews_stats (like pageviews_stats_tmp including all)";
stmt.executeUpdate(sql);
System.out.println("====> Droping pageviews_stats table");
String drop_pageviews_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".pageviews_stats";
stmt.executeUpdate(drop_pageviews_stats);
System.out.println("====> Dropped pageviews_stats table");
System.out.println("====> Creating pageviews_stats table");
String create_pageviews_stats =
"CREATE TABLE IF NOT EXISTS pageviews_stats STORED AS PARQUET AS SELECT * FROM pageviews_stats_tmp";
stmt.executeUpdate(create_pageviews_stats);
System.out.println("====> Created pageviews_stats table");
stmt.close();
ConnectDB.getConnection().commit();
ConnectDB.getConnection().close();
System.exit(0);
}
// public void viewsStats(String piwikid) throws Exception {
@ -1016,10 +1087,7 @@ public class PiwikStatsDB {
System.out.println("====> Cleaning oai - Done, closing connection");
ConnectDB.getConnection().close();
System.exit(0);
}
private String processPortalURL(String url) {