Finished downloadsStats

This commit is contained in:
Spyros Zoupanos 2020-09-11 20:10:37 +03:00
parent f78b5d3f86
commit 968d53f119
1 changed file with 70 additions and 52 deletions

View File

@ -186,7 +186,9 @@ public class PiwikStatsDB {
viewsStats();
System.out.println("====> ViewsStats processing ends");
System.out.println("====> DownloadsStats processing starts");
downloadsStats();
System.out.println("====> DownloadsStats processing starts");
processPortalLog();
log.info("portal process done");
@ -219,7 +221,7 @@ public class PiwikStatsDB {
Statement stmt = ConnectDB.getConnection().createStatement();
ConnectDB.getConnection().setAutoCommit(false);
System.out.println("====> Droping piwiklogtmp_json table");
System.out.println("====> Dropping piwiklogtmp_json table");
String drop_piwiklogtmp_json = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".piwiklogtmp_json";
@ -331,7 +333,7 @@ public class PiwikStatsDB {
Statement stmt = ConnectDB.getConnection().createStatement();
ConnectDB.getConnection().setAutoCommit(false);
System.out.println("====> Droping result_views_monthly_tmp table");
System.out.println("====> Dropping result_views_monthly_tmp table");
String drop_result_views_monthly_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".result_views_monthly_tmp";
@ -354,7 +356,7 @@ public class PiwikStatsDB {
System.out.println("====> Droping views_stats_tmp table");
System.out.println("====> Dropping views_stats_tmp table");
String drop_views_stats_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".views_stats_tmp";
@ -375,7 +377,7 @@ public class PiwikStatsDB {
System.out.println("====> Created views_stats_tmp table");
System.out.println("====> Droping views_stats table");
System.out.println("====> Dropping views_stats table");
String drop_views_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".views_stats";
@ -384,13 +386,13 @@ public class PiwikStatsDB {
System.out.println("====> Creating views_stats table");
String create_view_stats =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + "views_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + "views_stats_tmp";
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
stmt.executeUpdate(create_view_stats);
System.out.println("====> Created views_stats table");
System.out.println("====> Droping pageviews_stats_tmp table");
System.out.println("====> Dropping pageviews_stats_tmp table");
String drop_pageviews_stats_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".pageviews_stats_tmp";
@ -419,69 +421,85 @@ public class PiwikStatsDB {
System.out.println("====> Creating pageviews_stats table");
String create_pageviews_stats =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + "pageviews_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + "pageviews_stats_tmp";
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
stmt.executeUpdate(create_pageviews_stats);
System.out.println("====> Created pageviews_stats table");
stmt.close();
ConnectDB.getConnection().close();
System.exit(0);
}
// public void viewsStats(String piwikid) throws Exception {
// stmt = ConnectDB.getConnection().createStatement();
// ConnectDB.getConnection().setAutoCommit(false);
//
// //String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as views, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
// String sql = "CREATE OR REPLACE VIEW result_views_monthly" + piwikid + " AS SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog" + piwikid + " where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
// stmt.executeUpdate(sql);
//
// // sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire INTO views_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
// sql = "CREATE TABLE IF NOT EXISTS views_stats" + piwikid + " AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire FROM result_views_monthly" + piwikid + " p, datasource d, result_oids ro where p.source!='109' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
// stmt.executeUpdate(sql);
//
//// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count INTO pageviews_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
// sql = "CREATE TABLE IF NOT EXISTS pageviews_stats" + piwikid + " AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count FROM result_views_monthly" + piwikid + " p, datasource d, result_oids ro where p.source='109' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
// stmt.executeUpdate(sql);
//
// sql = "DROP VIEW IF EXISTS result_views_monthly" + piwikid + ";";
// stmt.executeUpdate(sql);
//
// stmt.close();
// ConnectDB.getConnection().commit();
// ConnectDB.getConnection().close();
// }
/**
 * Rebuilds the monthly download statistics tables in the usage-stats schema.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>(re)creates the {@code result_downloads_monthly_tmp} view, which counts the
 * {@code action='download'} rows of {@code piwiklogtmp} per entity, month and source;</li>
 * <li>rebuilds {@code downloads_stats_tmp} by joining that view with the {@code datasource}
 * and {@code result_oids} tables of the stats schema;</li>
 * <li>rebuilds {@code downloads_stats} as a Parquet copy of {@code downloads_stats_tmp};</li>
 * <li>drops the intermediate view.</li>
 * </ol>
 *
 * <p>The statement is closed even when a step fails. The connection is committed and closed
 * only after every step has succeeded.
 *
 * @throws Exception if the connection cannot be used or any statement fails
 */
private void downloadsStats() throws Exception {
    try (Statement stmt = ConnectDB.getConnection().createStatement()) {
        ConnectDB.getConnection().setAutoCommit(false);

        // Step 1: monthly download counts per entity/source, exposed as a view.
        System.out.println("====> Dropping result_downloads_monthly_tmp view");
        String dropMonthlyView = "DROP VIEW IF EXISTS " +
            ConnectDB.getUsageStatsDBSchema() +
            ".result_downloads_monthly_tmp";
        stmt.executeUpdate(dropMonthlyView);
        System.out.println("====> Dropped result_downloads_monthly_tmp view");

        System.out.println("====> Creating result_downloads_monthly_tmp view");
        String createMonthlyView =
            "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp " +
                "AS SELECT entity_id AS id, COUNT(entity_id) as downloads, " +
                "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, " +
                "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
                "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp where action='download' " +
                "AND (source_item_type='oaItem' OR source_item_type='repItem') " +
                "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source " +
                "ORDER BY source, entity_id, month";
        stmt.executeUpdate(createMonthlyView);
        System.out.println("====> Created result_downloads_monthly_tmp view");

        // Step 2: join the monthly view with datasource / result_oids into a temporary table.
        System.out.println("====> Dropping downloads_stats_tmp table");
        String dropDownloadsStatsTmp = "DROP TABLE IF EXISTS " +
            ConnectDB.getUsageStatsDBSchema() +
            ".downloads_stats_tmp";
        stmt.executeUpdate(dropDownloadsStatsTmp);
        System.out.println("====> Dropped downloads_stats_tmp table");

        System.out.println("====> Creating downloads_stats_tmp table");
        String createDownloadsStatsTmp =
            "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " +
                "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
                "max(downloads) AS count, max(openaire_referrer) AS openaire " +
                "FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp p, " +
                ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
                "WHERE p.source=d.piwik_id and p.id=ro.oid " +
                "GROUP BY d.id, ro.id, month " +
                "ORDER BY d.id, ro.id, month";
        stmt.executeUpdate(createDownloadsStatsTmp);
        // Report success only after the statement has actually run.
        System.out.println("====> Created downloads_stats_tmp table");

        // Step 3: materialise the final table as a Parquet copy of the temporary one.
        System.out.println("====> Dropping downloads_stats table");
        String dropDownloadsStats = "DROP TABLE IF EXISTS " +
            ConnectDB.getUsageStatsDBSchema() +
            ".downloads_stats";
        stmt.executeUpdate(dropDownloadsStats);
        System.out.println("====> Dropped downloads_stats table");

        System.out.println("====> Creating downloads_stats table");
        String createDownloadsStats =
            "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
                "STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
        stmt.executeUpdate(createDownloadsStats);
        System.out.println("====> Created downloads_stats table");

        // Step 4: the intermediate view is no longer needed. It must be addressed with the same
        // schema qualifier it was created with, otherwise the drop targets the default database.
        System.out.println("====> Dropping result_downloads_monthly_tmp view");
        String dropMonthlyViewAfterUse = "DROP VIEW IF EXISTS " +
            ConnectDB.getUsageStatsDBSchema() +
            ".result_downloads_monthly_tmp";
        stmt.executeUpdate(dropMonthlyViewAfterUse);
        System.out.println("====> Dropped result_downloads_monthly_tmp view");
    }

    ConnectDB.getConnection().commit();
    ConnectDB.getConnection().close();
    // NOTE(review): the former System.exit(0) here was removed. It terminated the JVM from
    // inside a private helper, so the caller's remaining steps could never execute.
}
public void finalizeStats() throws Exception {