forked from D-Net/dnet-hadoop
Finished downloadsStats
This commit is contained in:
parent
f78b5d3f86
commit
968d53f119
|
@ -186,7 +186,9 @@ public class PiwikStatsDB {
|
|||
viewsStats();
|
||||
System.out.println("====> ViewsStats processing ends");
|
||||
|
||||
System.out.println("====> DownloadsStats processing starts");
|
||||
downloadsStats();
|
||||
System.out.println("====> DownloadsStats processing starts");
|
||||
|
||||
processPortalLog();
|
||||
log.info("portal process done");
|
||||
|
@ -219,7 +221,7 @@ public class PiwikStatsDB {
|
|||
Statement stmt = ConnectDB.getConnection().createStatement();
|
||||
ConnectDB.getConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Droping piwiklogtmp_json table");
|
||||
System.out.println("====> Dropping piwiklogtmp_json table");
|
||||
String drop_piwiklogtmp_json = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".piwiklogtmp_json";
|
||||
|
@ -331,7 +333,7 @@ public class PiwikStatsDB {
|
|||
Statement stmt = ConnectDB.getConnection().createStatement();
|
||||
ConnectDB.getConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Droping result_views_monthly_tmp table");
|
||||
System.out.println("====> Dropping result_views_monthly_tmp table");
|
||||
String drop_result_views_monthly_tmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".result_views_monthly_tmp";
|
||||
|
@ -354,7 +356,7 @@ public class PiwikStatsDB {
|
|||
|
||||
|
||||
|
||||
System.out.println("====> Droping views_stats_tmp table");
|
||||
System.out.println("====> Dropping views_stats_tmp table");
|
||||
String drop_views_stats_tmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".views_stats_tmp";
|
||||
|
@ -375,7 +377,7 @@ public class PiwikStatsDB {
|
|||
System.out.println("====> Created views_stats_tmp table");
|
||||
|
||||
|
||||
System.out.println("====> Droping views_stats table");
|
||||
System.out.println("====> Dropping views_stats table");
|
||||
String drop_views_stats = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".views_stats";
|
||||
|
@ -384,13 +386,13 @@ public class PiwikStatsDB {
|
|||
|
||||
System.out.println("====> Creating views_stats table");
|
||||
String create_view_stats =
|
||||
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + "views_stats " +
|
||||
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + "views_stats_tmp";
|
||||
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
|
||||
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
|
||||
stmt.executeUpdate(create_view_stats);
|
||||
System.out.println("====> Created views_stats table");
|
||||
|
||||
|
||||
System.out.println("====> Droping pageviews_stats_tmp table");
|
||||
System.out.println("====> Dropping pageviews_stats_tmp table");
|
||||
String drop_pageviews_stats_tmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".pageviews_stats_tmp";
|
||||
|
@ -419,69 +421,85 @@ public class PiwikStatsDB {
|
|||
|
||||
System.out.println("====> Creating pageviews_stats table");
|
||||
String create_pageviews_stats =
|
||||
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + "pageviews_stats " +
|
||||
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + "pageviews_stats_tmp";
|
||||
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " +
|
||||
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
|
||||
stmt.executeUpdate(create_pageviews_stats);
|
||||
System.out.println("====> Created pageviews_stats table");
|
||||
|
||||
stmt.close();
|
||||
ConnectDB.getConnection().close();
|
||||
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
// public void viewsStats(String piwikid) throws Exception {
|
||||
// stmt = ConnectDB.getConnection().createStatement();
|
||||
// ConnectDB.getConnection().setAutoCommit(false);
|
||||
//
|
||||
// //String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as views, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
|
||||
// String sql = "CREATE OR REPLACE VIEW result_views_monthly" + piwikid + " AS SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklog" + piwikid + " where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
|
||||
// stmt.executeUpdate(sql);
|
||||
//
|
||||
// // sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire INTO views_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
|
||||
// sql = "CREATE TABLE IF NOT EXISTS views_stats" + piwikid + " AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire FROM result_views_monthly" + piwikid + " p, datasource d, result_oids ro where p.source!='109' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
|
||||
// stmt.executeUpdate(sql);
|
||||
//
|
||||
//// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count INTO pageviews_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
|
||||
// sql = "CREATE TABLE IF NOT EXISTS pageviews_stats" + piwikid + " AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count FROM result_views_monthly" + piwikid + " p, datasource d, result_oids ro where p.source='109' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
|
||||
// stmt.executeUpdate(sql);
|
||||
//
|
||||
// sql = "DROP VIEW IF EXISTS result_views_monthly" + piwikid + ";";
|
||||
// stmt.executeUpdate(sql);
|
||||
//
|
||||
// stmt.close();
|
||||
// ConnectDB.getConnection().commit();
|
||||
// ConnectDB.getConnection().close();
|
||||
// }
|
||||
|
||||
private void downloadsStats() throws Exception {
|
||||
Statement stmt = ConnectDB.getConnection().createStatement();
|
||||
ConnectDB.getConnection().setAutoCommit(false);
|
||||
|
||||
// String sql = "CREATE OR REPLACE VIEW result_downloads_monthly as select entity_id AS id, COUNT(entity_id) as
|
||||
// downloads, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS
|
||||
// VARCHAR), 2, '0') AS month, source FROM piwiklog where action='download' and (source_item_type='oaItem' or
|
||||
// source_item_type='repItem') group by id, month, source order by source, id, month;";
|
||||
String sql = "CREATE OR REPLACE VIEW result_downloads_monthly_tmp as select entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklogtmp where action='download' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
|
||||
System.out.println("====> Dropping result_downloads_monthly_tmp view");
|
||||
String drop_result_views_monthly_tmp = "DROP VIEW IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".result_views_monthly_tmp";
|
||||
stmt.executeUpdate(drop_result_views_monthly_tmp);
|
||||
System.out.println("====> Dropped result_downloads_monthly_tmp view");
|
||||
|
||||
System.out.println("====> Creating result_views_monthly_tmp view");
|
||||
String sql =
|
||||
"CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp " +
|
||||
"AS SELECT entity_id AS id, COUNT(entity_id) as downloads, " +
|
||||
"SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, " +
|
||||
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp where action='download' " +
|
||||
"AND (source_item_type='oaItem' OR source_item_type='repItem') " +
|
||||
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source " +
|
||||
"ORDER BY source, entity_id, month";
|
||||
stmt.executeUpdate(sql);
|
||||
System.out.println("====> Created result_views_monthly_tmp view");
|
||||
|
||||
|
||||
System.out.println("====> Dropping downloads_stats_tmp table");
|
||||
String drop_views_stats = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".downloads_stats_tmp";
|
||||
stmt.executeUpdate(drop_views_stats);
|
||||
System.out.println("====> Dropped downloads_stats_tmp table");
|
||||
|
||||
System.out.println("====> Creating downloads_stats_tmp view");
|
||||
sql =
|
||||
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " +
|
||||
"SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
|
||||
"max(downloads) AS count, max(openaire_referrer) AS openaire " +
|
||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp p, " +
|
||||
ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
|
||||
"WHERE p.source=d.piwik_id and p.id=ro.oid " +
|
||||
"GROUP BY d.id, ro.id, month " +
|
||||
"ORDER BY d.id, ro.id, month";
|
||||
System.out.println("====> Created downloads_stats_tmp view");
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date,
|
||||
// max(downloads) AS count INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro
|
||||
// where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY
|
||||
// repository_id, result_id, date;";
|
||||
// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire INTO downloads_stats FROM result_downloads_monthly p, datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
|
||||
sql = "CREATE TABLE IF NOT EXISTS downloads_stats_tmp AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(downloads) AS count, max(openaire_referrer) AS openaire FROM result_downloads_monthly_tmp p, public.datasource d, public.result_oids ro where p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
|
||||
System.out.println("====> Dropping downloads_stats table");
|
||||
String drop_pageviews_stats = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".downloads_stats";
|
||||
stmt.executeUpdate(drop_pageviews_stats);
|
||||
System.out.println("====> Dropped downloads_stats table");
|
||||
|
||||
System.out.println("====> Creating downloads_stats table");
|
||||
String create_pageviews_stats =
|
||||
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
|
||||
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
|
||||
stmt.executeUpdate(create_pageviews_stats);
|
||||
System.out.println("====> Created downloads_stats table");
|
||||
|
||||
sql = "CREATE TABLE IF NOT EXISTS downloads_stats (like downloads_stats_tmp including all)";
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp;";
|
||||
System.out.println("====> Dropping pageviews_stats table");
|
||||
sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp";
|
||||
System.out.println("====> Dropped pageviews_stats table");
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
stmt.close();
|
||||
ConnectDB.getConnection().commit();
|
||||
ConnectDB.getConnection().close();
|
||||
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
public void finalizeStats() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue