forked from D-Net/dnet-hadoop
More progress on viewsStats
This commit is contained in:
parent 1d9f8f79a8
commit 2d2d1b9694
|
@ -178,11 +178,14 @@ public class PiwikStatsDB {
|
||||||
log.info("removing double clicks done");
|
log.info("removing double clicks done");
|
||||||
|
|
||||||
System.out.println("====> Cleaning oai");
|
System.out.println("====> Cleaning oai");
|
||||||
cleanOAI();
|
// cleanOAI();
|
||||||
System.out.println("====> Cleaning oai done");
|
System.out.println("====> Cleaning oai done");
|
||||||
log.info("cleaning oai done");
|
log.info("cleaning oai done");
|
||||||
|
|
||||||
|
System.out.println("====> ViewsStats processing starts");
|
||||||
viewsStats();
|
viewsStats();
|
||||||
|
System.out.println("====> ViewsStats processing ends");
|
||||||
|
|
||||||
downloadsStats();
|
downloadsStats();
|
||||||
|
|
||||||
processPortalLog();
|
processPortalLog();
|
||||||
|
@ -328,33 +331,101 @@ public class PiwikStatsDB {
|
||||||
Statement stmt = ConnectDB.getConnection().createStatement();
|
Statement stmt = ConnectDB.getConnection().createStatement();
|
||||||
ConnectDB.getConnection().setAutoCommit(false);
|
ConnectDB.getConnection().setAutoCommit(false);
|
||||||
|
|
||||||
// String sql = "CREATE OR REPLACE VIEW result_views_monthly AS SELECT entity_id AS id, COUNT(entity_id) as
|
System.out.println("====> Droping result_views_monthly_tmp table");
|
||||||
// views, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS
|
String drop_result_views_monthly_tmp = "DROP TABLE IF EXISTS " +
|
||||||
// VARCHAR), 2, '0') AS month, source FROM piwiklog where action='action' and (source_item_type='oaItem' or
|
ConnectDB.getUsageStatsDBSchema() +
|
||||||
// source_item_type='repItem') group by id, month, source order by source, id, month;";
|
".result_views_monthly_tmp";
|
||||||
String sql = "CREATE OR REPLACE VIEW result_views_monthly_tmp AS SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, extract('year' from timestamp::date) ||'/'|| LPAD(CAST(extract('month' from timestamp::date) AS VARCHAR), 2, '0') AS month, source FROM piwiklogtmp where action='action' and (source_item_type='oaItem' or source_item_type='repItem') group by id, month, source order by source, id, month;";
|
stmt.executeUpdate(drop_result_views_monthly_tmp);
|
||||||
stmt.executeUpdate(sql);
|
System.out.println("====> Dropped result_views_monthly_tmp table");
|
||||||
|
|
||||||
// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date,
|
System.out.println("====> Creating result_views_monthly_tmp table");
|
||||||
// max(views) AS count, max(openaire_referrer) AS openaire INTO views_stats FROM result_views_monthly p,
|
String create_result_views_monthly_tmp =
|
||||||
// datasource d, result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by
|
"CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp " +
|
||||||
// repository_id, result_id, date ORDER BY repository_id, result_id, date;";
|
"AS SELECT entity_id AS id, " +
|
||||||
sql = "CREATE TABLE IF NOT EXISTS views_stats_tmp AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count, max(openaire_referrer) AS openaire FROM result_views_monthly_tmp p, public.datasource d, public.result_oids ro where p.source!='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
|
"COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) " +
|
||||||
stmt.executeUpdate(sql);
|
"AS openaire_referrer, " +
|
||||||
|
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
|
||||||
|
"FROM `usagestats_13`.piwiklogtmp where action='action' and (source_item_type='oaItem' or " +
|
||||||
|
"source_item_type='repItem') " +
|
||||||
|
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
|
||||||
|
"source ORDER BY source, entity_id";
|
||||||
|
stmt.executeUpdate(create_result_views_monthly_tmp);
|
||||||
|
System.out.println("====> Created result_views_monthly_tmp table");
|
||||||
|
|
||||||
sql = "CREATE TABLE IF NOT EXISTS views_stats (like views_stats_tmp including all)";
|
|
||||||
stmt.executeUpdate(sql);
|
|
||||||
|
|
||||||
// sql = "SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count INTO pageviews_stats FROM result_views_monthly p, datasource d, result_oids ro where p.source='5' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
|
|
||||||
sql = "CREATE TABLE IF NOT EXISTS pageviews_stats_tmp AS SELECT 'OpenAIRE'::TEXT as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count FROM result_views_monthly_tmp p, public.datasource d, public.result_oids ro where p.source='109' AND p.source=d.piwik_id and p.id=ro.orid group by repository_id, result_id, date ORDER BY repository_id, result_id, date;";
|
|
||||||
stmt.executeUpdate(sql);
|
|
||||||
|
|
||||||
sql = "CREATE TABLE IF NOT EXISTS pageviews_stats (like pageviews_stats_tmp including all)";
|
System.out.println("====> Droping views_stats_tmp table");
|
||||||
stmt.executeUpdate(sql);
|
String drop_views_stats_tmp = "DROP TABLE IF EXISTS " +
|
||||||
|
ConnectDB.getUsageStatsDBSchema() +
|
||||||
|
".views_stats_tmp";
|
||||||
|
stmt.executeUpdate(drop_views_stats_tmp);
|
||||||
|
System.out.println("====> Dropped views_stats_tmp table");
|
||||||
|
|
||||||
|
System.out.println("====> Creating views_stats_tmp table");
|
||||||
|
String create_views_stats_tmp =
|
||||||
|
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp " +
|
||||||
|
"AS SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
|
||||||
|
"max(views) AS count, max(openaire_referrer) AS openaire " +
|
||||||
|
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp p, " +
|
||||||
|
ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
|
||||||
|
"WHERE p.source!='5' AND p.source=d.piwik_id AND p.id=ro.oid " +
|
||||||
|
"GROUP BY d.id, ro.id, month " +
|
||||||
|
"ORDER BY d.id, ro.id, month";
|
||||||
|
stmt.executeUpdate(create_views_stats_tmp);
|
||||||
|
System.out.println("====> Created views_stats_tmp table");
|
||||||
|
|
||||||
|
|
||||||
|
System.out.println("====> Droping views_stats table");
|
||||||
|
String drop_views_stats = "DROP TABLE IF EXISTS " +
|
||||||
|
ConnectDB.getUsageStatsDBSchema() +
|
||||||
|
".views_stats";
|
||||||
|
stmt.executeUpdate(drop_views_stats);
|
||||||
|
System.out.println("====> Dropped views_stats table");
|
||||||
|
|
||||||
|
System.out.println("====> Creating views_stats table");
|
||||||
|
String create_view_stats =
|
||||||
|
"CREATE TABLE IF NOT EXISTS views_stats STORED AS PARQUET AS SELECT * FROM views_stats_tmp";
|
||||||
|
stmt.executeUpdate(create_view_stats);
|
||||||
|
System.out.println("====> Created views_stats table");
|
||||||
|
|
||||||
|
|
||||||
|
System.out.println("====> Droping pageviews_stats_tmp table");
|
||||||
|
String drop_pageviews_stats_tmp = "DROP TABLE IF EXISTS " +
|
||||||
|
ConnectDB.getUsageStatsDBSchema() +
|
||||||
|
".pageviews_stats_tmp";
|
||||||
|
stmt.executeUpdate(drop_pageviews_stats_tmp);
|
||||||
|
System.out.println("====> Dropped pageviews_stats_tmp table");
|
||||||
|
|
||||||
|
System.out.println("====> Creating pageviews_stats_tmp table");
|
||||||
|
String create_pageviews_stats_tmp =
|
||||||
|
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp AS SELECT " +
|
||||||
|
"'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count " +
|
||||||
|
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp p, " +
|
||||||
|
ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
|
||||||
|
"WHERE p.source='23' AND p.source=d.piwik_id and p.id=ro.oid \n" +
|
||||||
|
"GROUP BY d.id, ro.id, month " +
|
||||||
|
"ORDER BY d.id, ro.id, month";
|
||||||
|
stmt.executeUpdate(create_pageviews_stats_tmp);
|
||||||
|
System.out.println("====> Created pageviews_stats_tmp table");
|
||||||
|
|
||||||
|
|
||||||
|
System.out.println("====> Droping pageviews_stats table");
|
||||||
|
String drop_pageviews_stats = "DROP TABLE IF EXISTS " +
|
||||||
|
ConnectDB.getUsageStatsDBSchema() +
|
||||||
|
".pageviews_stats";
|
||||||
|
stmt.executeUpdate(drop_pageviews_stats);
|
||||||
|
System.out.println("====> Dropped pageviews_stats table");
|
||||||
|
|
||||||
|
System.out.println("====> Creating pageviews_stats table");
|
||||||
|
String create_pageviews_stats =
|
||||||
|
"CREATE TABLE IF NOT EXISTS pageviews_stats STORED AS PARQUET AS SELECT * FROM pageviews_stats_tmp";
|
||||||
|
stmt.executeUpdate(create_pageviews_stats);
|
||||||
|
System.out.println("====> Created pageviews_stats table");
|
||||||
|
|
||||||
stmt.close();
|
stmt.close();
|
||||||
ConnectDB.getConnection().commit();
|
|
||||||
ConnectDB.getConnection().close();
|
ConnectDB.getConnection().close();
|
||||||
|
|
||||||
|
System.exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// public void viewsStats(String piwikid) throws Exception {
|
// public void viewsStats(String piwikid) throws Exception {
|
||||||
|
@ -1016,10 +1087,7 @@ public class PiwikStatsDB {
|
||||||
|
|
||||||
|
|
||||||
System.out.println("====> Cleaning oai - Done, closing connection");
|
System.out.println("====> Cleaning oai - Done, closing connection");
|
||||||
|
|
||||||
ConnectDB.getConnection().close();
|
ConnectDB.getConnection().close();
|
||||||
|
|
||||||
System.exit(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private String processPortalURL(String url) {
|
private String processPortalURL(String url) {
|
||||||
|
|
Loading…
Reference in New Issue