|
|
|
@ -101,12 +101,43 @@ public class PiwikStatsDB {
|
|
|
|
|
logger.info("Inserted data to piwiklogdistinct");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void createDistinctEpisciencesLog() throws Exception {
|
|
|
|
|
logger.info("Initialising DB properties");
|
|
|
|
|
ConnectDB.init();
|
|
|
|
|
|
|
|
|
|
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
|
|
|
|
ConnectDB.getHiveConnection().setAutoCommit(false);
|
|
|
|
|
|
|
|
|
|
logger.info("Dropping episcienceslogdistinct");
|
|
|
|
|
String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".episcienceslogdistinct";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Dropped episcienceslogdistinct");
|
|
|
|
|
|
|
|
|
|
logger.info("Creating episcienceslogdistinct table");
|
|
|
|
|
// Create Piwiklogdistinct table - This table should exist
|
|
|
|
|
String sqlCreateTablePiwikLogDistinct = "CREATE TABLE IF NOT EXISTS "
|
|
|
|
|
+ ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".episcienceslogdistinct(source INT, id_visit STRING, country STRING, action STRING, url STRING, "
|
|
|
|
|
+ "entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) "
|
|
|
|
|
+ "clustered by (source, id_visit, action, timestamp, entity_id) "
|
|
|
|
|
+ "into 100 buckets stored as orc tblproperties('transactional'='true')";
|
|
|
|
|
stmt.executeUpdate(sqlCreateTablePiwikLogDistinct);
|
|
|
|
|
logger.info("Created episcienceslogdistinct table");
|
|
|
|
|
|
|
|
|
|
logger.info("Inserting data to episcienceslogdistinct");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".episcienceslogdistinct "
|
|
|
|
|
+ "SELECT DISTINCT * FROM " + ConnectDB.getUsageRawDataDBSchema()
|
|
|
|
|
+ ".episcienceslog WHERE entity_id is not null";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Inserted data to episcienceslogdistinct");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void processLogs() throws Exception {
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
|
|
logger.info("ViewsStats processing starts at: " + new Timestamp(System.currentTimeMillis()));
|
|
|
|
|
viewsStats();
|
|
|
|
|
logger.info("ViewsStats processing ends at: " + new Timestamp(System.currentTimeMillis()));
|
|
|
|
|
//to remove logger.info("ViewsStats processing starts at: " + new Timestamp(System.currentTimeMillis()));
|
|
|
|
|
// viewsStats();
|
|
|
|
|
//to remove logger.info("ViewsStats processing ends at: " + new Timestamp(System.currentTimeMillis()));
|
|
|
|
|
|
|
|
|
|
logger.info("DownloadsStats processing starts at: " + new Timestamp(System.currentTimeMillis()));
|
|
|
|
|
downloadsStats();
|
|
|
|
@ -125,6 +156,10 @@ public class PiwikStatsDB {
|
|
|
|
|
public void processEpisciencesLogs() throws Exception {
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
|
|
logger.info("Creating EpisciencesLogDistinct Table");
|
|
|
|
|
createDistinctEpisciencesLog();
|
|
|
|
|
logger.info("Creating EpisciencesLogDistinct Table Created");
|
|
|
|
|
|
|
|
|
|
logger.info("Views Episciences processing starts at: " + new Timestamp(System.currentTimeMillis()));
|
|
|
|
|
episciencesViewsStats();
|
|
|
|
|
logger.info("Views Episciences processing ends at: " + new Timestamp(System.currentTimeMillis()));
|
|
|
|
@ -172,7 +207,7 @@ public class PiwikStatsDB {
|
|
|
|
|
+ ".openaire_views_stats_tmp";
|
|
|
|
|
stmt.executeUpdate(drop_views_stats);
|
|
|
|
|
logger.info("Dropped openaire_views_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
logger.info("Creating openaire_views_stats_tmp table");
|
|
|
|
|
String create_views_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_views_stats_tmp "
|
|
|
|
@ -187,6 +222,71 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(create_views_stats);
|
|
|
|
|
logger.info("Created openaire_views_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Insert temp missing piwik_ids 630 in openaire_views_stats_tmp table");
|
|
|
|
|
String create_views_stats_missing_id_630 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_views_stats_tmp "
|
|
|
|
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::cfa5301358b9fcbe7aa45b1ceea088c6' as repository_id, ro.id as result_id, month as date, "
|
|
|
|
|
+ "max(views) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
|
|
|
|
+ "WHERE p.source=630 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
|
|
|
|
+ "GROUP BY ro.id, month "
|
|
|
|
|
+ "ORDER BY ro.id, month ";
|
|
|
|
|
stmt.executeUpdate(create_views_stats_missing_id_630);
|
|
|
|
|
logger.info("Inserted temp missing piwik_ids 630 in openaire_views_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Insert temp missing piwik_ids 662 in openaire_views_stats_tmp table");
|
|
|
|
|
String create_views_stats_missing_id_662 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_views_stats_tmp "
|
|
|
|
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::4e86eaf2685a67b743a475f86c7c0086' as repository_id, ro.id as result_id, month as date, "
|
|
|
|
|
+ "max(views) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
|
|
|
|
+ "WHERE p.source=662 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
|
|
|
|
+ "GROUP BY ro.id, month "
|
|
|
|
|
+ "ORDER BY ro.id, month ";
|
|
|
|
|
stmt.executeUpdate(create_views_stats_missing_id_662);
|
|
|
|
|
logger.info("Inserted temp missing piwik_ids 662 in openaire_views_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Insert temp missing piwik_ids 694 in openaire_views_stats_tmp table");
|
|
|
|
|
String create_views_stats_missing_id_694 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_views_stats_tmp "
|
|
|
|
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::f35fd567065af297ae65b621e0a21ae9' as repository_id, ro.id as result_id, month as date, "
|
|
|
|
|
+ "max(views) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
|
|
|
|
+ "WHERE p.source=694 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
|
|
|
|
+ "GROUP BY ro.id, month "
|
|
|
|
|
+ "ORDER BY ro.id, month ";
|
|
|
|
|
stmt.executeUpdate(create_views_stats_missing_id_694);
|
|
|
|
|
logger.info("Inserted temp missing piwik_ids 694 in openaire_views_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Insert temp missing piwik_ids 725 in openaire_views_stats_tmp table");
|
|
|
|
|
String create_views_stats_missing_id_725 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_views_stats_tmp "
|
|
|
|
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::7180cffd6a8e829dacfc2a31b3f72ece' as repository_id, ro.id as result_id, month as date, "
|
|
|
|
|
+ "max(views) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
|
|
|
|
+ "WHERE p.source=725 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
|
|
|
|
+ "GROUP BY ro.id, month "
|
|
|
|
|
+ "ORDER BY ro.id, month ";
|
|
|
|
|
stmt.executeUpdate(create_views_stats_missing_id_725);
|
|
|
|
|
logger.info("Inserted temp missing piwik_ids 725 in openaire_views_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Insert temp missing piwik_ids 728 in openaire_views_stats_tmp table");
|
|
|
|
|
String create_views_stats_missing_id_728 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_views_stats_tmp "
|
|
|
|
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::8b3bac12926cc1d9fb5d68783376971d' as repository_id, ro.id as result_id, month as date, "
|
|
|
|
|
+ "max(views) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
|
|
|
|
+ "WHERE p.source=728 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
|
|
|
|
+ "GROUP BY ro.id, month "
|
|
|
|
|
+ "ORDER BY ro.id, month ";
|
|
|
|
|
stmt.executeUpdate(create_views_stats_missing_id_728);
|
|
|
|
|
logger.info("Inserted temp missing piwik_ids 728 in openaire_views_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Creating openaire_pageviews_stats_tmp table");
|
|
|
|
|
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_pageviews_stats_tmp AS SELECT "
|
|
|
|
@ -209,27 +309,27 @@ public class PiwikStatsDB {
|
|
|
|
|
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
|
|
|
|
ConnectDB.getHiveConnection().setAutoCommit(false);
|
|
|
|
|
|
|
|
|
|
logger.info("Dropping openaire_result_downloads_monthly_tmp view");
|
|
|
|
|
String drop_result_downloads_monthly = "DROP VIEW IF EXISTS "
|
|
|
|
|
+ ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_result_downloads_monthly_tmp";
|
|
|
|
|
stmt.executeUpdate(drop_result_downloads_monthly);
|
|
|
|
|
logger.info("Dropped openaire_result_downloads_monthly_tmp view");
|
|
|
|
|
|
|
|
|
|
logger.info("Creating openaire_result_downloads_monthly_tmp view");
|
|
|
|
|
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_result_downloads_monthly_tmp "
|
|
|
|
|
+ "AS SELECT entity_id, "
|
|
|
|
|
+ "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
|
|
|
|
|
+ "COUNT(entity_id) as downloads, "
|
|
|
|
|
+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
|
|
|
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct where action='download' "
|
|
|
|
|
+ "AND (source_item_type='oaItem' OR source_item_type='repItem') "
|
|
|
|
|
+ "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source "
|
|
|
|
|
+ "ORDER BY source, entity_id, month";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Created openaire_result_downloads_monthly_tmp view");
|
|
|
|
|
//to remove logger.info("Dropping openaire_result_downloads_monthly_tmp view");
|
|
|
|
|
// String drop_result_downloads_monthly = "DROP VIEW IF EXISTS "
|
|
|
|
|
// + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
// + ".openaire_result_downloads_monthly_tmp";
|
|
|
|
|
// stmt.executeUpdate(drop_result_downloads_monthly);
|
|
|
|
|
// logger.info("Dropped openaire_result_downloads_monthly_tmp view");
|
|
|
|
|
//
|
|
|
|
|
// logger.info("Creating openaire_result_downloads_monthly_tmp view");
|
|
|
|
|
// String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
// + ".openaire_result_downloads_monthly_tmp "
|
|
|
|
|
// + "AS SELECT entity_id, "
|
|
|
|
|
// + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
|
|
|
|
|
// + "COUNT(entity_id) as downloads, "
|
|
|
|
|
// + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
|
|
|
|
// + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
|
|
|
|
// + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct where action='download' "
|
|
|
|
|
// + "AND (source_item_type='oaItem' OR source_item_type='repItem') "
|
|
|
|
|
// + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source "
|
|
|
|
|
// + "ORDER BY source, entity_id, month";
|
|
|
|
|
// stmt.executeUpdate(sql);
|
|
|
|
|
//to remove logger.info("Created openaire_result_downloads_monthly_tmp view");
|
|
|
|
|
|
|
|
|
|
logger.info("Dropping openaire_downloads_stats_tmp table");
|
|
|
|
|
String drop_views_stats = "DROP TABLE IF EXISTS "
|
|
|
|
@ -239,7 +339,8 @@ public class PiwikStatsDB {
|
|
|
|
|
logger.info("Dropped openaire_downloads_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Creating openaire_downloads_stats_tmp table");
|
|
|
|
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".openaire_downloads_stats_tmp AS "
|
|
|
|
|
String sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_downloads_stats_tmp AS "
|
|
|
|
|
+ "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
|
|
|
|
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
|
|
|
@ -251,8 +352,74 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Created downloads_stats table");
|
|
|
|
|
|
|
|
|
|
logger.info("Insert temp missing piwik_ids 630 in openaire_downloads_stats_tmp table");
|
|
|
|
|
String create_views_stats_missing_id_630 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_downloads_stats_tmp "
|
|
|
|
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::cfa5301358b9fcbe7aa45b1ceea088c6' as repository_id, ro.id as result_id, month as date, "
|
|
|
|
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
|
|
|
|
+ "WHERE p.source=630 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
|
|
|
|
+ "GROUP BY ro.id, month "
|
|
|
|
|
+ "ORDER BY ro.id, month ";
|
|
|
|
|
stmt.executeUpdate(create_views_stats_missing_id_630);
|
|
|
|
|
logger.info("Inserted temp missing piwik_ids 630 in openaire_downloads_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Insert temp missing piwik_ids 662 in openaire_downloads_stats_tmp table");
|
|
|
|
|
String create_views_stats_missing_id_662 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_downloads_stats_tmp "
|
|
|
|
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::4e86eaf2685a67b743a475f86c7c0086' as repository_id, ro.id as result_id, month as date, "
|
|
|
|
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
|
|
|
|
+ "WHERE p.source=662 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
|
|
|
|
+ "GROUP BY ro.id, month "
|
|
|
|
|
+ "ORDER BY ro.id, month ";
|
|
|
|
|
stmt.executeUpdate(create_views_stats_missing_id_662);
|
|
|
|
|
logger.info("Inserted temp missing piwik_ids 662 in openaire_downloads_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Insert temp missing piwik_ids 694 in openaire_downloads_stats_tmp table");
|
|
|
|
|
String create_views_stats_missing_id_694 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_downloads_stats_tmp "
|
|
|
|
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::f35fd567065af297ae65b621e0a21ae9' as repository_id, ro.id as result_id, month as date, "
|
|
|
|
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
|
|
|
|
+ "WHERE p.source=694 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
|
|
|
|
+ "GROUP BY ro.id, month "
|
|
|
|
|
+ "ORDER BY ro.id, month ";
|
|
|
|
|
stmt.executeUpdate(create_views_stats_missing_id_694);
|
|
|
|
|
logger.info("Inserted temp missing piwik_ids 694 in openaire_downloads_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Insert temp missing piwik_ids 725 in openaire_downloads_stats_tmp table");
|
|
|
|
|
String create_views_stats_missing_id_725 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_downloads_stats_tmp "
|
|
|
|
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::7180cffd6a8e829dacfc2a31b3f72ece' as repository_id, ro.id as result_id, month as date, "
|
|
|
|
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
|
|
|
|
+ "WHERE p.source=725 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
|
|
|
|
+ "GROUP BY ro.id, month "
|
|
|
|
|
+ "ORDER BY ro.id, month ";
|
|
|
|
|
stmt.executeUpdate(create_views_stats_missing_id_725);
|
|
|
|
|
logger.info("Inserted temp missing piwik_ids 725 in openaire_downloads_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Insert temp missing piwik_ids 728 in openaire_downloads_stats_tmp table");
|
|
|
|
|
String create_views_stats_missing_id_728 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_downloads_stats_tmp "
|
|
|
|
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::8b3bac12926cc1d9fb5d68783376971d' as repository_id, ro.id as result_id, month as date, "
|
|
|
|
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
|
|
|
|
+ "WHERE p.source=728 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
|
|
|
|
+ "GROUP BY ro.id, month "
|
|
|
|
|
+ "ORDER BY ro.id, month ";
|
|
|
|
|
stmt.executeUpdate(create_views_stats_missing_id_728);
|
|
|
|
|
logger.info("Inserted temp missing piwik_ids 728 in openaire_downloads_stats_tmp table");
|
|
|
|
|
|
|
|
|
|
logger.info("Dropping openaire_result_downloads_monthly_tmp view");
|
|
|
|
|
sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp";
|
|
|
|
|
sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".openaire_result_downloads_monthly_tmp";
|
|
|
|
|
logger.info("Dropped openaire_result_downloads_monthly_tmp view ");
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
|
@ -298,6 +465,44 @@ public class PiwikStatsDB {
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void uploadPangaeaLogs() throws Exception {
|
|
|
|
|
stmt = ConnectDB.getHiveConnection().createStatement();
|
|
|
|
|
ConnectDB.getHiveConnection().setAutoCommit(false);
|
|
|
|
|
|
|
|
|
|
// Dropping Pangaea pangaea_views_stats_tmp table
|
|
|
|
|
logger.info("Dropping pangaea_views_stats_tmp table");
|
|
|
|
|
String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_views_stats_tmp";
|
|
|
|
|
logger.info("Dropped pangaea_views_stats_tmp table ");
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
|
|
// Dropping Pangaea pangaea_downloads_stats table
|
|
|
|
|
logger.info("Dropping pangaea_downloads_stats table");
|
|
|
|
|
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_downloads_stats";
|
|
|
|
|
logger.info("Dropped pangaea_downloads_stats table ");
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
|
|
// Creating Pangaea pangaea_views_stats_tmp table
|
|
|
|
|
logger.info("Creating Pangaea pangaea_views_stats_tmp table");
|
|
|
|
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_views_stats_tmp AS "
|
|
|
|
|
+ "SELECT 'PANGAEA' as source, 're3data_____::9633d1e8c4309c833c2c442abeb0cfeb' as repository_id,"
|
|
|
|
|
+ "r.id as result_id,date, cast(count as BIGINT) as count, 0 as openaire "
|
|
|
|
|
+ "FROM default.pangaeaviews p, " + ConnectDB.getStatsDBSchema()
|
|
|
|
|
+ ".result_oids r where r.oid=p.result_id";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Created pangaea_views_stats_tmp table ");
|
|
|
|
|
|
|
|
|
|
// Creating Pangaea pangaea_downloads_stats_tmp table
|
|
|
|
|
logger.info("Creating Pedocs pangaea_downloads_stats_tmp table");
|
|
|
|
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_downloads_stats_tmp AS "
|
|
|
|
|
+ "SELECT 'PANGAEA' as source, 're3data_____::9633d1e8c4309c833c2c442abeb0cfeb' as repository_id,"
|
|
|
|
|
+ "r.id as result_id, date, cast(count as BIGINT) as count, 0 as openaire "
|
|
|
|
|
+ "FROM default.pangaeadownloads p, " + ConnectDB.getStatsDBSchema()
|
|
|
|
|
+ ".result_oids r where r.oid=p.result_id";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Created pangaea_downloads_stats_tmp table ");
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void uploadTUDELFTStats() throws Exception {
|
|
|
|
|
stmt = ConnectDB.getHiveConnection().createStatement();
|
|
|
|
|
ConnectDB.getHiveConnection().setAutoCommit(false);
|
|
|
|
@ -499,7 +704,8 @@ public class PiwikStatsDB {
|
|
|
|
|
|
|
|
|
|
String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() +
|
|
|
|
|
".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true)";
|
|
|
|
|
".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true) "
|
|
|
|
|
+ "and websiteurl!='https://episciences.org/'";
|
|
|
|
|
|
|
|
|
|
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
|
|
|
|
.prepareStatement(returnEpisciencesJournals);
|
|
|
|
@ -525,12 +731,11 @@ public class PiwikStatsDB {
|
|
|
|
|
+ "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
|
|
|
|
|
+ "AS openaire_referrer, "
|
|
|
|
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageRawDataDBSchema()
|
|
|
|
|
+ ".episcienceslog where action='action' and (source_item_type='oaItem' or "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".episcienceslogdistinct where action='action' and (source_item_type='oaItem' or "
|
|
|
|
|
+ "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%'"
|
|
|
|
|
+ "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
|
|
|
|
|
+ "source ORDER BY source, entity_id";
|
|
|
|
|
|
|
|
|
|
stmt.executeUpdate(create_result_views_monthly);
|
|
|
|
|
logger.info("Created episciencesSuffix_result_views_monthly_tmp table");
|
|
|
|
|
|
|
|
|
@ -547,6 +752,23 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(insertIntoEpisciencesViewsTable);
|
|
|
|
|
logger.info("Inserted episciencesSuffix_result_views_monthly_tmp into EpisciencesViews Table");
|
|
|
|
|
|
|
|
|
|
logger
|
|
|
|
|
.info(
|
|
|
|
|
"Inserting episciencesSuffix_result_views_monthly_tmp for Episciences into EpisciencesViews Table");
|
|
|
|
|
String insertIntoEpisciencesViewsAllTable = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".episciencesviews SELECT 'Episciences' as source, "
|
|
|
|
|
+ " 'openaire____::6824b298c96ba906a3e6a70593affbf5' as repository_id, ro.id as result_id, month as date,"
|
|
|
|
|
+ " max(views) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ "." + episciencesSuffix.replace("-", "_") + "_result_views_monthly_tmp p,"
|
|
|
|
|
+ ConnectDB.getStatsDBSchema()
|
|
|
|
|
+ ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
|
|
|
|
|
logger
|
|
|
|
|
.info(
|
|
|
|
|
"Inserted episciencesSuffix_result_views_monthly_tmp for Episciences into EpisciencesViews Table");
|
|
|
|
|
|
|
|
|
|
stmt.executeUpdate(insertIntoEpisciencesViewsAllTable);
|
|
|
|
|
|
|
|
|
|
stmt.executeUpdate(dropepisciencesSuffixView);
|
|
|
|
|
logger.info("Dropped episciencesSuffix_result_views_monthly_tmp view");
|
|
|
|
|
}
|
|
|
|
@ -576,7 +798,8 @@ public class PiwikStatsDB {
|
|
|
|
|
|
|
|
|
|
String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() +
|
|
|
|
|
".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true)";
|
|
|
|
|
".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true) "
|
|
|
|
|
+ "and websiteurl!='https://episciences.org/'";
|
|
|
|
|
|
|
|
|
|
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
|
|
|
|
.prepareStatement(returnEpisciencesJournals);
|
|
|
|
@ -600,8 +823,8 @@ public class PiwikStatsDB {
|
|
|
|
|
+ "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
|
|
|
|
|
+ "AS openaire_referrer, "
|
|
|
|
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageRawDataDBSchema()
|
|
|
|
|
+ ".episcienceslog where action='download' and (source_item_type='oaItem' or "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".episcienceslogdistinct where action='download' and (source_item_type='oaItem' or "
|
|
|
|
|
+ "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%'"
|
|
|
|
|
+ "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
|
|
|
|
|
+ "source ORDER BY source, entity_id";
|
|
|
|
@ -622,6 +845,23 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(insertIntoEpisciencesDownloadsTable);
|
|
|
|
|
logger.info("Inserted episciencesSuffix_result_downloads_monthly_tmp into EpisciencesDownloadsTable");
|
|
|
|
|
|
|
|
|
|
logger
|
|
|
|
|
.info(
|
|
|
|
|
"Inserting episciencesSuffix_result_downloads_monthly_tmp for Episciences into EpisciencesDownloadsTable");
|
|
|
|
|
String insertIntoEpisciencesDownloadsAllTable = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".episciencesdownloads SELECT 'Episciences' as source, "
|
|
|
|
|
+ " 'openaire____::6824b298c96ba906a3e6a70593affbf5' as repository_id, ro.id as result_id, month as date,"
|
|
|
|
|
+ " max(views) AS count, max(openaire_referrer) AS openaire "
|
|
|
|
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ "." + episciencesSuffix.replace("-", "_") + "_result_downloads_monthly_tmp p,"
|
|
|
|
|
+ ConnectDB.getStatsDBSchema()
|
|
|
|
|
+ ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
|
|
|
|
|
|
|
|
|
|
stmt.executeUpdate(insertIntoEpisciencesDownloadsAllTable);
|
|
|
|
|
logger
|
|
|
|
|
.info(
|
|
|
|
|
"Inserted episciencesSuffix_result_downloads_monthly_tmp for Episciences into EpisciencesDownloadsTable");
|
|
|
|
|
|
|
|
|
|
stmt.executeUpdate(dropepisciencesSuffixDownloads);
|
|
|
|
|
logger.info("Dropped episciencesSuffix_result_downloads_monthly_tmp view");
|
|
|
|
|
|
|
|
|
@ -767,9 +1007,15 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Dropped Table tbl_all_r5_metrics");
|
|
|
|
|
|
|
|
|
|
logger.info("Create Table tbl_all_r5_metrics");
|
|
|
|
|
// All CoP R5 metrics Table
|
|
|
|
|
logger.info("Drop Table counter_r5_stats_with_metrics");
|
|
|
|
|
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".counter_r5_stats_with_metrics ";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Dropped Table counter_r5_stats_with_metrics");
|
|
|
|
|
|
|
|
|
|
logger.info("Create Table counter_r5_stats_with_metrics");
|
|
|
|
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".tbl_all_r5_metrics as "
|
|
|
|
|
+ ".counter_r5_stats_with_metrics as "
|
|
|
|
|
+ "WITH tmp1 as (SELECT coalesce(ds.repository_id, vs.repository_id) as repository_id, "
|
|
|
|
|
+ "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
|
|
|
|
|
+ "coalesce(vs.unique_item_investigations, 0) as unique_item_investigations, "
|
|
|
|
@ -793,7 +1039,7 @@ public class PiwikStatsDB {
|
|
|
|
|
+ "FROM tmp2 AS ds FULL OUTER JOIN " + ConnectDB.getUsageStatsDBSchema() + ".tbl_total_item_requests "
|
|
|
|
|
+ "AS vs ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Created Table tbl_all_r5_metrics");
|
|
|
|
|
logger.info("Created Table counter_r5_stats_with_metrics");
|
|
|
|
|
stmt.close();
|
|
|
|
|
ConnectDB.getHiveConnection().close();
|
|
|
|
|
|
|
|
|
@ -857,6 +1103,13 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Pedocs views updated to views_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting Pangaea views stats
|
|
|
|
|
logger.info("Inserting Pangaea old data to views_stats");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
|
|
|
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_views_stats_tmp";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Pangaea views updated to views_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting TUDELFT views stats
|
|
|
|
|
logger.info("Inserting TUDELFT data to views_stats");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
|
|
|
@ -878,6 +1131,12 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("B2SHARE views updated to views_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting Datacite views stats
|
|
|
|
|
logger.info("Inserting Datacite views to views_stats");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
|
|
|
|
+ "SELECT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".datacite_views";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
|
|
logger.info("Creating downloads_stats table");
|
|
|
|
|
String createDownloadsStats = "CREATE TABLE IF NOT EXISTS "
|
|
|
|
|
+ ConnectDB.getUsageStatsDBSchema()
|
|
|
|
@ -893,7 +1152,7 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Inserted OpenAIRE data to downloads_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting Episciences views stats
|
|
|
|
|
// Inserting Episciences downloads stats
|
|
|
|
|
logger.info("Inserting Episciences data to downloads_stats");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
|
|
|
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".episciencesdownloads";
|
|
|
|
@ -907,6 +1166,13 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Inserted Pedocs data to downloads_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting Pangaea downloads stats
|
|
|
|
|
logger.info("Inserting Pangaea old data to downloads_stats");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
|
|
|
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_downloads_stats_tmp";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Pangaea downloads updated to downloads_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting TUDELFT downloads stats
|
|
|
|
|
logger.info("Inserting TUDELFT data to downloads_stats");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
|
|
|
@ -920,6 +1186,7 @@ public class PiwikStatsDB {
|
|
|
|
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".b2share_downloads_stats_tmp";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Inserted B2SHARE data to downloads_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting Lareferencia downloads stats
|
|
|
|
|
logger.info("Inserting LaReferencia data to downloads_stats");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
|
|
|
@ -934,14 +1201,13 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("IRUS downloads updated to downloads_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting IRUS_R5 downloads stats
|
|
|
|
|
// Inserting IRUS_R5 views stats
|
|
|
|
|
logger.info("Inserting IRUS_R5 views to views_stats");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
|
|
|
|
+ "SELECT source, repository_id, result_id, `date`, views, openaire FROM "
|
|
|
|
|
+ ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".irus_R5_stats_tmp";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("IRUS_R5 views updated to views_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting IRUS_R5 downloads stats
|
|
|
|
|
logger.info("Inserting IRUS_R5 data to downloads_stats");
|
|
|
|
@ -950,7 +1216,6 @@ public class PiwikStatsDB {
|
|
|
|
|
+ ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".irus_R5_stats_tmp";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("IRUS_R5 downloads updated to downloads_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting SARC-OJS downloads stats
|
|
|
|
|
logger.info("Inserting SARC data to downloads_stats");
|
|
|
|
@ -959,19 +1224,11 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("SARC-OJS downloads updated to downloads_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting Datacite views stats
|
|
|
|
|
logger.info("Inserting Datacite views to views_stats");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
|
|
|
|
+ "SELECT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".datacite_views";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Datacite views updated to views_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting Datacite downloads stats
|
|
|
|
|
logger.info("Inserting Datacite downloads to downloads_stats");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
|
|
|
|
+ "SELECT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".datacite_downloads";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Datacite downloads updated to downloads_stats");
|
|
|
|
|
|
|
|
|
|
logger.info("Creating pageviews_stats table");
|
|
|
|
|
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
@ -986,28 +1243,6 @@ public class PiwikStatsDB {
|
|
|
|
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_pageviews_stats_tmp";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
|
|
logger.info("Dropping full_dates table");
|
|
|
|
|
String dropFullDates = "DROP TABLE IF EXISTS "
|
|
|
|
|
+ ConnectDB.getUsageStatsDBSchema()
|
|
|
|
|
+ ".full_dates";
|
|
|
|
|
stmt.executeUpdate(dropFullDates);
|
|
|
|
|
logger.info("Dropped full_dates table");
|
|
|
|
|
|
|
|
|
|
Calendar startCalendar = Calendar.getInstance();
|
|
|
|
|
startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
|
|
|
|
|
Calendar endCalendar = Calendar.getInstance();
|
|
|
|
|
int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
|
|
|
|
|
int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
|
|
|
|
|
|
|
|
|
|
logger.info("Creating full_dates table");
|
|
|
|
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".full_dates AS "
|
|
|
|
|
+ "SELECT from_unixtime(unix_timestamp(cast(add_months(from_date,i) AS DATE)), 'yyyy/MM') AS txn_date "
|
|
|
|
|
+ "FROM (SELECT DATE '2016-01-01' AS from_date) p "
|
|
|
|
|
+ "LATERAL VIEW "
|
|
|
|
|
+ "posexplode(split(space(" + diffMonth + "),' ')) pe AS i,x";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Created full_dates table");
|
|
|
|
|
|
|
|
|
|
logger.info("Inserting data to usage_stats");
|
|
|
|
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats AS "
|
|
|
|
|
+ "SELECT coalesce(ds.source, vs.source) as source, "
|
|
|
|
@ -1022,15 +1257,77 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Inserted data to usage_stats");
|
|
|
|
|
|
|
|
|
|
// Dropping project_stats table
|
|
|
|
|
logger.info("Dropping project_stats table");
|
|
|
|
|
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".project_stats";
|
|
|
|
|
logger.info("Dropped project_stats table ");
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
|
|
// Dropping datasource_stats table
|
|
|
|
|
logger.info("Dropping datasource_stats table");
|
|
|
|
|
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".datasource_stats";
|
|
|
|
|
logger.info("Dropped datasource_stats table ");
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
|
|
logger.info("Inserting data to project_downloads");
|
|
|
|
|
sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".project_downloads as "
|
|
|
|
|
+ " select pr.id, sum(count) downloads, sum(openaire) openaire_downloads,`date` "
|
|
|
|
|
+ " from " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
|
|
|
|
+ " join " + ConnectDB.getStatsDBSchema() + ".project_results pr on result_id=pr.result "
|
|
|
|
|
+ " join " + ConnectDB.getStatsDBSchema() + ".project p on p.id=pr.id "
|
|
|
|
|
+ " group by pr.id,`date`";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Inserted data to projects_downloads");
|
|
|
|
|
|
|
|
|
|
logger.info("Inserting data to project_views");
|
|
|
|
|
sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".project_views as "
|
|
|
|
|
+ " select pr.id, sum(count) views, sum(openaire) openaire_views,`date` "
|
|
|
|
|
+ " from " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
|
|
|
|
+ " join " + ConnectDB.getStatsDBSchema() + ".project_results pr on result_id=pr.result "
|
|
|
|
|
+ " join " + ConnectDB.getStatsDBSchema() + ".project p on p.id=pr.id "
|
|
|
|
|
+ " group by pr.id,`date`";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Inserted data to project_views");
|
|
|
|
|
|
|
|
|
|
logger.info("Inserting data to project_stats");
|
|
|
|
|
sql = " CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".project_stats as "
|
|
|
|
|
+ " SELECT coalesce(pv.id, pd.id) as id, coalesce(pd.`date`, pv.`date`) as `date`, "
|
|
|
|
|
+ " coalesce(pv.views, 0) as views, coalesce(pd.downloads, 0) as downloads, "
|
|
|
|
|
+ " coalesce(pv.openaire_views,0) as openaire_views,coalesce(pd.openaire_downloads, 0) as openaire_downloads "
|
|
|
|
|
+ " FROM " + ConnectDB.getUsageStatsDBSchema() + ".project_downloads pd "
|
|
|
|
|
+ " FULL OUTER JOIN " + ConnectDB.getUsageStatsDBSchema() + ".project_views pv "
|
|
|
|
|
+ " ON pd.id=pv.id WHERE pd.`date`=pv.`date`";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
|
|
logger.info("Inserted data to project_stats");
|
|
|
|
|
|
|
|
|
|
logger.info("Inserting data to datasource_stats");
|
|
|
|
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".datasource_stats AS "
|
|
|
|
|
+ " with datasource_views as "
|
|
|
|
|
+ " (select repository_id, sum(views) views, sum(openaire_views) openaire_views,`date` "
|
|
|
|
|
+ " from " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats group by repository_id,`date`), "
|
|
|
|
|
+ " datasource_downloads as "
|
|
|
|
|
+ " (select repository_id, sum(downloads) downloads,sum(openaire_downloads) openaire_downloads,`date` "
|
|
|
|
|
+ " from " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats group by repository_id,`date`)"
|
|
|
|
|
+ " SELECT coalesce(dv.repository_id, dd.repository_id) as repository_id, coalesce(dd.`date`, dv.`date`) as `date`,"
|
|
|
|
|
+ " coalesce(dv.views, 0) as views, coalesce(dd.downloads, 0) as downloads, "
|
|
|
|
|
+ " coalesce(dv.openaire_views) as openaire_views,coalesce(dd.openaire_downloads, 0) as openaire_downloads "
|
|
|
|
|
+ " FROM datasource_downloads dd "
|
|
|
|
|
+ " FULL OUTER JOIN "
|
|
|
|
|
+ " datasource_views dv ON dd.repository_id=dv.repository_id WHERE dd.`date`=dv.`date`";
|
|
|
|
|
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Inserted data to datasource_stats");
|
|
|
|
|
|
|
|
|
|
// Inserting LaReferencia CoP R5 Metrics
|
|
|
|
|
logger.info("Inserting Lareferencia data to tbl_all_r5_metrics");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".tbl_all_r5_metrics "
|
|
|
|
|
logger.info("Inserting Lareferencia data to counter_r5_stats_with_metrics");
|
|
|
|
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".counter_r5_stats_with_metrics "
|
|
|
|
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_all_r5_metrics";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
|
|
// Inserting IRUS-UK CoP R5 Metrics
|
|
|
|
|
logger.info("Inserting IRUS-UK data into tbl_all_r5_metrics");
|
|
|
|
|
String insertΡ5Stats = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".tbl_all_r5_metrics "
|
|
|
|
|
logger.info("Inserting IRUS-UK data into counter_r5_stats_with_metrics");
|
|
|
|
|
String insertΡ5Stats = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".counter_r5_stats_with_metrics "
|
|
|
|
|
+ "SELECT s.source, d.id AS repository_id, "
|
|
|
|
|
+ "ro.id as result_id, CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) as date, "
|
|
|
|
|
+ "s.unique_item_investigations , s.total_item_investigations, "
|
|
|
|
@ -1040,7 +1337,7 @@ public class PiwikStatsDB {
|
|
|
|
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
|
|
|
|
+ "WHERE s.repository=d.oid AND s.rid=ro.oid AND s.source='IRUS-UK'";
|
|
|
|
|
stmt.executeUpdate(insertΡ5Stats);
|
|
|
|
|
logger.info("Inserted IRUS-UK data into tbl_all_r5_metrics");
|
|
|
|
|
logger.info("Inserted IRUS-UK data into counter_r5_stats_with_metrics");
|
|
|
|
|
|
|
|
|
|
logger.info("Building views at permanent DB starts at: " + new Timestamp(System.currentTimeMillis()));
|
|
|
|
|
|
|
|
|
@ -1088,6 +1385,28 @@ public class PiwikStatsDB {
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Created view on usage_stats on permanent usagestats DB");
|
|
|
|
|
|
|
|
|
|
logger.info("Dropping view projects_stats on permanent usagestats DB");
|
|
|
|
|
sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".projects_stats";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Dropped view on projects_stats on permanent usagestats DB");
|
|
|
|
|
|
|
|
|
|
logger.info("Create view on project_stats on permanent usagestats DB");
|
|
|
|
|
sql = "CREATE VIEW IF NOT EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".project_stats"
|
|
|
|
|
+ " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".project_stats";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Created view on project_stats on permanent usagestats DB");
|
|
|
|
|
|
|
|
|
|
logger.info("Dropping view datasource_stats on permanent usagestats DB");
|
|
|
|
|
sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".datasource_stats";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Dropped view on projects_stats on permanent usagestats DB");
|
|
|
|
|
|
|
|
|
|
logger.info("Create view on datasource_stats on permanent usagestats DB");
|
|
|
|
|
sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsagestatsPermanentDBSchema() + ".datasource_stats"
|
|
|
|
|
+ " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".datasource_stats";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Created view on project_stats on permanent usagestats DB");
|
|
|
|
|
|
|
|
|
|
logger.info("Dropping view COUNTER_R5_Metrics on permanent usagestats DB");
|
|
|
|
|
sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".counter_r5_stats_with_metrics";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
@ -1096,7 +1415,7 @@ public class PiwikStatsDB {
|
|
|
|
|
logger.info("Create view on COUNTER_R5_Metrics on permanent usagestats DB");
|
|
|
|
|
sql = "CREATE VIEW IF NOT EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema()
|
|
|
|
|
+ ".counter_r5_stats_with_metrics"
|
|
|
|
|
+ " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".tbl_all_r5_metrics";
|
|
|
|
|
+ " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".counter_r5_stats_with_metrics";
|
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
logger.info("Created view on COUNTER_R5_Metrics on permanent usagestats DB");
|
|
|
|
|
|
|
|
|
|