dnet-hadoop/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/PiwikStatsDB.java

1113 lines
58 KiB
Java
Raw Normal View History

2023-01-13 14:21:04 +01:00
package eu.dnetlib.oa.graph.usagestatsbuild.export;
import java.sql.*;
import java.text.SimpleDateFormat;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author D. Pierrakos, S. Zoupanos
*/
public class PiwikStatsDB {
private String logPath;
private Statement stmt = null;
private static final Logger logger = LoggerFactory.getLogger(PiwikStatsDB.class);
/**
 * Creates a new instance. The constructor performs no work of its own; it
 * only declares {@code throws Exception} in its signature.
 */
public PiwikStatsDB() throws Exception {
}
/**
 * Rebuilds the usage-stats schemas by delegating to {@code createDatabase()}.
 *
 * @throws Exception if dropping or creating a schema fails
 */
public void recreateDBAndTables() throws Exception {
    // Only the databases are (re)created here. The piwiklog table is built on
    // top of JSON files, so the former createTmpTables() step stays disabled.
    createDatabase();
}
/**
 * Drops and recreates the usage-stats schema, then makes sure the permanent
 * usage-stats schema exists.
 * <p>
 * The three DDL steps run on one {@link Statement}, which is stored in the
 * {@code stmt} field and left open on return (the field is reassigned by
 * other methods of this class). Previously a second statement was created for
 * the last step and the first one was overwritten without being closed.
 *
 * @throws Exception wrapping the underlying failure; the message names the
 *         step ("drop" or "create") that failed
 */
private void createDatabase() throws Exception {
    try {
        stmt = ConnectDB.getHiveConnection().createStatement();

        logger.info("Dropping usagestats DB: " + ConnectDB.getUsageStatsDBSchema());
        String dropDatabase = "DROP DATABASE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + " CASCADE";
        stmt.executeUpdate(dropDatabase);
    } catch (Exception e) {
        logger.error("Failed to drop database: " + e);
        throw new Exception("Failed to drop database: " + e.toString(), e);
    }

    try {
        logger.info("Creating usagestats DB: " + ConnectDB.getUsageStatsDBSchema());
        String createDatabase = "CREATE DATABASE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema();
        stmt.executeUpdate(createDatabase);
        logger.info("Usagestats DB created: " + ConnectDB.getUsageStatsDBSchema());
    } catch (Exception e) {
        logger.error("Failed to create database: " + e);
        throw new Exception("Failed to create database: " + e.toString(), e);
    }

    try {
        // Reuse the statement opened above instead of opening another one and
        // leaking the first.
        logger.info("Creating permanent usagestats DB: " + ConnectDB.getUsagestatsPermanentDBSchema());
        String createPermanentDatabase = "CREATE DATABASE IF NOT EXISTS "
            + ConnectDB.getUsagestatsPermanentDBSchema();
        stmt.executeUpdate(createPermanentDatabase);
        logger.info("Created permanent usagestats DB: " + ConnectDB.getUsagestatsPermanentDBSchema());
    } catch (Exception e) {
        logger.error("Failed to create database: " + e);
        throw new Exception("Failed to create database: " + e.toString(), e);
    }
}
/**
 * Rebuilds the {@code piwiklogdistinct} table in the usage-stats schema.
 * <p>
 * The table is dropped, recreated as a bucketed transactional ORC table and
 * filled with the distinct rows of the raw {@code piwiklog} table whose
 * {@code entity_id} is not null.
 * <p>
 * The JDBC statement is now closed on every exit path; the original never
 * closed it.
 *
 * @throws Exception if initialising the connection or any SQL statement fails
 */
public void createDistinctPiwikLog() throws Exception {
    logger.info("Initialising DB properties");
    ConnectDB.init();

    try (Statement statement = ConnectDB.getHiveConnection().createStatement()) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        logger.info("Dropping piwiklogdistinct");
        String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct";
        statement.executeUpdate(sql);
        logger.info("Dropped piwiklogdistinct");

        logger.info("Creating piwiklogdistinct table");
        // The table was dropped just above; IF NOT EXISTS is kept as a safeguard.
        String sqlCreateTablePiwikLogDistinct = "CREATE TABLE IF NOT EXISTS "
            + ConnectDB.getUsageStatsDBSchema()
            + ".piwiklogdistinct(source INT, id_visit STRING, country STRING, action STRING, url STRING, "
            + "entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) "
            + "clustered by (source, id_visit, action, timestamp, entity_id) "
            + "into 100 buckets stored as orc tblproperties('transactional'='true')";
        statement.executeUpdate(sqlCreateTablePiwikLogDistinct);
        logger.info("Created piwiklogdistinct table");

        logger.info("Inserting data to piwiklogdistinct");
        sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
            + "SELECT DISTINCT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".piwiklog WHERE entity_id is not null";
        statement.executeUpdate(sql);
        logger.info("Inserted data to piwiklogdistinct");
    }
}
/**
 * Runs the three processing stages in order: views, downloads, and the
 * COUNTER CoP R5 tables. A start and an end timestamp are logged around each
 * stage.
 *
 * @throws Exception wrapping whatever a stage throws, with the original
 *         exception kept as the cause
 */
public void processLogs() throws Exception {
    try {
        Timestamp now = new Timestamp(System.currentTimeMillis());
        logger.info("ViewsStats processing starts at: " + now);
        viewsStats();
        now = new Timestamp(System.currentTimeMillis());
        logger.info("ViewsStats processing ends at: " + now);

        now = new Timestamp(System.currentTimeMillis());
        logger.info("DownloadsStats processing starts at: " + now);
        downloadsStats();
        now = new Timestamp(System.currentTimeMillis());
        logger.info("DownloadsStats processing ends at: " + now);

        now = new Timestamp(System.currentTimeMillis());
        logger.info("COUNTER CoP R5 metrics processing starts at: " + now);
        createCoPR5Tables();
        now = new Timestamp(System.currentTimeMillis());
        logger.info("COUNTER CoP R5 metrics processing ends at: " + now);
    } catch (Exception failure) {
        final String message = "Failed to process logs: " + failure;
        logger.error(message);
        throw new Exception(message, failure);
    }
}
/**
 * Runs the Episciences views stage followed by the Episciences downloads
 * stage, logging a start and an end timestamp around each.
 *
 * @throws Exception wrapping whatever a stage throws, with the original
 *         exception kept as the cause
 */
public void processEpisciencesLogs() throws Exception {
    try {
        Timestamp now = new Timestamp(System.currentTimeMillis());
        logger.info("Views Episciences processing starts at: " + now);
        episciencesViewsStats();
        now = new Timestamp(System.currentTimeMillis());
        logger.info("Views Episciences processing ends at: " + now);

        now = new Timestamp(System.currentTimeMillis());
        logger.info("downloads Episciences processing starts at: " + now);
        episciencesDownloadsStats();
        now = new Timestamp(System.currentTimeMillis());
        logger.info("Downloads Episciences processing ends at: " + now);
    } catch (Exception failure) {
        final String message = "Failed to process logs: " + failure;
        logger.error(message);
        throw new Exception(message, failure);
    }
}
/**
 * Builds the temporary OpenAIRE view statistics in the usage-stats schema.
 * <ol>
 * <li>(Re)creates the view {@code openaire_result_views_monthly_tmp}, which
 * counts {@code action='action'} rows of {@code piwiklogdistinct} per entity,
 * month and source for item types {@code oaItem} and {@code repItem}.</li>
 * <li>Recreates the table {@code openaire_views_stats_tmp} by joining that view
 * with {@code datasource} (on {@code piwik_id}) and {@code result_oids}
 * (on {@code oid}).</li>
 * <li>Creates {@code openaire_pageviews_stats_tmp}, if it does not exist, from
 * the rows whose source equals {@code ExecuteWorkflow.portalMatomoID}; here
 * the join with {@code result_oids} is on {@code id}.</li>
 * </ol>
 * Changes from the previous version: the initial drop now targets the view
 * that is created afterwards (it used to name
 * {@code openaire_piwikresult_views_monthly_tmp}, which nothing in this method
 * creates), and the statement is closed on failure as well as on success.
 *
 * @throws Exception if any SQL statement fails
 */
public void viewsStats() throws Exception {
    try (Statement statement = ConnectDB.getHiveConnection().createStatement()) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        final String usageStatsSchema = ConnectDB.getUsageStatsDBSchema();
        final String statsSchema = ConnectDB.getStatsDBSchema();

        logger.info("Dropping openaire_result_views_monthly_tmp view");
        String drop_result_views_monthly = "DROP VIEW IF EXISTS "
            + usageStatsSchema
            + ".openaire_result_views_monthly_tmp";
        statement.executeUpdate(drop_result_views_monthly);
        logger.info("Dropped openaire_result_views_monthly_tmp view");

        logger.info("Creating openaire_result_views_monthly_tmp view");
        String create_result_views_monthly = "CREATE OR REPLACE VIEW " + usageStatsSchema
            + ".openaire_result_views_monthly_tmp "
            + "AS SELECT entity_id, "
            + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
            + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
            + "AS openaire_referrer, "
            + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
            + "FROM " + usageStatsSchema
            + ".piwiklogdistinct where action='action' and (source_item_type='oaItem' or "
            + "source_item_type='repItem') "
            + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
            + "source ORDER BY source, entity_id";
        statement.executeUpdate(create_result_views_monthly);
        logger.info("Created openaire_result_views_monthly_tmp table");

        logger.info("Dropping openaire_views_stats_tmp table");
        String drop_views_stats = "DROP TABLE IF EXISTS "
            + usageStatsSchema
            + ".openaire_views_stats_tmp";
        statement.executeUpdate(drop_views_stats);
        logger.info("Dropped openaire_views_stats_tmp table");

        logger.info("Creating openaire_views_stats_tmp table");
        String create_views_stats = "CREATE TABLE IF NOT EXISTS " + usageStatsSchema
            + ".openaire_views_stats_tmp "
            + "AS SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
            + "max(views) AS count, max(openaire_referrer) AS openaire "
            + "FROM " + usageStatsSchema + ".openaire_result_views_monthly_tmp p, "
            + statsSchema + ".datasource d, " + statsSchema + ".result_oids ro "
            + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
            + "AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
            + "GROUP BY d.id, ro.id, month "
            + "ORDER BY d.id, ro.id, month ";
        statement.executeUpdate(create_views_stats);
        logger.info("Created openaire_views_stats_tmp table");

        logger.info("Creating openaire_pageviews_stats_tmp table");
        // NOTE(review): unlike the table above, this one is not dropped first and
        // joins on ro.id rather than ro.oid - presumably intentional; confirm.
        String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + usageStatsSchema
            + ".openaire_pageviews_stats_tmp AS SELECT "
            + "'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count "
            + "FROM " + usageStatsSchema + ".openaire_result_views_monthly_tmp p, "
            + statsSchema + ".datasource d, " + statsSchema + ".result_oids ro "
            + "WHERE p.source=" + ExecuteWorkflow.portalMatomoID
            + " AND p.source=d.piwik_id and p.id=ro.id AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
            + "AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
            + "GROUP BY d.id, ro.id, month "
            + "ORDER BY d.id, ro.id, month ";
        statement.executeUpdate(create_pageviews_stats);
        logger.info("Created pageviews_stats table");
    }
    // The Hive connection itself is deliberately left open.
}
/**
 * Builds the temporary OpenAIRE download statistics in the usage-stats schema.
 * <ol>
 * <li>(Re)creates the view {@code openaire_result_downloads_monthly_tmp}, which
 * counts {@code action='download'} rows of {@code piwiklogdistinct} per
 * entity, month and source for item types {@code oaItem} and
 * {@code repItem}.</li>
 * <li>Recreates the table {@code openaire_downloads_stats_tmp} by joining that
 * view with {@code datasource} (on {@code piwik_id}) and {@code result_oids}
 * (on {@code oid}).</li>
 * <li>Drops the helper view again.</li>
 * </ol>
 * Changes from the previous version: the final "Dropped ..." message is logged
 * only after the drop has run, and the statement is closed on failure as well
 * as on success.
 *
 * @throws Exception if any SQL statement fails
 */
private void downloadsStats() throws Exception {
    try (Statement statement = ConnectDB.getHiveConnection().createStatement()) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        final String usageStatsSchema = ConnectDB.getUsageStatsDBSchema();
        final String statsSchema = ConnectDB.getStatsDBSchema();

        logger.info("Dropping openaire_result_downloads_monthly_tmp view");
        String drop_result_downloads_monthly = "DROP VIEW IF EXISTS "
            + usageStatsSchema
            + ".openaire_result_downloads_monthly_tmp";
        statement.executeUpdate(drop_result_downloads_monthly);
        logger.info("Dropped openaire_result_downloads_monthly_tmp view");

        logger.info("Creating openaire_result_downloads_monthly_tmp view");
        String sql = "CREATE OR REPLACE VIEW " + usageStatsSchema
            + ".openaire_result_downloads_monthly_tmp "
            + "AS SELECT entity_id, "
            + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
            + "COUNT(entity_id) as downloads, "
            + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
            + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
            + "FROM " + usageStatsSchema + ".piwiklogdistinct where action='download' "
            + "AND (source_item_type='oaItem' OR source_item_type='repItem') "
            + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source "
            + "ORDER BY source, entity_id, month";
        statement.executeUpdate(sql);
        logger.info("Created openaire_result_downloads_monthly_tmp view");

        logger.info("Dropping openaire_downloads_stats_tmp table");
        String drop_downloads_stats = "DROP TABLE IF EXISTS "
            + usageStatsSchema
            + ".openaire_downloads_stats_tmp";
        statement.executeUpdate(drop_downloads_stats);
        logger.info("Dropped openaire_downloads_stats_tmp table");

        logger.info("Creating openaire_downloads_stats_tmp table");
        sql = "CREATE TABLE IF NOT EXISTS " + usageStatsSchema + ".openaire_downloads_stats_tmp AS "
            + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
            + "max(downloads) AS count, max(openaire_referrer) AS openaire "
            + "FROM " + usageStatsSchema + ".openaire_result_downloads_monthly_tmp p, "
            + statsSchema + ".datasource d, " + statsSchema + ".result_oids ro "
            + "WHERE p.source=d.piwik_id and p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
            + "AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
            + "GROUP BY d.id, ro.id, month "
            + "ORDER BY d.id, ro.id, month ";
        statement.executeUpdate(sql);
        logger.info("Created downloads_stats table");

        logger.info("Dropping openaire_result_downloads_monthly_tmp view");
        sql = "DROP VIEW IF EXISTS " + usageStatsSchema + ".openaire_result_downloads_monthly_tmp";
        statement.executeUpdate(sql);
        // Logged after the drop so the message is only emitted when it succeeded.
        logger.info("Dropped openaire_result_downloads_monthly_tmp view ");
    }
    // The Hive connection itself is deliberately left open.
}
/**
 * Builds the temporary Pedocs statistics tables from the legacy raw tables.
 * <p>
 * {@code pedocs_views_stats_tmp} is filled from {@code pedocsoldviews} and
 * {@code pedocs_downloads_stats_tmp} from {@code pedocsolddownloads}, each
 * joined with {@code result_oids} on {@code oid = identifier} and tagged with
 * the fixed repository id
 * {@code opendoar____::ab1a4d0dd4d48a2ba1077c4494791306}.
 * <p>
 * Changes from the previous version:
 * <ul>
 * <li>{@code pedocs_downloads_stats_tmp} is now dropped before it is created.
 * Previously only {@code pedocs_downloads_stats} was dropped, so an existing
 * tmp table made the {@code CREATE TABLE IF NOT EXISTS ... AS} a no-op. The
 * original drop is kept as well.</li>
 * <li>"Dropped ..." messages are logged after the drop has executed.</li>
 * </ul>
 * NOTE(review): the statement is stored in the {@code stmt} field and is not
 * closed here, as before; confirm whether other code relies on that field.
 *
 * @throws Exception if any SQL statement fails
 */
public void uploadOldPedocs() throws Exception {
    stmt = ConnectDB.getHiveConnection().createStatement();
    ConnectDB.getHiveConnection().setAutoCommit(false);

    // Drop the Pedocs views tmp table
    logger.info("Dropping Pedocs pedocs_views_stats_tmp table");
    String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_views_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped pedocs_views_stats_tmp table ");

    // Drop the Pedocs downloads table (original behaviour, kept)
    logger.info("Dropping pedocs_downloads_stats table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_downloads_stats";
    stmt.executeUpdate(sql);
    logger.info("Dropped pedocs_downloads_stats table ");

    // Drop the Pedocs downloads tmp table, which is the one created below
    logger.info("Dropping pedocs_downloads_stats_tmp table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_downloads_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped pedocs_downloads_stats_tmp table ");

    // Create the Pedocs views tmp table
    logger.info("Creating Pedocs pedocs_views_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_views_stats_tmp AS "
        + "SELECT 'OpenAIRE' as source, 'opendoar____::ab1a4d0dd4d48a2ba1077c4494791306' as repository_id,"
        + "r.id as result_id,date,counter_abstract as count, 0 as openaire "
        + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".pedocsoldviews p, " + ConnectDB.getStatsDBSchema()
        + ".result_oids r where r.oid=p.identifier";
    stmt.executeUpdate(sql);
    logger.info("Created pedocs_views_stats_tmp table ");

    // Create the Pedocs downloads tmp table
    logger.info("Creating Pedocs pedocs_downloads_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_downloads_stats_tmp AS "
        + "SELECT 'OpenAIRE' as source, 'opendoar____::ab1a4d0dd4d48a2ba1077c4494791306' as repository_id,"
        + "r.id as result_id, date, counter as count, 0 as openaire "
        + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".pedocsolddownloads p, " + ConnectDB.getStatsDBSchema()
        + ".result_oids r where r.oid=p.identifier";
    stmt.executeUpdate(sql);
    logger.info("Created pedocs_downloads_stats_tmp table ");
}
/**
 * Builds the temporary TU Delft statistics tables in the usage-stats schema.
 * <p>
 * Two helper views aggregate {@code piwiklogdistinct} rows with
 * {@code source=252} per entity and month, one for {@code action='action'}
 * and one for {@code action='download'}. Each view is joined with
 * {@code result_oids} on {@code concat('tud:', p.id) = ro.oid} and with the
 * datasource {@code opendoar____::c9892a989183de32e976c6f04e700201} to create
 * {@code tudelft_views_stats_tmp} and {@code tudelft_downloads_stats_tmp}.
 * The helper views are dropped again at the end.
 * <p>
 * Change from the previous version: every "Dropped ..." message is logged
 * after its drop statement has executed, not before.
 * <p>
 * NOTE(review): the statement is stored in the {@code stmt} field and is not
 * closed here, as before; confirm whether other code relies on that field.
 *
 * @throws Exception if any SQL statement fails
 */
public void uploadTUDELFTStats() throws Exception {
    stmt = ConnectDB.getHiveConnection().createStatement();
    ConnectDB.getHiveConnection().setAutoCommit(false);

    // Drop the views helper view
    logger.info("Dropping TUDELFT tudelft_result_views_monthly_tmp view");
    String sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_views_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_result_views_monthly_tmp view ");

    // Drop the downloads helper view
    logger.info("Dropping TUDELFT tudelft_result_downloads_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_downloads_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_result_downloads_monthly_tmp view ");

    // Drop the views stats tmp table
    logger.info("Dropping TUDELFT tudelft_views_stats_tmp table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_views_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_views_stats_tmp table ");

    // Drop the downloads stats tmp table
    logger.info("Dropping TUDELFT tudelft_downloads_stats_tmp table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_downloads_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_downloads_stats_tmp table ");

    // Create the views helper view
    logger.info("Creating TUDELFT tudelft_result_views_monthly_tmp view");
    sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_views_monthly_tmp "
        + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
        + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
        + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
        + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
        + "WHERE action='action' and (source_item_type='oaItem' or source_item_type='repItem') and source=252 "
        + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
    stmt.executeUpdate(sql);
    logger.info("Created tudelft_result_views_monthly_tmp view ");

    // Create the views stats tmp table
    logger.info("Creating TUDELFT tudelft_views_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_views_stats_tmp AS "
        + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
        + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
        + ".tudelft_result_views_monthly_tmp p, "
        + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
        + "WHERE concat('tud:',p.id)=ro.oid and d.id='opendoar____::c9892a989183de32e976c6f04e700201' "
        + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
    stmt.executeUpdate(sql);
    logger.info("Created TUDELFT tudelft_views_stats_tmp table");

    // Create the downloads helper view (its count column is also aliased "views")
    logger.info("Creating TUDELFT tudelft_result_downloads_monthly_tmp view");
    sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_downloads_monthly_tmp "
        + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
        + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
        + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
        + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
        + "WHERE action='download' and (source_item_type='oaItem' or source_item_type='repItem') and source=252 "
        + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
    stmt.executeUpdate(sql);
    logger.info("Created tudelft_result_downloads_monthly_tmp view ");

    // Create the downloads stats tmp table
    logger.info("Creating TUDELFT tudelft_downloads_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_downloads_stats_tmp AS "
        + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
        + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
        + ".tudelft_result_downloads_monthly_tmp p, "
        + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
        + "WHERE concat('tud:',p.id)=ro.oid and d.id='opendoar____::c9892a989183de32e976c6f04e700201' "
        + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
    stmt.executeUpdate(sql);
    logger.info("Created TUDELFT tudelft_downloads_stats_tmp table");

    // Clean up: drop the views helper view
    logger.info("Dropping TUDELFT tudelft_result_views_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_views_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_result_views_monthly_tmp view ");

    // Clean up: drop the downloads helper view
    logger.info("Dropping TUDELFT tudelft_result_downloads_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_downloads_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_result_downloads_monthly_tmp view ");
}
/**
 * Builds the temporary B2SHARE statistics tables in the usage-stats schema.
 * <p>
 * Two helper views aggregate {@code piwiklogdistinct} rows with
 * {@code source=412} per entity and month, one for {@code action='action'}
 * and one for {@code action='download'}. Each view is joined with
 * {@code result_oids} on {@code p.id = ro.oid} and with the datasource
 * {@code re3data_____::ad3609c351bd520edf6f10f5e0d9b877} to create
 * {@code b2share_views_stats_tmp} and {@code b2share_downloads_stats_tmp}.
 * The helper views are dropped again at the end.
 * <p>
 * Change from the previous version: every "Dropped ..." message is logged
 * after its drop statement has executed, not before.
 * <p>
 * NOTE(review): the statement is stored in the {@code stmt} field and is not
 * closed here, as before; confirm whether other code relies on that field.
 *
 * @throws Exception if any SQL statement fails
 */
public void uploadB2SHAREStats() throws Exception {
    stmt = ConnectDB.getHiveConnection().createStatement();
    ConnectDB.getHiveConnection().setAutoCommit(false);

    // Drop the views helper view
    logger.info("Dropping B2SHARE b2share_result_views_monthly_tmp view");
    String sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_views_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_result_views_monthly_tmp view ");

    // Drop the downloads helper view
    logger.info("Dropping b2SHARE b2share_result_downloads_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_downloads_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_result_downloads_monthly_tmp view ");

    // Drop the views stats tmp table
    logger.info("Dropping B2SHARE b2share_views_stats_tmp table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_views_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_views_stats_tmp table ");

    // Drop the downloads stats tmp table
    logger.info("Dropping B2SHARE b2share_downloads_stats_tmp table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_downloads_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_downloads_stats_tmp table ");

    // Create the views helper view
    logger.info("Creating B2SHARE b2share_result_views_monthly_tmp view");
    sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_views_monthly_tmp "
        + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
        + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
        + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
        + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
        + "WHERE action='action' and (source_item_type='oaItem' or source_item_type='repItem') and source=412 "
        + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
    stmt.executeUpdate(sql);
    logger.info("Created b2share_result_views_monthly_tmp view ");

    // Create the views stats tmp table
    logger.info("Creating B2SHARE b2share_views_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_views_stats_tmp AS "
        + "SELECT 'B2SHARE' as source, d.id as repository_id, ro.id as result_id, month as date, "
        + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
        + ".b2share_result_views_monthly_tmp p, "
        + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
        + "WHERE p.id=ro.oid and d.id='re3data_____::ad3609c351bd520edf6f10f5e0d9b877' "
        + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
    stmt.executeUpdate(sql);
    logger.info("Created B2SHARE b2share_views_stats_tmp table");

    // Create the downloads helper view (its count column is also aliased "views")
    logger.info("Creating B2SHARE b2share_result_downloads_monthly_tmp view");
    sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_downloads_monthly_tmp "
        + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
        + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
        + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
        + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
        + "WHERE action='download' and (source_item_type='oaItem' or source_item_type='repItem') and source=412 "
        + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
    stmt.executeUpdate(sql);
    logger.info("Created b2share_result_downloads_monthly_tmp view ");

    // Create the downloads stats tmp table
    logger.info("Creating B2SHARE b2share_downloads_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_downloads_stats_tmp AS "
        + "SELECT 'B2SHARE' as source, d.id as repository_id, ro.id as result_id, month as date, "
        + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
        + ".b2share_result_downloads_monthly_tmp p, "
        + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
        + "WHERE p.id=ro.oid and d.id='re3data_____::ad3609c351bd520edf6f10f5e0d9b877' "
        + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
    stmt.executeUpdate(sql);
    logger.info("Created B2SHARE b2share_downloads_stats_tmp table");

    // Clean up: drop the views helper view
    logger.info("Dropping B2SHARE b2share_result_views_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_views_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_result_views_monthly_tmp view ");

    // Clean up: drop the downloads helper view
    logger.info("Dropping B2SHARE b2share_result_downloads_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_downloads_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_result_downloads_monthly_tmp view ");
}
/**
 * Rebuilds the {@code episciencesviews} table in the usage-stats schema.
 * <p>
 * The table is dropped and recreated empty. Then, for every datasource whose
 * {@code websiteurl} contains "episciences" and that is validated or
 * harvested, a per-journal helper view aggregates the {@code action='action'}
 * rows of the raw {@code episcienceslog} table whose {@code entity_id}
 * contains the journal suffix. The view is joined with {@code result_oids},
 * inserted into {@code episciencesviews} and dropped again.
 * <p>
 * Changes from the previous version:
 * <ul>
 * <li>The per-journal drop uses {@code DROP VIEW IF EXISTS}, matching
 * {@code episciencesDownloadsStats()}, so it no longer depends on the view
 * already existing.</li>
 * <li>The statement, prepared statement and result set are closed on every
 * exit path.</li>
 * <li>A space now separates the {@code LIKE} literal from {@code GROUP BY} in
 * the generated view definition.</li>
 * </ul>
 *
 * @throws Exception if any SQL statement fails
 */
public void episciencesViewsStats() throws Exception {
    logger.info("Creating episciences Views");

    try (Statement statement = ConnectDB.getHiveConnection().createStatement()) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        final String usageStatsSchema = ConnectDB.getUsageStatsDBSchema();
        final String statsSchema = ConnectDB.getStatsDBSchema();

        logger.info("Dropping Episcience Views Table");
        String dropEpisciencesViewsTable = "DROP TABLE IF EXISTS " + usageStatsSchema
            + ".episciencesviews ";
        statement.executeUpdate(dropEpisciencesViewsTable);
        logger.info("Dropped Episcience Views Table");

        logger.info("Creating Episcience Views Table");
        String createEpisciencesViewsTable = "CREATE TABLE IF NOT EXISTS " + usageStatsSchema
            + ".episciencesviews (source STRING, repository_id STRING, result_id STRING, date STRING, count INT, openaire INT)"
            + " clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true') ";
        statement.executeUpdate(createEpisciencesViewsTable);

        // Column 1: datasource id. Column 2: the part of websiteurl before the
        // first dot, from character 9 onwards (used below as the journal suffix).
        String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
            + statsSchema
            + ".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true)";

        try (PreparedStatement journalsQuery = ConnectDB.DB_HIVE_CONNECTION
            .prepareStatement(returnEpisciencesJournals);
            ResultSet journals = journalsQuery.executeQuery()) {

            while (journals.next()) {
                String journal_openaire_id = journals.getString(1);
                String episciencesSuffix = journals.getString(2);
                logger.info("Working on journal_id:" + journal_openaire_id + " suffix:" + episciencesSuffix);

                // '-' is replaced by '_' when the suffix is used in the view name.
                final String monthlyView = usageStatsSchema
                    + "." + episciencesSuffix.replace("-", "_") + "_result_views_monthly_tmp";

                logger.info("Dropping episciencesSuffix_result_views_monthly_tmp table");
                String dropepisciencesSuffixView = "DROP VIEW IF EXISTS " + monthlyView;
                statement.executeUpdate(dropepisciencesSuffixView);
                logger.info("Dropped episciencesSuffix_result_views_monthly_tmp table");

                logger.info("Creating episciencesSuffix_result_views_monthly_tmp table");
                String create_result_views_monthly = "CREATE OR REPLACE VIEW " + monthlyView + " "
                    + "AS SELECT entity_id, "
                    + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
                    + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
                    + "AS openaire_referrer, "
                    + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
                    + "FROM " + ConnectDB.getUsageRawDataDBSchema()
                    + ".episcienceslog where action='action' and (source_item_type='oaItem' or "
                    + "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%' "
                    + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
                    + "source ORDER BY source, entity_id";
                statement.executeUpdate(create_result_views_monthly);
                logger.info("Created episciencesSuffix_result_views_monthly_tmp table");

                logger.info("Inserting episciencesSuffix_result_views_monthly_tmp into EpisciencesViews Table");
                String insertIntoEpisciencesViewsTable = "INSERT INTO " + usageStatsSchema
                    + ".episciencesviews SELECT 'Episciences' as source, '"
                    + journal_openaire_id + "' as repository_id, ro.id as result_id, month as date,"
                    + " max(views) AS count, max(openaire_referrer) AS openaire "
                    + "FROM " + monthlyView + " p,"
                    + statsSchema
                    + ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
                statement.executeUpdate(insertIntoEpisciencesViewsTable);
                logger.info("Inserted episciencesSuffix_result_views_monthly_tmp into EpisciencesViews Table");

                statement.executeUpdate(dropepisciencesSuffixView);
                logger.info("Dropped episciencesSuffix_result_views_monthly_tmp view");
            }
        }
        logger.info("Episciences Views Created");
    }
}
/**
 * Rebuilds the {@code episciencesdownloads} table in the usage-stats schema.
 * <p>
 * The table is dropped and recreated empty. Then, for every datasource whose
 * {@code websiteurl} contains "episciences" and that is validated or
 * harvested, a per-journal helper view aggregates the
 * {@code action='download'} rows of the raw {@code episcienceslog} table whose
 * {@code entity_id} contains the journal suffix. The view is joined with
 * {@code result_oids}, inserted into {@code episciencesdownloads} and dropped
 * again.
 * <p>
 * Changes from the previous version:
 * <ul>
 * <li>The initial drop now targets {@code episciencesdownloads}. It used to
 * name {@code episciencesvdownloads}, so the real table survived and every
 * re-run appended the same rows again.</li>
 * <li>The statement, prepared statement and result set are closed on every
 * exit path.</li>
 * <li>A space now separates the {@code LIKE} literal from {@code GROUP BY} in
 * the generated view definition.</li>
 * </ul>
 *
 * @throws Exception if any SQL statement fails
 */
public void episciencesDownloadsStats() throws Exception {
    logger.info("Creating episciences Downloads");

    try (Statement statement = ConnectDB.getHiveConnection().createStatement()) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        final String usageStatsSchema = ConnectDB.getUsageStatsDBSchema();
        final String statsSchema = ConnectDB.getStatsDBSchema();

        logger.info("Dropping Episcience Downloads Table");
        String dropEpisciencesDownloadsTable = "DROP TABLE IF EXISTS " + usageStatsSchema
            + ".episciencesdownloads ";
        statement.executeUpdate(dropEpisciencesDownloadsTable);
        logger.info("Dropped Episcience Downloads Table");

        logger.info("Creating Episcience Downloads Table");
        String createEpisciencesDownloadsTable = "CREATE TABLE IF NOT EXISTS " + usageStatsSchema
            + ".episciencesdownloads (source STRING, repository_id STRING, result_id STRING, date STRING, count INT, openaire INT)"
            + " clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true') ";
        statement.executeUpdate(createEpisciencesDownloadsTable);

        // Column 1: datasource id. Column 2: the part of websiteurl before the
        // first dot, from character 9 onwards (used below as the journal suffix).
        String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
            + statsSchema
            + ".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true)";

        try (PreparedStatement journalsQuery = ConnectDB.DB_HIVE_CONNECTION
            .prepareStatement(returnEpisciencesJournals);
            ResultSet journals = journalsQuery.executeQuery()) {

            while (journals.next()) {
                String journal_openaire_id = journals.getString(1);
                String episciencesSuffix = journals.getString(2);
                logger.info("Working on journal_id:" + journal_openaire_id + " suffix:" + episciencesSuffix);

                // '-' is replaced by '_' when the suffix is used in the view name.
                final String monthlyView = usageStatsSchema
                    + "." + episciencesSuffix.replace("-", "_") + "_result_downloads_monthly_tmp";

                logger.info("Dropping episciencesSuffix_result_downloads_monthly_tmp table");
                String dropepisciencesSuffixDownloads = "DROP VIEW IF EXISTS " + monthlyView;
                statement.executeUpdate(dropepisciencesSuffixDownloads);

                logger.info("Creating episciencesSuffix_result_downloads_monthly_tmp table");
                // The download count column is aliased "views"; the insert below reads it by that name.
                String create_result_downloads_monthly = "CREATE OR REPLACE VIEW " + monthlyView + " "
                    + "AS SELECT entity_id, "
                    + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
                    + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
                    + "AS openaire_referrer, "
                    + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
                    + "FROM " + ConnectDB.getUsageRawDataDBSchema()
                    + ".episcienceslog where action='download' and (source_item_type='oaItem' or "
                    + "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%' "
                    + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
                    + "source ORDER BY source, entity_id";
                statement.executeUpdate(create_result_downloads_monthly);
                logger.info("Created episciencesSuffix_result_downloads_monthly_tmp table");

                logger.info("Inserting episciencesSuffix_result_downloads_monthly_tmp into EpisciencesDownloadsTable");
                String insertIntoEpisciencesDownloadsTable = "INSERT INTO " + usageStatsSchema
                    + ".episciencesdownloads SELECT 'Episciences' as source, '"
                    + journal_openaire_id + "' as repository_id, ro.id as result_id, month as date,"
                    + " max(views) AS count, max(openaire_referrer) AS openaire "
                    + "FROM " + monthlyView + " p,"
                    + statsSchema
                    + ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
                statement.executeUpdate(insertIntoEpisciencesDownloadsTable);
                logger.info("Inserted episciencesSuffix_result_downloads_monthly_tmp into EpisciencesDownloadsTable");

                statement.executeUpdate(dropepisciencesSuffixDownloads);
                logger.info("Dropped episciencesSuffix_result_downloads_monthly_tmp view");
            }
        }
    }
}
/**
 * Builds the CoP R5 metric tables of the usage-stats schema from {@code piwiklogdistinct}.
 * <p>
 * For each of the four metrics (unique/total item investigations, unique/total item requests) a
 * per-visit view is (re)created, the matching {@code tbl_*} table is dropped and rebuilt by joining
 * the view with the {@code datasource} and {@code result_oids} tables of the stats schema.
 * Finally {@code tbl_all_r5_metrics} is rebuilt by full-outer-joining the four tables.
 * <p>
 * The Hive connection obtained from {@code ConnectDB} is closed once all statements succeeded.
 *
 * @throws Exception if obtaining the connection or executing any of the statements fails
 */
private void createCoPR5Tables() throws Exception {
	final String usageDb = ConnectDB.getUsageStatsDBSchema();
	final String statsDb = ConnectDB.getStatsDBSchema();

	// try-with-resources releases the statement even when one of the queries below fails
	try (Statement stmt = ConnectDB.getHiveConnection().createStatement()) {
		ConnectDB.getHiveConnection().setAutoCommit(false);

		// Unique Item Investigations
		// REMOVE sessionid from total
		logger.info("Create View Unique_Item_Investigations");
		// Both CASE branches yield 1 on purpose: every (visit, entity, month, source) group
		// counts once, no matter how many log rows it contains.
		String sql = "CREATE OR REPLACE VIEW " + usageDb + ".view_unique_item_investigations "
			+ "AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
			+ "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_investigations, "
			+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
			+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
			+ "FROM " + usageDb + ".piwiklogdistinct "
			+ "WHERE (source_item_type='oaItem' or source_item_type='repItem') "
			+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
			+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
		stmt.executeUpdate(sql);
		logger.info("Created View Unique_Item_Investigations");

		logger.info("Drop Table Unique_Item_Investigations");
		sql = "DROP TABLE IF EXISTS " + usageDb + ".tbl_unique_item_investigations ";
		stmt.executeUpdate(sql);
		logger.info("Dropped Table Unique_Item_Investigations");

		logger.info("Create Table tbl_unique_item_investigations");
		// The oid filter discards entity ids that are literally '200', '204', '404', '400' or '503'
		sql = "CREATE TABLE " + usageDb + ".tbl_unique_item_investigations as "
			+ "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
			+ "sum(unique_item_investigations) AS unique_item_investigations, sum(openaire_referrer) AS openaire "
			+ "FROM " + usageDb + ".view_unique_item_investigations p, "
			+ statsDb + ".datasource d," + statsDb + ".result_oids ro "
			+ "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
			+ "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
			+ "GROUP BY d.id, ro.id, month ";
		stmt.executeUpdate(sql);
		logger.info("Created Table tbl_unique_item_investigations");

		// Total Item Investigations
		logger.info("Create View view_total_item_investigations");
		sql = "CREATE OR REPLACE VIEW " + usageDb + ".view_total_item_investigations "
			+ "AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
			+ "COUNT(entity_id) AS total_item_investigations, "
			+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
			+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
			+ "FROM " + usageDb + ".piwiklogdistinct "
			+ "WHERE (source_item_type='oaItem' or source_item_type='repItem') "
			+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
			+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
		stmt.executeUpdate(sql);
		logger.info("Created View view_total_item_investigations");

		logger.info("Drop Table tbl_total_item_investigations");
		sql = "DROP TABLE IF EXISTS " + usageDb + ".tbl_total_item_investigations ";
		stmt.executeUpdate(sql);
		logger.info("Dropped Table tbl_total_item_investigations");

		logger.info("Create Table tbl_total_item_investigations");
		sql = "CREATE TABLE " + usageDb + ".tbl_total_item_investigations AS "
			+ "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
			+ "sum(total_item_investigations) AS total_item_investigations, sum(openaire_referrer) AS openaire "
			+ "FROM " + usageDb + ".view_total_item_investigations p, "
			+ statsDb + ".datasource d," + statsDb + ".result_oids ro "
			+ "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
			+ "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
			+ "GROUP BY d.id, ro.id, month ";
		stmt.executeUpdate(sql);
		logger.info("Created Table tbl_total_item_investigations");

		// Unique Item Requests (same as investigations, restricted to action='download')
		logger.info("Create View view_unique_item_requests");
		sql = "CREATE OR REPLACE VIEW " + usageDb + ".view_unique_item_requests AS "
			+ "SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
			+ "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_requests, "
			+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
			+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
			+ "FROM " + usageDb + ".piwiklogdistinct "
			+ "WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem') "
			+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
			+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
		stmt.executeUpdate(sql);
		logger.info("Created View view_unique_item_requests");

		logger.info("Drop Table Unique_Item_Requests");
		sql = "DROP TABLE IF EXISTS " + usageDb + ".tbl_unique_item_requests ";
		stmt.executeUpdate(sql);
		logger.info("Dropped Table Unique_Item_Requests");

		logger.info("Create Table tbl_unique_item_requests");
		sql = "CREATE TABLE " + usageDb + ".tbl_unique_item_requests as "
			+ "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
			+ "sum(unique_item_requests) AS unique_item_requests, sum(openaire_referrer) AS openaire "
			+ "FROM " + usageDb + ".view_unique_item_requests p, "
			+ statsDb + ".datasource d," + statsDb + ".result_oids ro "
			+ "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
			+ "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
			+ "GROUP BY d.id, ro.id, month ";
		stmt.executeUpdate(sql);
		logger.info("Created Table tbl_unique_item_requests");

		// Total Item Requests
		logger.info("Create View view_total_item_requests");
		sql = "CREATE OR REPLACE VIEW " + usageDb + ".view_total_item_requests "
			+ "AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
			+ "COUNT(entity_id) AS total_item_requests, "
			+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
			+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
			+ "FROM " + usageDb + ".piwiklogdistinct "
			+ "WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem') "
			+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
			+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
		stmt.executeUpdate(sql);
		logger.info("Created View view_total_item_requests");

		logger.info("Drop Table tbl_total_item_requests");
		sql = "DROP TABLE IF EXISTS " + usageDb + ".tbl_total_item_requests ";
		stmt.executeUpdate(sql);
		logger.info("Dropped Table tbl_total_item_requests");

		logger.info("Create Table tbl_total_item_requests");
		sql = "CREATE TABLE " + usageDb + ".tbl_total_item_requests as "
			+ "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
			+ "sum(total_item_requests) AS total_item_requests, sum(openaire_referrer) AS openaire "
			+ "FROM " + usageDb + ".view_total_item_requests p, "
			+ statsDb + ".datasource d," + statsDb + ".result_oids ro "
			+ "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
			+ "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
			+ "GROUP BY d.id, ro.id, month ";
		stmt.executeUpdate(sql);
		logger.info("Created Table tbl_total_item_requests");

		// All CoP R5 metrics Table
		logger.info("Drop Table tbl_all_r5_metrics");
		sql = "DROP TABLE IF EXISTS " + usageDb + ".tbl_all_r5_metrics ";
		stmt.executeUpdate(sql);
		logger.info("Dropped Table tbl_all_r5_metrics");

		logger.info("Create Table tbl_all_r5_metrics");
		// tmp1 must also match on repository_id: the joined tables are grouped per repository and
		// 'source' is the constant 'OpenAIRE', so without it a result that lives in several
		// repositories would be cross-matched and its rows duplicated with foreign counts.
		sql = "CREATE TABLE IF NOT EXISTS " + usageDb
			+ ".tbl_all_r5_metrics as "
			+ "WITH tmp1 as (SELECT coalesce(ds.repository_id, vs.repository_id) as repository_id, "
			+ "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
			+ "coalesce(vs.unique_item_investigations, 0) as unique_item_investigations, "
			+ "coalesce(ds.total_item_investigations, 0) as total_item_investigations "
			+ "FROM " + usageDb + ".tbl_unique_item_investigations AS vs "
			+ "FULL OUTER JOIN " + usageDb + ".tbl_total_item_investigations AS ds "
			+ " ON ds.source=vs.source AND ds.repository_id=vs.repository_id "
			+ "AND ds.result_id=vs.result_id AND ds.date=vs.date), "
			+ "tmp2 AS (select coalesce (ds.repository_id, vs.repository_id) as repository_id, "
			+ "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
			+ "coalesce(ds.total_item_investigations, 0) as total_item_investigations, "
			+ "coalesce(ds.unique_item_investigations, 0) as unique_item_investigations, "
			+ " coalesce(vs.unique_item_requests, 0) as unique_item_requests FROM tmp1 "
			+ "AS ds FULL OUTER JOIN " + usageDb + ".tbl_unique_item_requests AS vs "
			+ "ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date) "
			+ "SELECT 'OpenAIRE' as source, coalesce (ds.repository_id, vs.repository_id) as repository_id, "
			+ "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
			+ "coalesce(ds.unique_item_investigations, 0) as unique_item_investigations, "
			+ "coalesce(ds.total_item_investigations, 0) as total_item_investigations, "
			+ "coalesce(ds.unique_item_requests, 0) as unique_item_requests, "
			+ "coalesce(vs.total_item_requests, 0) as total_item_requests "
			+ "FROM tmp2 AS ds FULL OUTER JOIN " + usageDb + ".tbl_total_item_requests "
			+ "AS vs ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date";
		stmt.executeUpdate(sql);
		logger.info("Created Table tbl_all_r5_metrics");
	}
	// As before, the connection is only closed after every statement succeeded
	ConnectDB.getHiveConnection().close();
}
/**
 * Rebuilds the final usage-statistics tables and publishes them as views in the permanent schema.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>drop {@code views_stats}, {@code downloads_stats}, {@code pageviews_stats} and
 * {@code usage_stats} in the usage-stats schema;</li>
 * <li>recreate {@code views_stats}, {@code downloads_stats} and {@code pageviews_stats} and fill
 * them from the per-source temporary tables (plus the Datacite tables of the raw-data schema);</li>
 * <li>rebuild {@code full_dates} with one {@code yyyy/MM} row per month from 2016-01 up to the
 * current month;</li>
 * <li>build {@code usage_stats} as the full outer join of downloads and views;</li>
 * <li>append LaReferencia and IRUS-UK rows to {@code tbl_all_r5_metrics};</li>
 * <li>drop and recreate the views of the permanent schema that expose these tables.</li>
 * </ol>
 * The Hive connection obtained from {@code ConnectDB} is closed once all statements succeeded.
 *
 * @throws Exception if obtaining the connection or executing any of the statements fails
 */
public void finalizeStats() throws Exception {
	final String usageDb = ConnectDB.getUsageStatsDBSchema();
	final String rawDataDb = ConnectDB.getUsageRawDataDBSchema();
	final String statsDb = ConnectDB.getStatsDBSchema();
	final String permanentDb = ConnectDB.getUsagestatsPermanentDBSchema();

	// try-with-resources releases the statement even when one of the queries below fails
	try (Statement statement = ConnectDB.getHiveConnection().createStatement()) {
		// the instance field keeps referring to the statement in use, as it did before
		stmt = statement;
		ConnectDB.getHiveConnection().setAutoCommit(false);

		// Dropping views_stats table ("Dropped" is logged only after the DROP really ran)
		logger.info("Dropping views_stats table");
		String sql = "DROP TABLE IF EXISTS " + usageDb + ".views_stats";
		stmt.executeUpdate(sql);
		logger.info("Dropped views_stats table ");

		// Dropping downloads_stats table
		logger.info("Dropping downloads_stats table");
		sql = "DROP TABLE IF EXISTS " + usageDb + ".downloads_stats";
		stmt.executeUpdate(sql);
		logger.info("Dropped downloads_stats table ");

		// Dropping page_views_stats table
		logger.info("Dropping pageviews_stats table");
		sql = "DROP TABLE IF EXISTS " + usageDb + ".pageviews_stats";
		stmt.executeUpdate(sql);
		logger.info("Dropped pageviews_stats table ");

		// Dropping usage_stats table
		logger.info("Dropping usage_stats table");
		sql = "DROP TABLE IF EXISTS " + usageDb + ".usage_stats";
		stmt.executeUpdate(sql);
		logger.info("Dropped usage_stats table ");

		// Creating views_stats table
		logger.info("Creating views_stats table");
		String createViewsStats = "CREATE TABLE IF NOT EXISTS "
			+ usageDb
			+ ".views_stats "
			+ "LIKE " + usageDb + ".openaire_views_stats_tmp STORED AS PARQUET";
		stmt.executeUpdate(createViewsStats);
		logger.info("Created views_stats table");

		// Inserting OpenAIRE views stats
		logger.info("Inserting Openaire data to views_stats");
		sql = "INSERT INTO " + usageDb + ".views_stats "
			+ "SELECT * FROM " + usageDb + ".openaire_views_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("Openaire views updated to views_stats");

		// Inserting Episciences views stats
		logger.info("Inserting Episciences data to views_stats");
		sql = "INSERT INTO " + usageDb + ".views_stats "
			+ "SELECT * FROM " + usageDb + ".episciencesviews";
		stmt.executeUpdate(sql);
		logger.info("Episciences views updated to views_stats");

		// Inserting Pedocs old views stats
		logger.info("Inserting Pedocs old data to views_stats");
		sql = "INSERT INTO " + usageDb + ".views_stats "
			+ "SELECT * FROM " + usageDb + ".pedocs_views_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("Pedocs views updated to views_stats");

		// Inserting TUDELFT views stats
		logger.info("Inserting TUDELFT data to views_stats");
		sql = "INSERT INTO " + usageDb + ".views_stats "
			+ "SELECT * FROM " + usageDb + ".tudelft_views_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("TUDELFT views updated to views_stats");

		// Inserting Lareferencia views stats
		logger.info("Inserting LaReferencia data to views_stats");
		sql = "INSERT INTO " + usageDb + ".views_stats "
			+ "SELECT * FROM " + usageDb + ".la_views_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("LaReferencia views updated to views_stats");

		// Inserting B2SHARE views stats
		logger.info("Inserting B2SHARE data to views_stats");
		sql = "INSERT INTO " + usageDb + ".views_stats "
			+ "SELECT * FROM " + usageDb + ".b2share_views_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("B2SHARE views updated to views_stats");

		// Creating downloads_stats table
		logger.info("Creating downloads_stats table");
		String createDownloadsStats = "CREATE TABLE IF NOT EXISTS "
			+ usageDb
			+ ".downloads_stats "
			+ "LIKE " + usageDb + ".openaire_downloads_stats_tmp STORED AS PARQUET";
		stmt.executeUpdate(createDownloadsStats);
		logger.info("Created downloads_stats table");

		// Inserting OpenAIRE downloads stats
		logger.info("Inserting OpenAIRE data to downloads_stats");
		sql = "INSERT INTO " + usageDb + ".downloads_stats "
			+ "SELECT * FROM " + usageDb + ".openaire_downloads_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("Inserted OpenAIRE data to downloads_stats");

		// Inserting Episciences downloads stats
		logger.info("Inserting Episciences data to downloads_stats");
		sql = "INSERT INTO " + usageDb + ".downloads_stats "
			+ "SELECT * FROM " + usageDb + ".episciencesdownloads";
		stmt.executeUpdate(sql);
		logger.info("Episciences downloads updated to downloads_stats");

		// Inserting Pedocs old downloads stats
		logger.info("Inserting PeDocs old data to downloads_stats");
		sql = "INSERT INTO " + usageDb + ".downloads_stats "
			+ "SELECT * FROM " + usageDb + ".pedocs_downloads_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("Inserted Pedocs data to downloads_stats");

		// Inserting TUDELFT downloads stats
		logger.info("Inserting TUDELFT data to downloads_stats");
		sql = "INSERT INTO " + usageDb + ".downloads_stats "
			+ "SELECT * FROM " + usageDb + ".tudelft_downloads_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("Inserted TUDELFT data to downloads_stats");

		// Inserting B2SHARE downloads stats
		logger.info("Inserting B2SHARE data to downloads_stats");
		sql = "INSERT INTO " + usageDb + ".downloads_stats "
			+ "SELECT * FROM " + usageDb + ".b2share_downloads_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("Inserted B2SHARE data to downloads_stats");

		// Inserting Lareferencia downloads stats
		logger.info("Inserting LaReferencia data to downloads_stats");
		sql = "INSERT INTO " + usageDb + ".downloads_stats "
			+ "SELECT * FROM " + usageDb + ".la_downloads_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("Lareferencia downloads updated to downloads_stats");

		// Inserting IRUS downloads stats
		logger.info("Inserting IRUS data to downloads_stats");
		sql = "INSERT INTO " + usageDb + ".downloads_stats "
			+ "SELECT * FROM " + usageDb + ".irus_downloads_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("IRUS downloads updated to downloads_stats");

		// Inserting IRUS_R5 views stats (explicit column list: the source table holds both
		// a views and a downloads column)
		logger.info("Inserting IRUS_R5 views to views_stats");
		sql = "INSERT INTO " + usageDb + ".views_stats "
			+ "SELECT source, repository_id, result_id, `date`, views, openaire FROM "
			+ usageDb
			+ ".irus_R5_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("IRUS_R5 views updated to views_stats");

		// Inserting IRUS_R5 downloads stats
		logger.info("Inserting IRUS_R5 data to downloads_stats");
		sql = "INSERT INTO " + usageDb + ".downloads_stats "
			+ "SELECT source, repository_id, result_id, `date`, downloads, openaire FROM "
			+ usageDb
			+ ".irus_R5_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("IRUS_R5 downloads updated to downloads_stats");

		// Inserting SARC-OJS downloads stats
		logger.info("Inserting SARC data to downloads_stats");
		sql = "INSERT INTO " + usageDb + ".downloads_stats "
			+ "SELECT * FROM " + usageDb + ".sarc_downloads_stats_tmp";
		stmt.executeUpdate(sql);
		logger.info("SARC-OJS downloads updated to downloads_stats");

		// Inserting Datacite views stats (read from the raw-data schema)
		logger.info("Inserting Datacite views to views_stats");
		sql = "INSERT INTO " + usageDb + ".views_stats "
			+ "SELECT * FROM " + rawDataDb + ".datacite_views";
		stmt.executeUpdate(sql);
		logger.info("Datacite views updated to views_stats");

		// Inserting Datacite downloads stats (read from the raw-data schema)
		logger.info("Inserting Datacite downloads to downloads_stats");
		sql = "INSERT INTO " + usageDb + ".downloads_stats "
			+ "SELECT * FROM " + rawDataDb + ".datacite_downloads";
		stmt.executeUpdate(sql);
		logger.info("Datacite downloads updated to downloads_stats");

		// Creating pageviews_stats table
		logger.info("Creating pageviews_stats table");
		String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + usageDb
			+ ".pageviews_stats "
			+ "LIKE " + usageDb + ".openaire_pageviews_stats_tmp STORED AS PARQUET";
		stmt.executeUpdate(create_pageviews_stats);
		logger.info("Created pageviews_stats table");

		// Inserting OpenAIRE views stats from Portal
		logger.info("Inserting data to page_views_stats");
		sql = "INSERT INTO " + usageDb + ".pageviews_stats "
			+ "SELECT * FROM " + usageDb + ".openaire_pageviews_stats_tmp";
		stmt.executeUpdate(sql);

		// Rebuilding full_dates table
		logger.info("Dropping full_dates table");
		String dropFullDates = "DROP TABLE IF EXISTS "
			+ usageDb
			+ ".full_dates";
		stmt.executeUpdate(dropFullDates);
		logger.info("Dropped full_dates table");

		// Number of whole months between January 2016 and the current month
		Calendar startCalendar = Calendar.getInstance();
		startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
		Calendar endCalendar = Calendar.getInstance();
		int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
		int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);

		logger.info("Creating full_dates table");
		// space(n) split on ' ' yields n+1 elements, so posexplode emits offsets 0..diffMonth
		// and add_months produces one row per month starting at 2016-01.
		sql = "CREATE TABLE IF NOT EXISTS " + usageDb + ".full_dates AS "
			+ "SELECT from_unixtime(unix_timestamp(cast(add_months(from_date,i) AS DATE)), 'yyyy/MM') AS txn_date "
			+ "FROM (SELECT DATE '2016-01-01' AS from_date) p "
			+ "LATERAL VIEW "
			+ "posexplode(split(space(" + diffMonth + "),' ')) pe AS i,x";
		stmt.executeUpdate(sql);
		logger.info("Created full_dates table");

		// Building usage_stats as the full outer join of downloads_stats and views_stats
		logger.info("Inserting data to usage_stats");
		sql = "CREATE TABLE IF NOT EXISTS " + usageDb + ".usage_stats AS "
			+ "SELECT coalesce(ds.source, vs.source) as source, "
			+ "coalesce(ds.repository_id, vs.repository_id) as repository_id, "
			+ "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
			+ "coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, "
			+ "coalesce(ds.openaire, 0) as openaire_downloads, "
			+ "coalesce(vs.openaire, 0) as openaire_views "
			+ "FROM " + usageDb + ".downloads_stats AS ds FULL OUTER JOIN "
			+ usageDb + ".views_stats AS vs ON ds.source=vs.source "
			+ "AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date";
		stmt.executeUpdate(sql);
		logger.info("Inserted data to usage_stats");

		// Inserting LaReferencia CoP R5 Metrics
		logger.info("Inserting Lareferencia data to tbl_all_r5_metrics");
		sql = "INSERT INTO " + usageDb + ".tbl_all_r5_metrics "
			+ "SELECT * FROM " + usageDb + ".lr_tbl_all_r5_metrics";
		stmt.executeUpdate(sql);

		// Inserting IRUS-UK CoP R5 Metrics
		logger.info("Inserting IRUS-UK data into tbl_all_r5_metrics");
		String insertR5Stats = "INSERT INTO " + usageDb + ".tbl_all_r5_metrics "
			+ "SELECT s.source, d.id AS repository_id, "
			+ "ro.id as result_id, CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) as date, "
			+ "s.unique_item_investigations , s.total_item_investigations, "
			+ "s.unique_item_requests, s.total_item_requests "
			+ "FROM " + rawDataDb + ".sushilog_cop_r5 s, "
			+ statsDb + ".datasource_oids d, "
			+ statsDb + ".result_oids ro "
			+ "WHERE s.repository=d.oid AND s.rid=ro.oid AND s.source='IRUS-UK'";
		stmt.executeUpdate(insertR5Stats);
		logger.info("Inserted IRUS-UK data into tbl_all_r5_metrics");

		// Re-publishing the tables as views of the permanent schema
		logger.info("Building views at permanent DB starts at: " + new Timestamp(System.currentTimeMillis()));

		logger.info("Dropping view views_stats on permanent usagestats DB");
		sql = "DROP VIEW IF EXISTS " + permanentDb + ".views_stats";
		stmt.executeUpdate(sql);
		logger.info("Dropped view views_stats on permanent usagestats DB");

		logger.info("Create view views_stats on permanent usagestats DB");
		sql = "CREATE VIEW IF NOT EXISTS " + permanentDb + ".views_stats"
			+ " AS SELECT * FROM " + usageDb + ".views_stats";
		stmt.executeUpdate(sql);
		logger.info("Created view views_stats on permanent usagestats DB");

		logger.info("Dropping view pageviews_stats on permanent usagestats DB");
		sql = "DROP VIEW IF EXISTS " + permanentDb + ".pageviews_stats";
		stmt.executeUpdate(sql);
		logger.info("Dropped view pageviews_stats on permanent usagestats DB");

		logger.info("Create view pageviews_stats on permanent usagestats DB");
		sql = "CREATE VIEW IF NOT EXISTS " + permanentDb + ".pageviews_stats"
			+ " AS SELECT * FROM " + usageDb + ".pageviews_stats";
		stmt.executeUpdate(sql);
		logger.info("Created view pageviews_stats on permanent usagestats DB");

		logger.info("Dropping view downloads_stats on permanent usagestats DB");
		sql = "DROP VIEW IF EXISTS " + permanentDb + ".downloads_stats";
		stmt.executeUpdate(sql);
		logger.info("Dropped view on downloads_stats on permanent usagestats DB");

		logger.info("Create view on downloads_stats on permanent usagestats DB");
		sql = "CREATE VIEW IF NOT EXISTS " + permanentDb + ".downloads_stats"
			+ " AS SELECT * FROM " + usageDb + ".downloads_stats";
		stmt.executeUpdate(sql);
		logger.info("Created view on downloads_stats on permanent usagestats DB");

		logger.info("Dropping view usage_stats on permanent usagestats DB");
		sql = "DROP VIEW IF EXISTS " + permanentDb + ".usage_stats";
		stmt.executeUpdate(sql);
		logger.info("Dropped view on usage_stats on permanent usagestats DB");

		logger.info("Create view on usage_stats on permanent usagestats DB");
		sql = "CREATE VIEW IF NOT EXISTS " + permanentDb + ".usage_stats"
			+ " AS SELECT * FROM " + usageDb + ".usage_stats";
		stmt.executeUpdate(sql);
		logger.info("Created view on usage_stats on permanent usagestats DB");

		logger.info("Dropping view COUNTER_R5_Metrics on permanent usagestats DB");
		sql = "DROP VIEW IF EXISTS " + permanentDb + ".counter_r5_stats_with_metrics";
		stmt.executeUpdate(sql);
		logger.info("Dropped view COUNTER_R5_Metrics on permanent usagestats DB");

		logger.info("Create view on COUNTER_R5_Metrics on permanent usagestats DB");
		sql = "CREATE VIEW IF NOT EXISTS " + permanentDb
			+ ".counter_r5_stats_with_metrics"
			+ " AS SELECT * FROM " + usageDb + ".tbl_all_r5_metrics";
		stmt.executeUpdate(sql);
		logger.info("Created view on COUNTER_R5_Metrics on permanent usagestats DB");

		logger.info("Building views at permanent DB ends at: " + new Timestamp(System.currentTimeMillis()));
	}
	// As before, the connection is only closed after every statement succeeded
	ConnectDB.getHiveConnection().close();
}
/**
 * Convenience accessor for the Hive connection handed out by {@code ConnectDB}.
 *
 * @return whatever {@code ConnectDB.getHiveConnection()} returns
 * @throws SQLException propagated from {@code ConnectDB.getHiveConnection()}
 */
private Connection getConnection() throws SQLException {
	final Connection hiveConnection = ConnectDB.getHiveConnection();
	return hiveConnection;
}
}