dnet-hadoop/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/PiwikStatsDB.java

1113 lines
58 KiB
Java
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package eu.dnetlib.oa.graph.usagestatsbuild.export;
import java.sql.*;
import java.text.SimpleDateFormat;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author D. Pierrakos, S. Zoupanos
*/
public class PiwikStatsDB {
private String logPath;
private Statement stmt = null;
private static final Logger logger = LoggerFactory.getLogger(PiwikStatsDB.class);
/**
 * Creates the helper. The constructor body is empty: no connection or statement is opened here.
 *
 * @throws Exception declared by the signature; the empty body itself throws nothing
 */
public PiwikStatsDB() throws Exception {
}
/**
 * Rebuilds the usage-stats databases by delegating to {@code createDatabase()}.
 *
 * @throws Exception if {@code createDatabase()} fails
 */
public void recreateDBAndTables() throws Exception {
    // No piwiklog table is created here because it is built on top of JSON files;
    // the former createTmpTables() step therefore stays disabled.
    createDatabase();
}
/**
 * Drops and re-creates the usage-stats schema, then makes sure the permanent usage-stats
 * schema exists.
 *
 * <p>The statement used for the drop/create steps is closed once the instance field has been
 * pointed at the fresh statement used for the permanent schema, so it is no longer leaked.
 * The last statement stays open on the {@code stmt} field, exactly as before.
 *
 * @throws Exception wrapping the underlying failure of any of the three steps
 */
private void createDatabase() throws Exception {
    try {
        stmt = ConnectDB.getHiveConnection().createStatement();
        logger.info("Dropping usagestats DB: " + ConnectDB.getUsageStatsDBSchema());
        String dropDatabase = "DROP DATABASE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + " CASCADE";
        stmt.executeUpdate(dropDatabase);
    } catch (Exception e) {
        // Pass the exception as the last argument so the stack trace reaches the log.
        logger.error("Failed to drop database: " + e, e);
        throw new Exception("Failed to drop database: " + e.toString(), e);
    }
    try {
        logger.info("Creating usagestats DB: " + ConnectDB.getUsageStatsDBSchema());
        String createDatabase = "CREATE DATABASE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema();
        stmt.executeUpdate(createDatabase);
        logger.info("Usagestats DB created: " + ConnectDB.getUsageStatsDBSchema());
    } catch (Exception e) {
        logger.error("Failed to create database: " + e, e);
        throw new Exception("Failed to create database: " + e.toString(), e);
    }
    try {
        Statement previous = stmt;
        stmt = ConnectDB.getHiveConnection().createStatement();
        // The old statement is no longer reachable through the field: release it.
        try {
            previous.close();
        } catch (SQLException closeFailure) {
            // A failed close must not abort the schema creation.
            logger.warn("Could not close previous statement: " + closeFailure);
        }
        logger.info("Creating permanent usagestats DB: " + ConnectDB.getUsagestatsPermanentDBSchema());
        String createPermanentDatabase = "CREATE DATABASE IF NOT EXISTS "
            + ConnectDB.getUsagestatsPermanentDBSchema();
        stmt.executeUpdate(createPermanentDatabase);
        logger.info("Created permanent usagestats DB: " + ConnectDB.getUsagestatsPermanentDBSchema());
    } catch (Exception e) {
        logger.error("Failed to create database: " + e, e);
        throw new Exception("Failed to create database: " + e.toString(), e);
    }
}
/**
 * Rebuilds the {@code piwiklogdistinct} table in the usage-stats schema: the table is dropped,
 * re-created as a bucketed transactional ORC table and filled with the distinct rows of the
 * raw {@code piwiklog} table whose {@code entity_id} is not null.
 *
 * <p>The statement is opened in a try-with-resources block so it is released even when one of
 * the SQL steps fails.
 *
 * @throws Exception if initialising the connection or executing any statement fails
 */
public void createDistinctPiwikLog() throws Exception {
    logger.info("Initialising DB properties");
    ConnectDB.init();

    try (Statement statement = ConnectDB.getHiveConnection().createStatement()) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        logger.info("Dropping piwiklogdistinct");
        String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct";
        statement.executeUpdate(sql);
        logger.info("Dropped piwiklogdistinct");

        logger.info("Creating piwiklogdistinct table");
        // Create Piwiklogdistinct table - This table should exist
        String sqlCreateTablePiwikLogDistinct = "CREATE TABLE IF NOT EXISTS "
            + ConnectDB.getUsageStatsDBSchema()
            + ".piwiklogdistinct(source INT, id_visit STRING, country STRING, action STRING, url STRING, "
            + "entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) "
            + "clustered by (source, id_visit, action, timestamp, entity_id) "
            + "into 100 buckets stored as orc tblproperties('transactional'='true')";
        statement.executeUpdate(sqlCreateTablePiwikLogDistinct);
        logger.info("Created piwiklogdistinct table");

        logger.info("Inserting data to piwiklogdistinct");
        sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
            + "SELECT DISTINCT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".piwiklog WHERE entity_id is not null";
        statement.executeUpdate(sql);
        logger.info("Inserted data to piwiklogdistinct");
    }
}
/**
 * Runs the three log-processing stages in order (views, downloads, COUNTER CoP R5 tables),
 * logging a timestamp before and after each stage.
 *
 * @throws Exception wrapping whatever failure interrupted one of the stages
 */
public void processLogs() throws Exception {
    try {
        Timestamp mark = new Timestamp(System.currentTimeMillis());
        logger.info("ViewsStats processing starts at: " + mark);
        viewsStats();
        mark = new Timestamp(System.currentTimeMillis());
        logger.info("ViewsStats processing ends at: " + mark);

        mark = new Timestamp(System.currentTimeMillis());
        logger.info("DownloadsStats processing starts at: " + mark);
        downloadsStats();
        mark = new Timestamp(System.currentTimeMillis());
        logger.info("DownloadsStats processing ends at: " + mark);

        mark = new Timestamp(System.currentTimeMillis());
        logger.info("COUNTER CoP R5 metrics processing starts at: " + mark);
        createCoPR5Tables();
        mark = new Timestamp(System.currentTimeMillis());
        logger.info("COUNTER CoP R5 metrics processing ends at: " + mark);
    } catch (Exception e) {
        // The same text is used for the log entry and for the wrapping exception.
        String failure = "Failed to process logs: " + e;
        logger.error(failure);
        throw new Exception(failure, e);
    }
}
/**
 * Runs the Episciences views stage followed by the Episciences downloads stage, logging a
 * timestamp before and after each of them.
 *
 * @throws Exception wrapping whatever failure interrupted one of the stages
 */
public void processEpisciencesLogs() throws Exception {
    try {
        Timestamp mark = new Timestamp(System.currentTimeMillis());
        logger.info("Views Episciences processing starts at: " + mark);
        episciencesViewsStats();
        mark = new Timestamp(System.currentTimeMillis());
        logger.info("Views Episciences processing ends at: " + mark);

        mark = new Timestamp(System.currentTimeMillis());
        logger.info("downloads Episciences processing starts at: " + mark);
        episciencesDownloadsStats();
        mark = new Timestamp(System.currentTimeMillis());
        logger.info("Downloads Episciences processing ends at: " + mark);
    } catch (Exception e) {
        // The same text is used for the log entry and for the wrapping exception.
        String failure = "Failed to process logs: " + e;
        logger.error(failure);
        throw new Exception(failure, e);
    }
}
/**
 * Builds the monthly views statistics from {@code piwiklogdistinct}:
 * <ol>
 * <li>(re)creates the {@code openaire_result_views_monthly_tmp} view (rows with
 * {@code action='action'});</li>
 * <li>drops and re-creates the {@code openaire_views_stats_tmp} table by joining that view with
 * the {@code datasource} and {@code result_oids} tables of the stats schema;</li>
 * <li>creates the {@code openaire_pageviews_stats_tmp} table for the portal source
 * ({@code ExecuteWorkflow.portalMatomoID}) if it does not exist yet.</li>
 * </ol>
 *
 * <p>Changes with respect to the previous version: the initial drop now targets the view that
 * is actually created (it used to name {@code openaire_piwikresult_views_monthly_tmp}, which
 * matches neither the log messages nor the create statement), and the statement is closed even
 * when a step fails.
 *
 * @throws Exception if any statement fails
 */
public void viewsStats() throws Exception {
    try (Statement statement = ConnectDB.getHiveConnection().createStatement()) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        logger.info("Dropping openaire_result_views_monthly_tmp view");
        String drop_result_views_monthly = "DROP VIEW IF EXISTS "
            + ConnectDB.getUsageStatsDBSchema()
            + ".openaire_result_views_monthly_tmp";
        statement.executeUpdate(drop_result_views_monthly);
        logger.info("Dropped openaire_result_views_monthly_tmp view");

        logger.info("Creating openaire_result_views_monthly_tmp view");
        String create_result_views_monthly = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
            + ".openaire_result_views_monthly_tmp "
            + "AS SELECT entity_id, "
            + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
            + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
            + "AS openaire_referrer, "
            + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
            + "FROM " + ConnectDB.getUsageStatsDBSchema()
            + ".piwiklogdistinct where action='action' and (source_item_type='oaItem' or "
            + "source_item_type='repItem') "
            + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
            + "source ORDER BY source, entity_id";
        statement.executeUpdate(create_result_views_monthly);
        logger.info("Created openaire_result_views_monthly_tmp table");

        logger.info("Dropping openaire_views_stats_tmp table");
        String drop_views_stats = "DROP TABLE IF EXISTS "
            + ConnectDB.getUsageStatsDBSchema()
            + ".openaire_views_stats_tmp";
        statement.executeUpdate(drop_views_stats);
        logger.info("Dropped openaire_views_stats_tmp table");

        logger.info("Creating openaire_views_stats_tmp table");
        String create_views_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
            + ".openaire_views_stats_tmp "
            + "AS SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
            + "max(views) AS count, max(openaire_referrer) AS openaire "
            + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
            + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
            + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
            + "AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
            + "GROUP BY d.id, ro.id, month "
            + "ORDER BY d.id, ro.id, month ";
        statement.executeUpdate(create_views_stats);
        logger.info("Created openaire_views_stats_tmp table");

        // NOTE(review): unlike openaire_views_stats_tmp, this table is not dropped first, so an
        // existing openaire_pageviews_stats_tmp is kept as is - confirm this is intended.
        // NOTE(review): this query joins on p.id=ro.id while the one above uses p.id=ro.oid -
        // presumably deliberate for the portal source; verify.
        logger.info("Creating openaire_pageviews_stats_tmp table");
        String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
            + ".openaire_pageviews_stats_tmp AS SELECT "
            + "'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count "
            + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
            + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
            + "WHERE p.source=" + ExecuteWorkflow.portalMatomoID
            + " AND p.source=d.piwik_id and p.id=ro.id AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
            + "AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
            + "GROUP BY d.id, ro.id, month "
            + "ORDER BY d.id, ro.id, month ";
        statement.executeUpdate(create_pageviews_stats);
        logger.info("Created pageviews_stats table");
    }
}
/**
 * Builds the monthly downloads statistics from {@code piwiklogdistinct}:
 * <ol>
 * <li>(re)creates the {@code openaire_result_downloads_monthly_tmp} view (rows with
 * {@code action='download'});</li>
 * <li>drops and re-creates the {@code openaire_downloads_stats_tmp} table by joining that view
 * with the {@code datasource} and {@code result_oids} tables of the stats schema;</li>
 * <li>drops the intermediate view again.</li>
 * </ol>
 *
 * <p>Changes with respect to the previous version: the final "Dropped ..." message is logged
 * only after the drop has been executed, and the statement is closed even when a step fails.
 *
 * @throws Exception if any statement fails
 */
private void downloadsStats() throws Exception {
    try (Statement statement = ConnectDB.getHiveConnection().createStatement()) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        logger.info("Dropping openaire_result_downloads_monthly_tmp view");
        String drop_result_downloads_monthly = "DROP VIEW IF EXISTS "
            + ConnectDB.getUsageStatsDBSchema()
            + ".openaire_result_downloads_monthly_tmp";
        statement.executeUpdate(drop_result_downloads_monthly);
        logger.info("Dropped openaire_result_downloads_monthly_tmp view");

        logger.info("Creating openaire_result_downloads_monthly_tmp view");
        String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
            + ".openaire_result_downloads_monthly_tmp "
            + "AS SELECT entity_id, "
            + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
            + "COUNT(entity_id) as downloads, "
            + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
            + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
            + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct where action='download' "
            + "AND (source_item_type='oaItem' OR source_item_type='repItem') "
            + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source "
            + "ORDER BY source, entity_id, month";
        statement.executeUpdate(sql);
        logger.info("Created openaire_result_downloads_monthly_tmp view");

        logger.info("Dropping openaire_downloads_stats_tmp table");
        String drop_views_stats = "DROP TABLE IF EXISTS "
            + ConnectDB.getUsageStatsDBSchema()
            + ".openaire_downloads_stats_tmp";
        statement.executeUpdate(drop_views_stats);
        logger.info("Dropped openaire_downloads_stats_tmp table");

        logger.info("Creating openaire_downloads_stats_tmp table");
        sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".openaire_downloads_stats_tmp AS "
            + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
            + "max(downloads) AS count, max(openaire_referrer) AS openaire "
            + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
            + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
            + "WHERE p.source=d.piwik_id and p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
            + "AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
            + "GROUP BY d.id, ro.id, month "
            + "ORDER BY d.id, ro.id, month ";
        statement.executeUpdate(sql);
        logger.info("Created downloads_stats table");

        // The intermediate view is only needed to build the table above.
        logger.info("Dropping openaire_result_downloads_monthly_tmp view");
        sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp";
        statement.executeUpdate(sql);
        logger.info("Dropped openaire_result_downloads_monthly_tmp view ");
    }
}
/**
 * Rebuilds the Pedocs temporary statistics tables in the usage-stats schema from the raw
 * {@code pedocsoldviews} and {@code pedocsolddownloads} tables, joined with
 * {@code result_oids} on {@code oid = identifier}.
 *
 * <p>Changes with respect to the previous version:
 * <ul>
 * <li>each "Dropped ..." message is logged only after the drop has been executed;</li>
 * <li>{@code pedocs_downloads_stats_tmp} is now dropped as well. Previously only
 * {@code pedocs_downloads_stats} was dropped, so {@code CREATE TABLE IF NOT EXISTS} would keep
 * a {@code pedocs_downloads_stats_tmp} left over from an earlier run. The original drop of
 * {@code pedocs_downloads_stats} is kept.</li>
 * </ul>
 *
 * <p>The statement is stored in the {@code stmt} field and left open, as before.
 *
 * @throws Exception if any statement fails
 */
public void uploadOldPedocs() throws Exception {
    stmt = ConnectDB.getHiveConnection().createStatement();
    ConnectDB.getHiveConnection().setAutoCommit(false);

    // Dropping Pedocs pedocs_views_stats_tmp table
    logger.info("Dropping Pedocs pedocs_views_stats_tmp table");
    String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_views_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped pedocs_views_stats_tmp table ");

    // Dropping Pedocs pedocs_downloads_stats table
    logger.info("Dropping pedocs_downloads_stats table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_downloads_stats";
    stmt.executeUpdate(sql);
    logger.info("Dropped pedocs_downloads_stats table ");

    // Dropping Pedocs pedocs_downloads_stats_tmp table: this is the table created below, so it
    // must be removed for CREATE TABLE IF NOT EXISTS to rebuild it.
    logger.info("Dropping pedocs_downloads_stats_tmp table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_downloads_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped pedocs_downloads_stats_tmp table ");

    // Creating Pedocs pedocs_views_stats_tmp table
    logger.info("Creating Pedocs pedocs_views_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_views_stats_tmp AS "
        + "SELECT 'OpenAIRE' as source, 'opendoar____::ab1a4d0dd4d48a2ba1077c4494791306' as repository_id,"
        + "r.id as result_id,date,counter_abstract as count, 0 as openaire "
        + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".pedocsoldviews p, " + ConnectDB.getStatsDBSchema()
        + ".result_oids r where r.oid=p.identifier";
    stmt.executeUpdate(sql);
    logger.info("Created pedocs_views_stats_tmp table ");

    // Creating Pedocs pedocs_downloads_stats_tmp table
    logger.info("Creating Pedocs pedocs_downloads_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_downloads_stats_tmp AS "
        + "SELECT 'OpenAIRE' as source, 'opendoar____::ab1a4d0dd4d48a2ba1077c4494791306' as repository_id,"
        + "r.id as result_id, date, counter as count, 0 as openaire "
        + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".pedocsolddownloads p, " + ConnectDB.getStatsDBSchema()
        + ".result_oids r where r.oid=p.identifier";
    stmt.executeUpdate(sql);
    logger.info("Created pedocs_downloads_stats_tmp table ");
}
/**
 * Rebuilds the TU Delft temporary statistics tables ({@code tudelft_views_stats_tmp} and
 * {@code tudelft_downloads_stats_tmp}) from the {@code piwiklogdistinct} rows with
 * {@code source=252}.
 *
 * <p>Steps: drop the two intermediate monthly views and the two result tables, then for views
 * and downloads in turn create the monthly view and the result table (joining on
 * {@code concat('tud:', p.id) = ro.oid}), and finally drop the intermediate views again.
 *
 * <p>Change with respect to the previous version: every "Dropped ..." message is logged only
 * after the corresponding drop has been executed, so the log no longer reports a drop that
 * then fails. The statement is stored in the {@code stmt} field and left open, as before.
 *
 * @throws Exception if any statement fails
 */
public void uploadTUDELFTStats() throws Exception {
    stmt = ConnectDB.getHiveConnection().createStatement();
    ConnectDB.getHiveConnection().setAutoCommit(false);

    // Dropping TUDELFT tudelft_result_views_monthly_tmp view
    logger.info("Dropping TUDELFT tudelft_result_views_monthly_tmp view");
    String sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_views_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_result_views_monthly_tmp view ");

    // Dropping TUDELFT tudelft_result_downloads_monthly_tmp view
    logger.info("Dropping TUDELFT tudelft_result_downloads_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_downloads_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_result_downloads_monthly_tmp view ");

    // Dropping TUDELFT tudelft_views_stats_tmp table
    logger.info("Dropping TUDELFT tudelft_views_stats_tmp table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_views_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_views_stats_tmp table ");

    // Dropping TUDELFT tudelft_downloads_stats_tmp table
    logger.info("Dropping TUDELFT tudelft_downloads_stats_tmp table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_downloads_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_downloads_stats_tmp table ");

    // Creating TUDELFT tudelft_result_views_monthly_tmp view
    logger.info("Creating TUDELFT tudelft_result_views_monthly_tmp view");
    sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_views_monthly_tmp "
        + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
        + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
        + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
        + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
        + "WHERE action='action' and (source_item_type='oaItem' or source_item_type='repItem') and source=252 "
        + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
    stmt.executeUpdate(sql);
    logger.info("Created tudelft_result_views_monthly_tmp view ");

    // Creating TUDELFT tudelft_views_stats_tmp table
    logger.info("Creating TUDELFT tudelft_views_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_views_stats_tmp AS "
        + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
        + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
        + ".tudelft_result_views_monthly_tmp p, "
        + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
        + "WHERE concat('tud:',p.id)=ro.oid and d.id='opendoar____::c9892a989183de32e976c6f04e700201' "
        + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
    stmt.executeUpdate(sql);
    logger.info("Created TUDELFT tudelft_views_stats_tmp table");

    // Creating TUDELFT tudelft_result_downloads_monthly_tmp view
    // (the download count is exposed under the column name "views", which the next query reads)
    logger.info("Creating TUDELFT tudelft_result_downloads_monthly_tmp view");
    sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_downloads_monthly_tmp "
        + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
        + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
        + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
        + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
        + "WHERE action='download' and (source_item_type='oaItem' or source_item_type='repItem') and source=252 "
        + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
    stmt.executeUpdate(sql);
    logger.info("Created tudelft_result_downloads_monthly_tmp view ");

    // Creating TUDELFT tudelft_downloads_stats_tmp table
    logger.info("Creating TUDELFT tudelft_downloads_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_downloads_stats_tmp AS "
        + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
        + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
        + ".tudelft_result_downloads_monthly_tmp p, "
        + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
        + "WHERE concat('tud:',p.id)=ro.oid and d.id='opendoar____::c9892a989183de32e976c6f04e700201' "
        + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
    stmt.executeUpdate(sql);
    logger.info("Created TUDELFT tudelft_downloads_stats_tmp table");

    // Dropping TUDELFT tudelft_result_views_monthly_tmp view
    logger.info("Dropping TUDELFT tudelft_result_views_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_views_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_result_views_monthly_tmp view ");

    // Dropping TUDELFT tudelft_result_downloads_monthly_tmp view
    logger.info("Dropping TUDELFT tudelft_result_downloads_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_downloads_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped tudelft_result_downloads_monthly_tmp view ");
}
/**
 * Rebuilds the B2SHARE temporary statistics tables ({@code b2share_views_stats_tmp} and
 * {@code b2share_downloads_stats_tmp}) from the {@code piwiklogdistinct} rows with
 * {@code source=412}.
 *
 * <p>Steps: drop the two intermediate monthly views and the two result tables, then for views
 * and downloads in turn create the monthly view and the result table (joining on
 * {@code p.id = ro.oid}), and finally drop the intermediate views again.
 *
 * <p>Change with respect to the previous version: every "Dropped ..." message is logged only
 * after the corresponding drop has been executed, so the log no longer reports a drop that
 * then fails. The statement is stored in the {@code stmt} field and left open, as before.
 *
 * @throws Exception if any statement fails
 */
public void uploadB2SHAREStats() throws Exception {
    stmt = ConnectDB.getHiveConnection().createStatement();
    ConnectDB.getHiveConnection().setAutoCommit(false);

    // Dropping B2SHARE b2share_result_views_monthly_tmp view
    logger.info("Dropping B2SHARE b2share_result_views_monthly_tmp view");
    String sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_views_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_result_views_monthly_tmp view ");

    // Dropping B2SHARE b2share_result_downloads_monthly_tmp view
    logger.info("Dropping b2SHARE b2share_result_downloads_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_downloads_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_result_downloads_monthly_tmp view ");

    // Dropping B2SHARE b2share_views_stats_tmp table
    logger.info("Dropping B2SHARE b2share_views_stats_tmp table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_views_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_views_stats_tmp table ");

    // Dropping B2SHARE b2share_downloads_stats_tmp table
    logger.info("Dropping B2SHARE b2share_downloads_stats_tmp table");
    sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_downloads_stats_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_downloads_stats_tmp table ");

    // Creating B2SHARE b2share_result_views_monthly_tmp view
    logger.info("Creating B2SHARE b2share_result_views_monthly_tmp view");
    sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_views_monthly_tmp "
        + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
        + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
        + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
        + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
        + "WHERE action='action' and (source_item_type='oaItem' or source_item_type='repItem') and source=412 "
        + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
    stmt.executeUpdate(sql);
    logger.info("Created b2share_result_views_monthly_tmp view ");

    // Creating B2SHARE b2share_views_stats_tmp table
    logger.info("Creating B2SHARE b2share_views_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_views_stats_tmp AS "
        + "SELECT 'B2SHARE' as source, d.id as repository_id, ro.id as result_id, month as date, "
        + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
        + ".b2share_result_views_monthly_tmp p, "
        + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
        + "WHERE p.id=ro.oid and d.id='re3data_____::ad3609c351bd520edf6f10f5e0d9b877' "
        + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
    stmt.executeUpdate(sql);
    logger.info("Created B2SHARE b2share_views_stats_tmp table");

    // Creating B2SHARE b2share_result_downloads_monthly_tmp view
    // (the download count is exposed under the column name "views", which the next query reads)
    logger.info("Creating B2SHARE b2share_result_downloads_monthly_tmp view");
    sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_downloads_monthly_tmp "
        + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
        + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
        + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
        + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
        + "WHERE action='download' and (source_item_type='oaItem' or source_item_type='repItem') and source=412 "
        + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
    stmt.executeUpdate(sql);
    logger.info("Created b2share_result_downloads_monthly_tmp view ");

    // Creating B2SHARE b2share_downloads_stats_tmp table
    logger.info("Creating B2SHARE b2share_downloads_stats_tmp table");
    sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_downloads_stats_tmp AS "
        + "SELECT 'B2SHARE' as source, d.id as repository_id, ro.id as result_id, month as date, "
        + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
        + ".b2share_result_downloads_monthly_tmp p, "
        + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
        + "WHERE p.id=ro.oid and d.id='re3data_____::ad3609c351bd520edf6f10f5e0d9b877' "
        + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
    stmt.executeUpdate(sql);
    logger.info("Created B2SHARE b2share_downloads_stats_tmp table");

    // Dropping B2SHARE b2share_result_views_monthly_tmp view
    logger.info("Dropping B2SHARE b2share_result_views_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_views_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_result_views_monthly_tmp view ");

    // Dropping B2SHARE b2share_result_downloads_monthly_tmp view
    logger.info("Dropping B2SHARE b2share_result_downloads_monthly_tmp view");
    sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_downloads_monthly_tmp";
    stmt.executeUpdate(sql);
    logger.info("Dropped b2share_result_downloads_monthly_tmp view ");
}
/**
 * Rebuilds the {@code episciencesviews} table of the usage-stats schema.
 *
 * <p>The table is dropped and re-created, then every datasource whose {@code websiteurl}
 * contains "episciences" (and that is validated or harvested) is processed in turn: a
 * per-journal monthly view is built from the raw {@code episcienceslog} rows with
 * {@code action='action'}, its aggregated content is inserted into {@code episciencesviews},
 * and the view is dropped again.
 *
 * <p>Changes with respect to the previous version:
 * <ul>
 * <li>the statement, the prepared statement and the result set are closed even when a step
 * fails (the two statements were never closed before);</li>
 * <li>the per-journal view is dropped with {@code IF EXISTS}, like in
 * {@code episciencesDownloadsStats()}, so a missing view cannot fail the first drop;</li>
 * <li>a blank separates the {@code LIKE} pattern from {@code GROUP BY} in the view definition.</li>
 * </ul>
 *
 * @throws Exception if any statement fails
 */
public void episciencesViewsStats() throws Exception {
    logger.info("Creating episciences Views");

    try (Statement statement = ConnectDB.getHiveConnection().createStatement()) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        logger.info("Dropping Episcience Views Table");
        String dropEpisciencesViewsTable = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema()
            + ".episciencesviews ";
        statement.executeUpdate(dropEpisciencesViewsTable);
        logger.info("Dropped Episcience Views Table");

        logger.info("Creating Episcience Views Table");
        String createEpisciencesViewsTable = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
            + ".episciencesviews (source STRING, repository_id STRING, result_id STRING, date STRING, count INT, openaire INT)"
            + " clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true') ";
        statement.executeUpdate(createEpisciencesViewsTable);

        // Second column: the part of websiteurl before the first dot, from the 9th character on.
        String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
            + ConnectDB.getStatsDBSchema() +
            ".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true)";

        // A separate statement is used for the journal list so that the updates executed inside
        // the loop do not run on the statement that owns the open result set.
        try (PreparedStatement journalQuery = ConnectDB.DB_HIVE_CONNECTION
            .prepareStatement(returnEpisciencesJournals);
            ResultSet journals = journalQuery.executeQuery()) {

            while (journals.next()) {
                String journal_openaire_id = journals.getString(1);
                String episciencesSuffix = journals.getString(2);
                // Dashes are replaced by underscores when the suffix is used in a view name.
                String monthlyView = ConnectDB.getUsageStatsDBSchema()
                    + "." + episciencesSuffix.replace("-", "_") + "_result_views_monthly_tmp";

                logger.info("Working on journal_id:" + journal_openaire_id + " suffix:" + episciencesSuffix);
                logger.info("Dropping episciencesSuffix_result_views_monthly_tmp table");
                String dropepisciencesSuffixView = "DROP VIEW IF EXISTS " + monthlyView;
                statement.executeUpdate(dropepisciencesSuffixView);
                logger.info("Dropped episciencesSuffix_result_views_monthly_tmp table");

                logger.info("Creating episciencesSuffix_result_views_monthly_tmp table");
                String create_result_views_monthly = "CREATE OR REPLACE VIEW " + monthlyView + " "
                    + "AS SELECT entity_id, "
                    + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
                    + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
                    + "AS openaire_referrer, "
                    + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
                    + "FROM " + ConnectDB.getUsageRawDataDBSchema()
                    + ".episcienceslog where action='action' and (source_item_type='oaItem' or "
                    + "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%' "
                    + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
                    + "source ORDER BY source, entity_id";
                statement.executeUpdate(create_result_views_monthly);
                logger.info("Created episciencesSuffix_result_views_monthly_tmp table");

                logger.info("Inserting episciencesSuffix_result_views_monthly_tmp into EpisciencesViews Table");
                String insertIntoEpisciencesViewsTable = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
                    + ".episciencesviews SELECT 'Episciences' as source, '"
                    + journal_openaire_id + "' as repository_id, ro.id as result_id, month as date,"
                    + " max(views) AS count, max(openaire_referrer) AS openaire "
                    + "FROM " + monthlyView + " p,"
                    + ConnectDB.getStatsDBSchema()
                    + ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
                statement.executeUpdate(insertIntoEpisciencesViewsTable);
                logger.info("Inserted episciencesSuffix_result_views_monthly_tmp into EpisciencesViews Table");

                // The per-journal view is only an intermediate step: remove it again.
                statement.executeUpdate(dropepisciencesSuffixView);
                logger.info("Dropped episciencesSuffix_result_views_monthly_tmp view");
            }
        }
        logger.info("Episciences Views Created");
    }
}
/**
 * Rebuilds the {@code episciencesdownloads} table of the usage-stats schema.
 *
 * <p>The table is dropped and re-created, then every datasource whose {@code websiteurl}
 * contains "episciences" (and that is validated or harvested) is processed in turn: a
 * per-journal monthly view is built from the raw {@code episcienceslog} rows with
 * {@code action='download'}, its aggregated content is inserted into
 * {@code episciencesdownloads}, and the view is dropped again.
 *
 * <p>Changes with respect to the previous version:
 * <ul>
 * <li>the initial drop now targets {@code episciencesdownloads}; it used to name
 * {@code episciencesvdownloads}, so the real table survived and each run appended its rows to
 * those of the previous run;</li>
 * <li>the statement, the prepared statement and the result set are closed even when a step
 * fails (the two statements were never closed before);</li>
 * <li>a blank separates the {@code LIKE} pattern from {@code GROUP BY} in the view definition.</li>
 * </ul>
 *
 * @throws Exception if any statement fails
 */
public void episciencesDownloadsStats() throws Exception {
    logger.info("Creating episciences Downloads");

    try (Statement statement = ConnectDB.getHiveConnection().createStatement()) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        logger.info("Dropping Episcience Downloads Table");
        String dropEpisciencesDownloadsTable = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema()
            + ".episciencesdownloads ";
        statement.executeUpdate(dropEpisciencesDownloadsTable);
        logger.info("Dropped Episcience Downloads Table");

        logger.info("Creating Episcience Downloads Table");
        String createEpisciencesDownloadsTable = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
            + ".episciencesdownloads (source STRING, repository_id STRING, result_id STRING, date STRING, count INT, openaire INT)"
            + " clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true') ";
        statement.executeUpdate(createEpisciencesDownloadsTable);

        // Second column: the part of websiteurl before the first dot, from the 9th character on.
        String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
            + ConnectDB.getStatsDBSchema() +
            ".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true)";

        // A separate statement is used for the journal list so that the updates executed inside
        // the loop do not run on the statement that owns the open result set.
        try (PreparedStatement journalQuery = ConnectDB.DB_HIVE_CONNECTION
            .prepareStatement(returnEpisciencesJournals);
            ResultSet journals = journalQuery.executeQuery()) {

            while (journals.next()) {
                String journal_openaire_id = journals.getString(1);
                String episciencesSuffix = journals.getString(2);
                // Dashes are replaced by underscores when the suffix is used in a view name.
                String monthlyView = ConnectDB.getUsageStatsDBSchema()
                    + "." + episciencesSuffix.replace("-", "_") + "_result_downloads_monthly_tmp";

                logger.info("Working on journal_id:" + journal_openaire_id + " suffix:" + episciencesSuffix);
                logger.info("Dropping episciencesSuffix_result_downloads_monthly_tmp table");
                String dropepisciencesSuffixDownloads = "DROP VIEW IF EXISTS " + monthlyView;
                statement.executeUpdate(dropepisciencesSuffixDownloads);

                // The download count is exposed under the column name "views", which the
                // insert below reads.
                logger.info("Creating episciencesSuffix_result_downloads_monthly_tmp table");
                String create_result_downloads_monthly = "CREATE OR REPLACE VIEW " + monthlyView + " "
                    + "AS SELECT entity_id, "
                    + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
                    + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
                    + "AS openaire_referrer, "
                    + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
                    + "FROM " + ConnectDB.getUsageRawDataDBSchema()
                    + ".episcienceslog where action='download' and (source_item_type='oaItem' or "
                    + "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%' "
                    + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
                    + "source ORDER BY source, entity_id";
                statement.executeUpdate(create_result_downloads_monthly);
                logger.info("Created episciencesSuffix_result_downloads_monthly_tmp table");

                logger.info("Inserting episciencesSuffix_result_downloads_monthly_tmp into EpisciencesDownloadsTable");
                String insertIntoEpisciencesDownloadsTable = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
                    + ".episciencesdownloads SELECT 'Episciences' as source, '"
                    + journal_openaire_id + "' as repository_id, ro.id as result_id, month as date,"
                    + " max(views) AS count, max(openaire_referrer) AS openaire "
                    + "FROM " + monthlyView + " p,"
                    + ConnectDB.getStatsDBSchema()
                    + ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
                statement.executeUpdate(insertIntoEpisciencesDownloadsTable);
                logger.info("Inserted episciencesSuffix_result_downloads_monthly_tmp into EpisciencesDownloadsTable");

                // The per-journal view is only an intermediate step: remove it again.
                statement.executeUpdate(dropepisciencesSuffixDownloads);
                logger.info("Dropped episciencesSuffix_result_downloads_monthly_tmp view");
            }
        }
    }
}
/**
 * Builds the COUNTER CoP R5 metric tables inside the usage stats schema.
 *
 * <p>For each of the four metrics (unique/total item investigations, unique/total item
 * requests) a per-visit view is (re)created on top of {@code piwiklogdistinct}, then a
 * table is dropped and rebuilt from that view joined with the stats schema's
 * {@code datasource} and {@code result_oids} tables. Finally {@code tbl_all_r5_metrics}
 * is dropped and rebuilt by full-outer-joining the four metric tables.
 *
 * <p>The statement is always closed, even when a query fails; the connection obtained
 * from {@code ConnectDB.getHiveConnection()} is closed only after every query succeeded,
 * as before.
 *
 * @throws Exception if obtaining the connection or executing any statement fails
 */
private void createCoPR5Tables() throws Exception {
    final String usageStatsSchema = ConnectDB.getUsageStatsDBSchema();
    final String statsSchema = ConnectDB.getStatsDBSchema();

    // Fragments shared verbatim by the four per-visit views.
    final String viewSelectPrefix = " AS SELECT id_visit, entity_id, "
        + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id, ";
    final String referrerMonthAndSource = "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
        + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
        + "FROM " + usageStatsSchema + ".piwiklogdistinct WHERE ";
    final String itemFilterAndGrouping = "(source_item_type='oaItem' or source_item_type='repItem') "
        + "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
        + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
    // Restricts the "requests" views to download actions.
    final String downloadsOnly = "action='download' AND ";

    // Fragments shared verbatim by the four metric tables: the join against datasource and
    // result_oids, the exclusion of oids '200', '204', '404', '400', '503' and of one
    // datasource id, and the final grouping.
    final String tableSelectPrefix = "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, ";
    final String resolveIdsAndGroup = statsSchema + ".datasource d," + statsSchema + ".result_oids ro "
        + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
        + "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
        + "GROUP BY d.id, ro.id, month ";

    // try-with-resources: the statement used to leak whenever one of the queries threw.
    try (Statement stmt = ConnectDB.getHiveConnection().createStatement()) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        // Unique Item Investigations
        // The CASE expression yields 1 in both branches, i.e. every (visit, item, month, source)
        // group contributes exactly one unique investigation.
        logger.info("Create View Unique_Item_Investigations");
        String sql = "CREATE OR REPLACE VIEW " + usageStatsSchema + ".view_unique_item_investigations"
            + viewSelectPrefix
            + "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_investigations, "
            + referrerMonthAndSource
            + itemFilterAndGrouping;
        stmt.executeUpdate(sql);
        logger.info("Created View Unique_Item_Investigations");

        logger.info("Drop Table Unique_Item_Investigations");
        sql = "DROP TABLE IF EXISTS " + usageStatsSchema + ".tbl_unique_item_investigations ";
        stmt.executeUpdate(sql);
        logger.info("Dropped Table Unique_Item_Investigations");

        logger.info("Create Table tbl_unique_item_investigations");
        sql = "CREATE TABLE " + usageStatsSchema + ".tbl_unique_item_investigations as "
            + tableSelectPrefix
            + "sum(unique_item_investigations) AS unique_item_investigations, sum(openaire_referrer) AS openaire "
            + "FROM " + usageStatsSchema + ".view_unique_item_investigations p, "
            + resolveIdsAndGroup;
        stmt.executeUpdate(sql);
        logger.info("Created Table tbl_unique_item_investigations");

        // Total Item Investigations
        logger.info("Create View view_total_item_investigations");
        sql = "CREATE OR REPLACE VIEW " + usageStatsSchema + ".view_total_item_investigations"
            + viewSelectPrefix
            + "COUNT(entity_id) AS total_item_investigations, "
            + referrerMonthAndSource
            + itemFilterAndGrouping;
        stmt.executeUpdate(sql);
        logger.info("Created View view_total_item_investigations");

        logger.info("Drop Table tbl_total_item_investigations");
        sql = "DROP TABLE IF EXISTS " + usageStatsSchema + ".tbl_total_item_investigations ";
        stmt.executeUpdate(sql);
        logger.info("Dropped Table tbl_total_item_investigations");

        logger.info("Create Table tbl_total_item_investigations");
        sql = "CREATE TABLE " + usageStatsSchema + ".tbl_total_item_investigations AS "
            + tableSelectPrefix
            + "sum(total_item_investigations) AS total_item_investigations, sum(openaire_referrer) AS openaire "
            + "FROM " + usageStatsSchema + ".view_total_item_investigations p, "
            + resolveIdsAndGroup;
        stmt.executeUpdate(sql);
        logger.info("Created Table tbl_total_item_investigations");

        // Unique Item Requests
        logger.info("Create View view_unique_item_requests");
        sql = "CREATE OR REPLACE VIEW " + usageStatsSchema + ".view_unique_item_requests"
            + viewSelectPrefix
            + "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_requests, "
            + referrerMonthAndSource
            + downloadsOnly
            + itemFilterAndGrouping;
        stmt.executeUpdate(sql);
        logger.info("Created View view_unique_item_requests");

        logger.info("Drop Table Unique_Item_Requests");
        sql = "DROP TABLE IF EXISTS " + usageStatsSchema + ".tbl_unique_item_requests ";
        stmt.executeUpdate(sql);
        logger.info("Dropped Table Unique_Item_Requests");

        logger.info("Create Table tbl_unique_item_requests");
        sql = "CREATE TABLE " + usageStatsSchema + ".tbl_unique_item_requests as "
            + tableSelectPrefix
            + "sum(unique_item_requests) AS unique_item_requests, sum(openaire_referrer) AS openaire "
            + "FROM " + usageStatsSchema + ".view_unique_item_requests p, "
            + resolveIdsAndGroup;
        stmt.executeUpdate(sql);
        logger.info("Created Table tbl_unique_item_requests");

        // Total Item Requests
        logger.info("Create View view_total_item_requests");
        sql = "CREATE OR REPLACE VIEW " + usageStatsSchema + ".view_total_item_requests"
            + viewSelectPrefix
            + "COUNT(entity_id) AS total_item_requests, "
            + referrerMonthAndSource
            + downloadsOnly
            + itemFilterAndGrouping;
        stmt.executeUpdate(sql);
        logger.info("Created View view_total_item_requests");

        logger.info("Drop Table tbl_total_item_requests");
        sql = "DROP TABLE IF EXISTS " + usageStatsSchema + ".tbl_total_item_requests ";
        stmt.executeUpdate(sql);
        logger.info("Dropped Table tbl_total_item_requests");

        logger.info("Create Table tbl_total_item_requests");
        sql = "CREATE TABLE " + usageStatsSchema + ".tbl_total_item_requests as "
            + tableSelectPrefix
            + "sum(total_item_requests) AS total_item_requests, sum(openaire_referrer) AS openaire "
            + "FROM " + usageStatsSchema + ".view_total_item_requests p, "
            + resolveIdsAndGroup;
        stmt.executeUpdate(sql);
        logger.info("Created Table tbl_total_item_requests");

        // All CoP R5 metrics Table
        logger.info("Drop Table tbl_all_r5_metrics");
        sql = "DROP TABLE IF EXISTS " + usageStatsSchema + ".tbl_all_r5_metrics ";
        stmt.executeUpdate(sql);
        logger.info("Dropped Table tbl_all_r5_metrics");

        logger.info("Create Table tbl_all_r5_metrics");
        // NOTE(review): tmp1 joins on ds.source=vs.source, whereas tmp2 and the final join
        // match on repository_id. Both inputs carry the constant source 'OpenAIRE', so tmp1
        // effectively matches on result_id and date only - confirm this is intended.
        sql = "CREATE TABLE IF NOT EXISTS " + usageStatsSchema
            + ".tbl_all_r5_metrics as "
            + "WITH tmp1 as (SELECT coalesce(ds.repository_id, vs.repository_id) as repository_id, "
            + "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
            + "coalesce(vs.unique_item_investigations, 0) as unique_item_investigations, "
            + "coalesce(ds.total_item_investigations, 0) as total_item_investigations "
            + "FROM " + usageStatsSchema + ".tbl_unique_item_investigations AS vs "
            + "FULL OUTER JOIN " + usageStatsSchema + ".tbl_total_item_investigations AS ds "
            + " ON ds.source=vs.source AND ds.result_id=vs.result_id AND ds.date=vs.date), "
            + "tmp2 AS (select coalesce (ds.repository_id, vs.repository_id) as repository_id, "
            + "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
            + "coalesce(ds.total_item_investigations, 0) as total_item_investigations, "
            + "coalesce(ds.unique_item_investigations, 0) as unique_item_investigations, "
            + " coalesce(vs.unique_item_requests, 0) as unique_item_requests FROM tmp1 "
            + "AS ds FULL OUTER JOIN " + usageStatsSchema + ".tbl_unique_item_requests AS vs "
            + "ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date) "
            + "SELECT 'OpenAIRE' as source, coalesce (ds.repository_id, vs.repository_id) as repository_id, "
            + "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
            + "coalesce(ds.unique_item_investigations, 0) as unique_item_investigations, "
            + "coalesce(ds.total_item_investigations, 0) as total_item_investigations, "
            + "coalesce(ds.unique_item_requests, 0) as unique_item_requests, "
            + "coalesce(vs.total_item_requests, 0) as total_item_requests "
            + "FROM tmp2 AS ds FULL OUTER JOIN " + usageStatsSchema + ".tbl_total_item_requests "
            + "AS vs ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date";
        stmt.executeUpdate(sql);
        logger.info("Created Table tbl_all_r5_metrics");
    }
    ConnectDB.getHiveConnection().close();
}
/**
 * Assembles the final usage statistics tables and exposes them on the permanent schema.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>drop {@code views_stats}, {@code downloads_stats}, {@code pageviews_stats} and
 * {@code usage_stats} in the usage stats schema;</li>
 * <li>recreate {@code views_stats}, {@code downloads_stats} and {@code pageviews_stats}
 * with the layout of the matching {@code openaire_*_stats_tmp} table and fill them from
 * the per-source tables;</li>
 * <li>rebuild {@code full_dates} and {@code usage_stats};</li>
 * <li>append the LaReferencia and IRUS-UK rows to {@code tbl_all_r5_metrics};</li>
 * <li>drop and recreate the views on the permanent usage stats schema.</li>
 * </ol>
 *
 * <p>The statement is stored in the {@code stmt} field and is always closed, even when a
 * query fails; the connection obtained from {@code ConnectDB.getHiveConnection()} is
 * closed only after every query succeeded, as before.
 *
 * @throws Exception if obtaining the connection or executing any statement fails
 */
public void finalizeStats() throws Exception {
    final String usageStatsSchema = ConnectDB.getUsageStatsDBSchema();
    final String rawDataSchema = ConnectDB.getUsageRawDataDBSchema();
    final String statsSchema = ConnectDB.getStatsDBSchema();

    // Common prefixes of the many INSERT statements below.
    final String intoViewsStats = "INSERT INTO " + usageStatsSchema + ".views_stats ";
    final String intoDownloadsStats = "INSERT INTO " + usageStatsSchema + ".downloads_stats ";
    final String allFromUsageStats = "SELECT * FROM " + usageStatsSchema + ".";

    stmt = ConnectDB.getHiveConnection().createStatement();
    // try-with-resources over the shared field: the statement used to leak on any failure.
    try (Statement statement = stmt) {
        ConnectDB.getHiveConnection().setAutoCommit(false);

        // Dropping views_stats table
        logger.info("Dropping views_stats table");
        String sql = "DROP TABLE IF EXISTS " + usageStatsSchema + ".views_stats";
        statement.executeUpdate(sql);
        // Logged after the DROP has run (it used to be logged before execution).
        logger.info("Dropped views_stats table ");

        // Dropping downloads_stats table
        logger.info("Dropping downloads_stats table");
        sql = "DROP TABLE IF EXISTS " + usageStatsSchema + ".downloads_stats";
        statement.executeUpdate(sql);
        logger.info("Dropped downloads_stats table ");

        // Dropping page_views_stats table
        logger.info("Dropping pageviews_stats table");
        sql = "DROP TABLE IF EXISTS " + usageStatsSchema + ".pageviews_stats";
        statement.executeUpdate(sql);
        logger.info("Dropped pageviews_stats table ");

        // Dropping usage_stats table
        logger.info("Dropping usage_stats table");
        sql = "DROP TABLE IF EXISTS " + usageStatsSchema + ".usage_stats";
        statement.executeUpdate(sql);
        logger.info("Dropped usage_stats table ");

        // Creating views_stats table
        logger.info("Creating views_stats table");
        sql = "CREATE TABLE IF NOT EXISTS " + usageStatsSchema + ".views_stats "
            + "LIKE " + usageStatsSchema + ".openaire_views_stats_tmp STORED AS PARQUET";
        statement.executeUpdate(sql);
        logger.info("Created views_stats table");

        // Inserting OpenAIRE views stats
        logger.info("Inserting Openaire data to views_stats");
        sql = intoViewsStats + allFromUsageStats + "openaire_views_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("Openaire views updated to views_stats");

        // Inserting Episciences views stats
        logger.info("Inserting Episciences data to views_stats");
        sql = intoViewsStats + allFromUsageStats + "episciencesviews";
        statement.executeUpdate(sql);
        logger.info("Episciences views updated to views_stats");

        // Inserting Pedocs old views stats
        logger.info("Inserting Pedocs old data to views_stats");
        sql = intoViewsStats + allFromUsageStats + "pedocs_views_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("Pedocs views updated to views_stats");

        // Inserting TUDELFT views stats
        logger.info("Inserting TUDELFT data to views_stats");
        sql = intoViewsStats + allFromUsageStats + "tudelft_views_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("TUDELFT views updated to views_stats");

        // Inserting Lareferencia views stats
        logger.info("Inserting LaReferencia data to views_stats");
        sql = intoViewsStats + allFromUsageStats + "la_views_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("LaReferencia views updated to views_stats");

        // Inserting B2SHARE views stats
        logger.info("Inserting B2SHARE data to views_stats");
        sql = intoViewsStats + allFromUsageStats + "b2share_views_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("B2SHARE views updated to views_stats");

        // Creating downloads_stats table
        logger.info("Creating downloads_stats table");
        sql = "CREATE TABLE IF NOT EXISTS " + usageStatsSchema + ".downloads_stats "
            + "LIKE " + usageStatsSchema + ".openaire_downloads_stats_tmp STORED AS PARQUET";
        statement.executeUpdate(sql);
        logger.info("Created downloads_stats table");

        // Inserting OpenAIRE downloads stats
        logger.info("Inserting OpenAIRE data to downloads_stats");
        sql = intoDownloadsStats + allFromUsageStats + "openaire_downloads_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("Inserted OpenAIRE data to downloads_stats");

        // Inserting Episciences downloads stats
        logger.info("Inserting Episciences data to downloads_stats");
        sql = intoDownloadsStats + allFromUsageStats + "episciencesdownloads";
        statement.executeUpdate(sql);
        logger.info("Episciences downloads updated to downloads_stats");

        // Inserting Pedocs old downloads stats
        logger.info("Inserting PeDocs old data to downloads_stats");
        sql = intoDownloadsStats + allFromUsageStats + "pedocs_downloads_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("Inserted Pedocs data to downloads_stats");

        // Inserting TUDELFT downloads stats
        logger.info("Inserting TUDELFT data to downloads_stats");
        sql = intoDownloadsStats + allFromUsageStats + "tudelft_downloads_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("Inserted TUDELFT data to downloads_stats");

        // Inserting B2SHARE downloads stats
        logger.info("Inserting B2SHARE data to downloads_stats");
        sql = intoDownloadsStats + allFromUsageStats + "b2share_downloads_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("Inserted B2SHARE data to downloads_stats");

        // Inserting Lareferencia downloads stats
        logger.info("Inserting LaReferencia data to downloads_stats");
        sql = intoDownloadsStats + allFromUsageStats + "la_downloads_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("Lareferencia downloads updated to downloads_stats");

        // Inserting IRUS downloads stats
        logger.info("Inserting IRUS data to downloads_stats");
        sql = intoDownloadsStats + allFromUsageStats + "irus_downloads_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("IRUS downloads updated to downloads_stats");

        // Inserting IRUS_R5 views stats (the views column of irus_R5_stats_tmp)
        logger.info("Inserting IRUS_R5 views to views_stats");
        sql = intoViewsStats
            + "SELECT source, repository_id, result_id, `date`, views, openaire FROM "
            + usageStatsSchema + ".irus_R5_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("IRUS_R5 views updated to views_stats");

        // Inserting IRUS_R5 downloads stats (the downloads column of irus_R5_stats_tmp)
        logger.info("Inserting IRUS_R5 data to downloads_stats");
        sql = intoDownloadsStats
            + "SELECT source, repository_id, result_id, `date`, downloads, openaire FROM "
            + usageStatsSchema + ".irus_R5_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("IRUS_R5 downloads updated to downloads_stats");

        // Inserting SARC-OJS downloads stats
        logger.info("Inserting SARC data to downloads_stats");
        sql = intoDownloadsStats + allFromUsageStats + "sarc_downloads_stats_tmp";
        statement.executeUpdate(sql);
        logger.info("SARC-OJS downloads updated to downloads_stats");

        // Inserting Datacite views stats (read from the raw data schema)
        logger.info("Inserting Datacite views to views_stats");
        sql = intoViewsStats + "SELECT * FROM " + rawDataSchema + ".datacite_views";
        statement.executeUpdate(sql);
        logger.info("Datacite views updated to views_stats");

        // Inserting Datacite downloads stats (read from the raw data schema)
        logger.info("Inserting Datacite downloads to downloads_stats");
        sql = intoDownloadsStats + "SELECT * FROM " + rawDataSchema + ".datacite_downloads";
        statement.executeUpdate(sql);
        logger.info("Datacite downloads updated to downloads_stats");

        // Creating pageviews_stats table
        logger.info("Creating pageviews_stats table");
        sql = "CREATE TABLE IF NOT EXISTS " + usageStatsSchema + ".pageviews_stats "
            + "LIKE " + usageStatsSchema + ".openaire_pageviews_stats_tmp STORED AS PARQUET";
        statement.executeUpdate(sql);
        logger.info("Created pageviews_stats table");

        // Inserting OpenAIRE views stats from Portal
        logger.info("Inserting data to page_views_stats");
        sql = "INSERT INTO " + usageStatsSchema + ".pageviews_stats "
            + allFromUsageStats + "openaire_pageviews_stats_tmp";
        statement.executeUpdate(sql);

        logger.info("Dropping full_dates table");
        sql = "DROP TABLE IF EXISTS " + usageStatsSchema + ".full_dates";
        statement.executeUpdate(sql);
        logger.info("Dropped full_dates table");

        // Whole months elapsed between 2016-01 and the current month.
        Calendar startCalendar = Calendar.getInstance();
        startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
        Calendar endCalendar = Calendar.getInstance();
        int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
        int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);

        // posexplode over a string of diffMonth spaces is used as a row generator;
        // presumably it yields one 'yyyy/MM' row per month from 2016/01 up to the current
        // month - TODO confirm against Hive's split() semantics.
        logger.info("Creating full_dates table");
        sql = "CREATE TABLE IF NOT EXISTS " + usageStatsSchema + ".full_dates AS "
            + "SELECT from_unixtime(unix_timestamp(cast(add_months(from_date,i) AS DATE)), 'yyyy/MM') AS txn_date "
            + "FROM (SELECT DATE '2016-01-01' AS from_date) p "
            + "LATERAL VIEW "
            + "posexplode(split(space(" + diffMonth + "),' ')) pe AS i,x";
        statement.executeUpdate(sql);
        logger.info("Created full_dates table");

        // usage_stats merges downloads and views per (source, repository, result, date)
        logger.info("Inserting data to usage_stats");
        sql = "CREATE TABLE IF NOT EXISTS " + usageStatsSchema + ".usage_stats AS "
            + "SELECT coalesce(ds.source, vs.source) as source, "
            + "coalesce(ds.repository_id, vs.repository_id) as repository_id, "
            + "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
            + "coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, "
            + "coalesce(ds.openaire, 0) as openaire_downloads, "
            + "coalesce(vs.openaire, 0) as openaire_views "
            + "FROM " + usageStatsSchema + ".downloads_stats AS ds FULL OUTER JOIN "
            + usageStatsSchema + ".views_stats AS vs ON ds.source=vs.source "
            + "AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date";
        statement.executeUpdate(sql);
        logger.info("Inserted data to usage_stats");

        // Inserting LaReferencia CoP R5 Metrics
        logger.info("Inserting Lareferencia data to tbl_all_r5_metrics");
        sql = "INSERT INTO " + usageStatsSchema + ".tbl_all_r5_metrics "
            + allFromUsageStats + "lr_tbl_all_r5_metrics";
        statement.executeUpdate(sql);

        // Inserting IRUS-UK CoP R5 Metrics
        logger.info("Inserting IRUS-UK data into tbl_all_r5_metrics");
        // Identifier spelled with ASCII letters only (it used to contain a Greek Rho).
        String insertR5Stats = "INSERT INTO " + usageStatsSchema + ".tbl_all_r5_metrics "
            + "SELECT s.source, d.id AS repository_id, "
            + "ro.id as result_id, CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) as date, "
            + "s.unique_item_investigations , s.total_item_investigations, "
            + "s.unique_item_requests, s.total_item_requests "
            + "FROM " + rawDataSchema + ".sushilog_cop_r5 s, "
            + statsSchema + ".datasource_oids d, "
            + statsSchema + ".result_oids ro "
            + "WHERE s.repository=d.oid AND s.rid=ro.oid AND s.source='IRUS-UK'";
        statement.executeUpdate(insertR5Stats);
        logger.info("Inserted IRUS-UK data into tbl_all_r5_metrics");

        // Views on the permanent schema that point at the freshly built tables
        final String permanentSchema = ConnectDB.getUsagestatsPermanentDBSchema();
        logger.info("Building views at permanent DB starts at: " + new Timestamp(System.currentTimeMillis()));

        logger.info("Dropping view views_stats on permanent usagestats DB");
        sql = "DROP VIEW IF EXISTS " + permanentSchema + ".views_stats";
        statement.executeUpdate(sql);
        logger.info("Dropped view views_stats on permanent usagestats DB");

        logger.info("Create view views_stats on permanent usagestats DB");
        sql = "CREATE VIEW IF NOT EXISTS " + permanentSchema + ".views_stats"
            + " AS SELECT * FROM " + usageStatsSchema + ".views_stats";
        statement.executeUpdate(sql);
        logger.info("Created view views_stats on permanent usagestats DB");

        logger.info("Dropping view pageviews_stats on permanent usagestats DB");
        sql = "DROP VIEW IF EXISTS " + permanentSchema + ".pageviews_stats";
        statement.executeUpdate(sql);
        logger.info("Dropped view pageviews_stats on permanent usagestats DB");

        logger.info("Create view pageviews_stats on permanent usagestats DB");
        sql = "CREATE VIEW IF NOT EXISTS " + permanentSchema + ".pageviews_stats"
            + " AS SELECT * FROM " + usageStatsSchema + ".pageviews_stats";
        statement.executeUpdate(sql);
        logger.info("Created view pageviews_stats on permanent usagestats DB");

        logger.info("Dropping view downloads_stats on permanent usagestats DB");
        sql = "DROP VIEW IF EXISTS " + permanentSchema + ".downloads_stats";
        statement.executeUpdate(sql);
        logger.info("Dropped view on downloads_stats on permanent usagestats DB");

        logger.info("Create view on downloads_stats on permanent usagestats DB");
        sql = "CREATE VIEW IF NOT EXISTS " + permanentSchema + ".downloads_stats"
            + " AS SELECT * FROM " + usageStatsSchema + ".downloads_stats";
        statement.executeUpdate(sql);
        logger.info("Created view on downloads_stats on permanent usagestats DB");

        logger.info("Dropping view usage_stats on permanent usagestats DB");
        sql = "DROP VIEW IF EXISTS " + permanentSchema + ".usage_stats";
        statement.executeUpdate(sql);
        logger.info("Dropped view on usage_stats on permanent usagestats DB");

        logger.info("Create view on usage_stats on permanent usagestats DB");
        sql = "CREATE VIEW IF NOT EXISTS " + permanentSchema + ".usage_stats"
            + " AS SELECT * FROM " + usageStatsSchema + ".usage_stats";
        statement.executeUpdate(sql);
        logger.info("Created view on usage_stats on permanent usagestats DB");

        logger.info("Dropping view COUNTER_R5_Metrics on permanent usagestats DB");
        sql = "DROP VIEW IF EXISTS " + permanentSchema + ".counter_r5_stats_with_metrics";
        statement.executeUpdate(sql);
        logger.info("Dropped view COUNTER_R5_Metrics on permanent usagestats DB");

        logger.info("Create view on COUNTER_R5_Metrics on permanent usagestats DB");
        sql = "CREATE VIEW IF NOT EXISTS " + permanentSchema
            + ".counter_r5_stats_with_metrics"
            + " AS SELECT * FROM " + usageStatsSchema + ".tbl_all_r5_metrics";
        statement.executeUpdate(sql);
        logger.info("Created view on COUNTER_R5_Metrics on permanent usagestats DB");

        logger.info("Building views at permanent DB ends at: " + new Timestamp(System.currentTimeMillis()));
    }
    ConnectDB.getHiveConnection().close();
}
/**
 * Hands back the Hive connection supplied by {@code ConnectDB.getHiveConnection()}.
 *
 * @return the connection returned by {@code ConnectDB}
 * @throws SQLException propagated from {@code ConnectDB.getHiveConnection()}
 */
private Connection getConnection() throws SQLException {
    final Connection hiveConnection = ConnectDB.getHiveConnection();
    return hiveConnection;
}
}