forked from D-Net/dnet-hadoop
Last commit
This commit is contained in:
parent
6b247524a8
commit
22eaf211e8
|
@ -397,8 +397,8 @@ public class ReadReportsListFromDatacite {
|
||||||
+ ".datacite_downloads STORED AS PARQUET as "
|
+ ".datacite_downloads STORED AS PARQUET as "
|
||||||
+ "SELECT 'Datacite' source, d.id repository_id, od.id result_id, regexp_replace(substring(string(period_end),0,7),'-','/') date, count, '0' openaire "
|
+ "SELECT 'Datacite' source, d.id repository_id, od.id result_id, regexp_replace(substring(string(period_end),0,7),'-','/') date, count, '0' openaire "
|
||||||
+ "FROM " + ConnectDB.getDataSetUsageStatsDBSchema() + ".datasetsperformance "
|
+ "FROM " + ConnectDB.getDataSetUsageStatsDBSchema() + ".datasetsperformance "
|
||||||
+ "JOIN " + ConnectDB.getStatsDBSchema() + ".datasource d on name=platform "
|
+ "JOIN " + ConnectDB.getStatsDBSchema() + ".datasource d on lower(name)=lower(platform) "
|
||||||
+ "JOIN " + ConnectDB.getStatsDBSchema() + ".result_oids od on string(ds_type)=od.oid "
|
+ "JOIN " + ConnectDB.getStatsDBSchema() + ".result_oids od on lower(string(ds_type))=lower(od.oid) "
|
||||||
+ "where metric_type='total-dataset-requests' ";
|
+ "where metric_type='total-dataset-requests' ";
|
||||||
stmt.executeUpdate(createDownloadsTable);
|
stmt.executeUpdate(createDownloadsTable);
|
||||||
logger.info("Downloads Stats table created");
|
logger.info("Downloads Stats table created");
|
||||||
|
@ -408,8 +408,8 @@ public class ReadReportsListFromDatacite {
|
||||||
+ ".datacite_views STORED AS PARQUET as "
|
+ ".datacite_views STORED AS PARQUET as "
|
||||||
+ "SELECT 'Datacite' source, d.id repository_id, od.id result_id, regexp_replace(substring(string(period_end),0,7),'-','/') date, count, '0' openaire "
|
+ "SELECT 'Datacite' source, d.id repository_id, od.id result_id, regexp_replace(substring(string(period_end),0,7),'-','/') date, count, '0' openaire "
|
||||||
+ "FROM " + ConnectDB.getDataSetUsageStatsDBSchema() + ".datasetsperformance "
|
+ "FROM " + ConnectDB.getDataSetUsageStatsDBSchema() + ".datasetsperformance "
|
||||||
+ "JOIN " + ConnectDB.getStatsDBSchema() + ".datasource d on name=platform "
|
+ "JOIN " + ConnectDB.getStatsDBSchema() + ".datasource d on lower(name)=lower(platform) "
|
||||||
+ "JOIN " + ConnectDB.getStatsDBSchema() + ".result_oids od on string(ds_type)=od.oid "
|
+ "JOIN " + ConnectDB.getStatsDBSchema() + ".result_oids od on lower(string(ds_type))=lower(od.oid) "
|
||||||
+ "where metric_type='total-dataset-investigations' ";
|
+ "where metric_type='total-dataset-investigations' ";
|
||||||
stmt.executeUpdate(createViewsTable);
|
stmt.executeUpdate(createViewsTable);
|
||||||
logger.info("Views Stats table created");
|
logger.info("Views Stats table created");
|
||||||
|
|
|
@ -158,7 +158,7 @@ public class LaReferenciaDownloadLogs {
|
||||||
// end.add(Calendar.MONTH, +1);
|
// end.add(Calendar.MONTH, +1);
|
||||||
// end.add(Calendar.DAY_OF_MONTH, -1);
|
// end.add(Calendar.DAY_OF_MONTH, -1);
|
||||||
Calendar end = Calendar.getInstance();
|
Calendar end = Calendar.getInstance();
|
||||||
end.add(Calendar.DAY_OF_MONTH, -1);
|
end.add(Calendar.DAY_OF_MONTH, -3);
|
||||||
|
|
||||||
logger.info("Ending period for log download: " + sdf.format(end.getTime()));
|
logger.info("Ending period for log download: " + sdf.format(end.getTime()));
|
||||||
|
|
||||||
|
@ -205,7 +205,7 @@ public class LaReferenciaDownloadLogs {
|
||||||
true);
|
true);
|
||||||
|
|
||||||
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + laReferencialMatomoID + period + format
|
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + laReferencialMatomoID + period + format
|
||||||
+ "&expanded=5&filter_limit=500&token_auth=" + tokenAuth;
|
+ "&expanded=5&filter_limit=100&token_auth=" + tokenAuth;
|
||||||
String content = "";
|
String content = "";
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
|
|
|
@ -228,6 +228,12 @@ public class PiwikDownloadLogs {
|
||||||
while (rs.next()) {
|
while (rs.next()) {
|
||||||
piwikIdToVisit.add(rs.getInt(1));
|
piwikIdToVisit.add(rs.getInt(1));
|
||||||
}
|
}
|
||||||
|
piwikIdToVisit.add(630);
|
||||||
|
piwikIdToVisit.add(662);
|
||||||
|
piwikIdToVisit.add(694);
|
||||||
|
piwikIdToVisit.add(725);
|
||||||
|
piwikIdToVisit.add(728);
|
||||||
|
|
||||||
logger.info("Found the following piwikIds for download: " + piwikIdToVisit);
|
logger.info("Found the following piwikIds for download: " + piwikIdToVisit);
|
||||||
|
|
||||||
if (ExecuteWorkflow.numberOfPiwikIdsToDownload > 0
|
if (ExecuteWorkflow.numberOfPiwikIdsToDownload > 0
|
||||||
|
|
|
@ -51,7 +51,6 @@ public class UsageStatsExporter {
|
||||||
ConnectDB.init();
|
ConnectDB.init();
|
||||||
|
|
||||||
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
|
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
|
||||||
|
|
||||||
logger.info("Re-creating database and tables");
|
logger.info("Re-creating database and tables");
|
||||||
if (ExecuteWorkflow.recreateDbAndTables) {
|
if (ExecuteWorkflow.recreateDbAndTables) {
|
||||||
piwikstatsdb.recreateDBAndTables();
|
piwikstatsdb.recreateDBAndTables();
|
||||||
|
|
|
@ -41,8 +41,42 @@ public class LaReferenciaStats {
|
||||||
public LaReferenciaStats() throws Exception {
|
public LaReferenciaStats() throws Exception {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Rebuilds the {@code lareferencialogdistinct} table in the usage-stats schema.
 * The table is dropped if present, re-created as a bucketed transactional ORC
 * table, and then filled with the distinct rows of the raw-data schema's
 * {@code lareferencialog} table whose {@code entity_id} is not null.
 *
 * @throws Exception propagated from ConnectDB or from executing any of the statements
 */
public void createDistinctLaReferenciaLog() throws Exception {
|
||||||
|
logger.info("Initialising DB properties");
|
||||||
|
ConnectDB.init();
|
||||||
|
|
||||||
|
// NOTE(review): stmt is not closed anywhere in this method - confirm whether
// connection teardown elsewhere releases it, otherwise close it here.
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||||
|
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||||
|
|
||||||
|
logger.info("Dropping lareferencialogdistinct");
|
||||||
|
String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogdistinct";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Dropped lareferencialogdistinct");
|
||||||
|
|
||||||
|
logger.info("Creating lareferencialogdistinct table");
|
||||||
|
// Re-create the lareferencialogdistinct table; it was dropped just above, so IF NOT EXISTS is only a safeguard
|
||||||
|
String sqlCreateTablePiwikLogDistinct = "CREATE TABLE IF NOT EXISTS "
|
||||||
|
+ ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".lareferencialogdistinct(matomoid INT, source STRING, id_visit STRING, country STRING, action STRING, url STRING, "
|
||||||
|
+ "entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) "
|
||||||
|
+ "clustered by (source, id_visit, action, timestamp, entity_id) "
|
||||||
|
+ "into 100 buckets stored as orc tblproperties('transactional'='true')";
|
||||||
|
stmt.executeUpdate(sqlCreateTablePiwikLogDistinct);
|
||||||
|
logger.info("Created lareferencialogdistinct table");
|
||||||
|
|
||||||
|
logger.info("Inserting data to lareferencialogdistinct");
|
||||||
|
// SELECT DISTINCT * maps columns by position: assumes lareferencialog has the
// same column order as the table created above - TODO confirm
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogdistinct "
|
||||||
|
+ "SELECT DISTINCT * FROM " + ConnectDB.getUsageRawDataDBSchema()
|
||||||
|
+ ".lareferencialog WHERE entity_id is not null";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Inserted data to lareferencialogdistinct");
|
||||||
|
}
|
||||||
|
|
||||||
public void processLogs() throws Exception {
|
public void processLogs() throws Exception {
|
||||||
try {
|
try {
|
||||||
|
logger.info("Creating LareferenciaLogDistinct");
|
||||||
|
createDistinctLaReferenciaLog();
|
||||||
|
|
||||||
logger.info("LaReferencia creating viewsStats");
|
logger.info("LaReferencia creating viewsStats");
|
||||||
viewsStats();
|
viewsStats();
|
||||||
logger.info("LaReferencia created viewsStats");
|
logger.info("LaReferencia created viewsStats");
|
||||||
|
@ -76,7 +110,7 @@ public class LaReferenciaStats {
|
||||||
"SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
|
"SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
|
||||||
"THEN 1 ELSE 0 END) AS openaire_referrer, " +
|
"THEN 1 ELSE 0 END) AS openaire_referrer, " +
|
||||||
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
|
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
|
||||||
"FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog where action='action' and " +
|
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogdistinct where action='action' and " +
|
||||||
"(source_item_type='oaItem' or source_item_type='repItem') " +
|
"(source_item_type='oaItem' or source_item_type='repItem') " +
|
||||||
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
|
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
|
||||||
"source ORDER BY source, entity_id";
|
"source ORDER BY source, entity_id";
|
||||||
|
@ -117,7 +151,7 @@ public class LaReferenciaStats {
|
||||||
"SELECT entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
|
"SELECT entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
|
||||||
"THEN 1 ELSE 0 END) AS openaire_referrer, " +
|
"THEN 1 ELSE 0 END) AS openaire_referrer, " +
|
||||||
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
|
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
|
||||||
"FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog where action='download' and " +
|
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogdistinct where action='download' and " +
|
||||||
"(source_item_type='oaItem' or source_item_type='repItem') " +
|
"(source_item_type='oaItem' or source_item_type='repItem') " +
|
||||||
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
|
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
|
||||||
"source ORDER BY source, entity_id";
|
"source ORDER BY source, entity_id";
|
||||||
|
@ -160,7 +194,7 @@ public class LaReferenciaStats {
|
||||||
+ "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_investigations, "
|
+ "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_investigations, "
|
||||||
+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
||||||
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
||||||
+ "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog "
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogdistinct "
|
||||||
+ "WHERE (source_item_type='oaItem' or source_item_type='repItem') "
|
+ "WHERE (source_item_type='oaItem' or source_item_type='repItem') "
|
||||||
+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
|
+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
|
||||||
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
|
||||||
|
@ -192,7 +226,7 @@ public class LaReferenciaStats {
|
||||||
+ "COUNT(entity_id) AS total_item_investigations, "
|
+ "COUNT(entity_id) AS total_item_investigations, "
|
||||||
+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
||||||
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
||||||
+ "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog "
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogdistinct "
|
||||||
+ "WHERE (source_item_type='oaItem' or source_item_type='repItem') "
|
+ "WHERE (source_item_type='oaItem' or source_item_type='repItem') "
|
||||||
+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
|
+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
|
||||||
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
|
||||||
|
@ -224,7 +258,7 @@ public class LaReferenciaStats {
|
||||||
+ "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_requests, "
|
+ "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_requests, "
|
||||||
+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
||||||
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
||||||
+ "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog "
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogdistinct "
|
||||||
+ "WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem') "
|
+ "WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem') "
|
||||||
+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
|
+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
|
||||||
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
|
||||||
|
@ -256,7 +290,7 @@ public class LaReferenciaStats {
|
||||||
+ "COUNT(entity_id) AS total_item_requests, "
|
+ "COUNT(entity_id) AS total_item_requests, "
|
||||||
+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
||||||
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
||||||
+ "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog "
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogdistinct "
|
||||||
+ "WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem') "
|
+ "WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem') "
|
||||||
+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
|
+ "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
|
||||||
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
|
||||||
|
|
|
@ -101,12 +101,43 @@ public class PiwikStatsDB {
|
||||||
logger.info("Inserted data to piwiklogdistinct");
|
logger.info("Inserted data to piwiklogdistinct");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Rebuilds the {@code episcienceslogdistinct} table in the usage-stats schema.
 * The table is dropped if present, re-created as a bucketed transactional ORC
 * table, and then filled with the distinct rows of the raw-data schema's
 * {@code episcienceslog} table whose {@code entity_id} is not null.
 *
 * @throws Exception propagated from ConnectDB or from executing any of the statements
 */
public void createDistinctEpisciencesLog() throws Exception {
|
||||||
|
logger.info("Initialising DB properties");
|
||||||
|
ConnectDB.init();
|
||||||
|
|
||||||
|
// NOTE(review): stmt is not closed anywhere in this method - confirm whether
// connection teardown elsewhere releases it, otherwise close it here.
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||||
|
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||||
|
|
||||||
|
logger.info("Dropping episcienceslogdistinct");
|
||||||
|
String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".episcienceslogdistinct";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Dropped episcienceslogdistinct");
|
||||||
|
|
||||||
|
logger.info("Creating episcienceslogdistinct table");
|
||||||
|
// Re-create the episcienceslogdistinct table; it was dropped just above, so IF NOT EXISTS is only a safeguard
|
||||||
|
String sqlCreateTablePiwikLogDistinct = "CREATE TABLE IF NOT EXISTS "
|
||||||
|
+ ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".episcienceslogdistinct(source INT, id_visit STRING, country STRING, action STRING, url STRING, "
|
||||||
|
+ "entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) "
|
||||||
|
+ "clustered by (source, id_visit, action, timestamp, entity_id) "
|
||||||
|
+ "into 100 buckets stored as orc tblproperties('transactional'='true')";
|
||||||
|
stmt.executeUpdate(sqlCreateTablePiwikLogDistinct);
|
||||||
|
logger.info("Created episcienceslogdistinct table");
|
||||||
|
|
||||||
|
logger.info("Inserting data to episcienceslogdistinct");
|
||||||
|
// SELECT DISTINCT * maps columns by position: assumes episcienceslog has the
// same column order as the table created above - TODO confirm
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".episcienceslogdistinct "
|
||||||
|
+ "SELECT DISTINCT * FROM " + ConnectDB.getUsageRawDataDBSchema()
|
||||||
|
+ ".episcienceslog WHERE entity_id is not null";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Inserted data to episcienceslogdistinct");
|
||||||
|
}
|
||||||
|
|
||||||
public void processLogs() throws Exception {
|
public void processLogs() throws Exception {
|
||||||
try {
|
try {
|
||||||
|
|
||||||
logger.info("ViewsStats processing starts at: " + new Timestamp(System.currentTimeMillis()));
|
//to remove logger.info("ViewsStats processing starts at: " + new Timestamp(System.currentTimeMillis()));
|
||||||
viewsStats();
|
// viewsStats();
|
||||||
logger.info("ViewsStats processing ends at: " + new Timestamp(System.currentTimeMillis()));
|
//to remove logger.info("ViewsStats processing ends at: " + new Timestamp(System.currentTimeMillis()));
|
||||||
|
|
||||||
logger.info("DownloadsStats processing starts at: " + new Timestamp(System.currentTimeMillis()));
|
logger.info("DownloadsStats processing starts at: " + new Timestamp(System.currentTimeMillis()));
|
||||||
downloadsStats();
|
downloadsStats();
|
||||||
|
@ -125,6 +156,10 @@ public class PiwikStatsDB {
|
||||||
public void processEpisciencesLogs() throws Exception {
|
public void processEpisciencesLogs() throws Exception {
|
||||||
try {
|
try {
|
||||||
|
|
||||||
|
logger.info("Creating EpisciencesLogDistinct Table");
|
||||||
|
createDistinctEpisciencesLog();
|
||||||
|
logger.info("Creating EpisciencesLogDistinct Table Created");
|
||||||
|
|
||||||
logger.info("Views Episciences processing starts at: " + new Timestamp(System.currentTimeMillis()));
|
logger.info("Views Episciences processing starts at: " + new Timestamp(System.currentTimeMillis()));
|
||||||
episciencesViewsStats();
|
episciencesViewsStats();
|
||||||
logger.info("Views Episciences processing ends at: " + new Timestamp(System.currentTimeMillis()));
|
logger.info("Views Episciences processing ends at: " + new Timestamp(System.currentTimeMillis()));
|
||||||
|
@ -172,7 +207,7 @@ public class PiwikStatsDB {
|
||||||
+ ".openaire_views_stats_tmp";
|
+ ".openaire_views_stats_tmp";
|
||||||
stmt.executeUpdate(drop_views_stats);
|
stmt.executeUpdate(drop_views_stats);
|
||||||
logger.info("Dropped openaire_views_stats_tmp table");
|
logger.info("Dropped openaire_views_stats_tmp table");
|
||||||
|
//
|
||||||
logger.info("Creating openaire_views_stats_tmp table");
|
logger.info("Creating openaire_views_stats_tmp table");
|
||||||
String create_views_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
String create_views_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||||
+ ".openaire_views_stats_tmp "
|
+ ".openaire_views_stats_tmp "
|
||||||
|
@ -187,6 +222,71 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(create_views_stats);
|
stmt.executeUpdate(create_views_stats);
|
||||||
logger.info("Created openaire_views_stats_tmp table");
|
logger.info("Created openaire_views_stats_tmp table");
|
||||||
|
|
||||||
|
logger.info("Insert temp missing piwik_ids 630 in openaire_views_stats_tmp table");
|
||||||
|
String create_views_stats_missing_id_630 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_views_stats_tmp "
|
||||||
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::cfa5301358b9fcbe7aa45b1ceea088c6' as repository_id, ro.id as result_id, month as date, "
|
||||||
|
+ "max(views) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
|
||||||
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
||||||
|
+ "WHERE p.source=630 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
||||||
|
+ "GROUP BY ro.id, month "
|
||||||
|
+ "ORDER BY ro.id, month ";
|
||||||
|
stmt.executeUpdate(create_views_stats_missing_id_630);
|
||||||
|
logger.info("Inserted temp missing piwik_ids 630 in openaire_views_stats_tmp table");
|
||||||
|
|
||||||
|
logger.info("Insert temp missing piwik_ids 662 in openaire_views_stats_tmp table");
|
||||||
|
String create_views_stats_missing_id_662 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_views_stats_tmp "
|
||||||
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::4e86eaf2685a67b743a475f86c7c0086' as repository_id, ro.id as result_id, month as date, "
|
||||||
|
+ "max(views) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
|
||||||
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
||||||
|
+ "WHERE p.source=662 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
||||||
|
+ "GROUP BY ro.id, month "
|
||||||
|
+ "ORDER BY ro.id, month ";
|
||||||
|
stmt.executeUpdate(create_views_stats_missing_id_662);
|
||||||
|
logger.info("Inserted temp missing piwik_ids 662 in openaire_views_stats_tmp table");
|
||||||
|
|
||||||
|
logger.info("Insert temp missing piwik_ids 694 in openaire_views_stats_tmp table");
|
||||||
|
String create_views_stats_missing_id_694 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_views_stats_tmp "
|
||||||
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::f35fd567065af297ae65b621e0a21ae9' as repository_id, ro.id as result_id, month as date, "
|
||||||
|
+ "max(views) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
|
||||||
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
||||||
|
+ "WHERE p.source=694 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
||||||
|
+ "GROUP BY ro.id, month "
|
||||||
|
+ "ORDER BY ro.id, month ";
|
||||||
|
stmt.executeUpdate(create_views_stats_missing_id_694);
|
||||||
|
logger.info("Inserted temp missing piwik_ids 694 in openaire_views_stats_tmp table");
|
||||||
|
|
||||||
|
logger.info("Insert temp missing piwik_ids 725 in openaire_views_stats_tmp table");
|
||||||
|
String create_views_stats_missing_id_725 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_views_stats_tmp "
|
||||||
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::7180cffd6a8e829dacfc2a31b3f72ece' as repository_id, ro.id as result_id, month as date, "
|
||||||
|
+ "max(views) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
|
||||||
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
||||||
|
+ "WHERE p.source=725 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
||||||
|
+ "GROUP BY ro.id, month "
|
||||||
|
+ "ORDER BY ro.id, month ";
|
||||||
|
stmt.executeUpdate(create_views_stats_missing_id_725);
|
||||||
|
logger.info("Inserted temp missing piwik_ids 725 in openaire_views_stats_tmp table");
|
||||||
|
|
||||||
|
logger.info("Insert temp missing piwik_ids 728 in openaire_views_stats_tmp table");
|
||||||
|
String create_views_stats_missing_id_728 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_views_stats_tmp "
|
||||||
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::8b3bac12926cc1d9fb5d68783376971d' as repository_id, ro.id as result_id, month as date, "
|
||||||
|
+ "max(views) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
|
||||||
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
||||||
|
+ "WHERE p.source=728 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
||||||
|
+ "GROUP BY ro.id, month "
|
||||||
|
+ "ORDER BY ro.id, month ";
|
||||||
|
stmt.executeUpdate(create_views_stats_missing_id_728);
|
||||||
|
logger.info("Inserted temp missing piwik_ids 728 in openaire_views_stats_tmp table");
|
||||||
|
|
||||||
logger.info("Creating openaire_pageviews_stats_tmp table");
|
logger.info("Creating openaire_pageviews_stats_tmp table");
|
||||||
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||||
+ ".openaire_pageviews_stats_tmp AS SELECT "
|
+ ".openaire_pageviews_stats_tmp AS SELECT "
|
||||||
|
@ -209,27 +309,27 @@ public class PiwikStatsDB {
|
||||||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||||
|
|
||||||
logger.info("Dropping openaire_result_downloads_monthly_tmp view");
|
//to remove logger.info("Dropping openaire_result_downloads_monthly_tmp view");
|
||||||
String drop_result_downloads_monthly = "DROP VIEW IF EXISTS "
|
// String drop_result_downloads_monthly = "DROP VIEW IF EXISTS "
|
||||||
+ ConnectDB.getUsageStatsDBSchema()
|
// + ConnectDB.getUsageStatsDBSchema()
|
||||||
+ ".openaire_result_downloads_monthly_tmp";
|
// + ".openaire_result_downloads_monthly_tmp";
|
||||||
stmt.executeUpdate(drop_result_downloads_monthly);
|
// stmt.executeUpdate(drop_result_downloads_monthly);
|
||||||
logger.info("Dropped openaire_result_downloads_monthly_tmp view");
|
// logger.info("Dropped openaire_result_downloads_monthly_tmp view");
|
||||||
|
//
|
||||||
logger.info("Creating openaire_result_downloads_monthly_tmp view");
|
// logger.info("Creating openaire_result_downloads_monthly_tmp view");
|
||||||
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
|
// String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
|
||||||
+ ".openaire_result_downloads_monthly_tmp "
|
// + ".openaire_result_downloads_monthly_tmp "
|
||||||
+ "AS SELECT entity_id, "
|
// + "AS SELECT entity_id, "
|
||||||
+ "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
|
// + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
|
||||||
+ "COUNT(entity_id) as downloads, "
|
// + "COUNT(entity_id) as downloads, "
|
||||||
+ "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
// + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
|
||||||
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
// + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
||||||
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct where action='download' "
|
// + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct where action='download' "
|
||||||
+ "AND (source_item_type='oaItem' OR source_item_type='repItem') "
|
// + "AND (source_item_type='oaItem' OR source_item_type='repItem') "
|
||||||
+ "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source "
|
// + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source "
|
||||||
+ "ORDER BY source, entity_id, month";
|
// + "ORDER BY source, entity_id, month";
|
||||||
stmt.executeUpdate(sql);
|
// stmt.executeUpdate(sql);
|
||||||
logger.info("Created openaire_result_downloads_monthly_tmp view");
|
//to remove logger.info("Created openaire_result_downloads_monthly_tmp view");
|
||||||
|
|
||||||
logger.info("Dropping openaire_downloads_stats_tmp table");
|
logger.info("Dropping openaire_downloads_stats_tmp table");
|
||||||
String drop_views_stats = "DROP TABLE IF EXISTS "
|
String drop_views_stats = "DROP TABLE IF EXISTS "
|
||||||
|
@ -239,7 +339,8 @@ public class PiwikStatsDB {
|
||||||
logger.info("Dropped openaire_downloads_stats_tmp table");
|
logger.info("Dropped openaire_downloads_stats_tmp table");
|
||||||
|
|
||||||
logger.info("Creating openaire_downloads_stats_tmp table");
|
logger.info("Creating openaire_downloads_stats_tmp table");
|
||||||
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".openaire_downloads_stats_tmp AS "
|
String sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_downloads_stats_tmp AS "
|
||||||
+ "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
|
+ "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
|
||||||
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
||||||
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
||||||
|
@ -251,8 +352,74 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("Created downloads_stats table");
|
logger.info("Created downloads_stats table");
|
||||||
|
|
||||||
|
logger.info("Insert temp missing piwik_ids 630 in openaire_downloads_stats_tmp table");
|
||||||
|
String create_views_stats_missing_id_630 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_downloads_stats_tmp "
|
||||||
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::cfa5301358b9fcbe7aa45b1ceea088c6' as repository_id, ro.id as result_id, month as date, "
|
||||||
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
||||||
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
||||||
|
+ "WHERE p.source=630 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
||||||
|
+ "GROUP BY ro.id, month "
|
||||||
|
+ "ORDER BY ro.id, month ";
|
||||||
|
stmt.executeUpdate(create_views_stats_missing_id_630);
|
||||||
|
logger.info("Inserted temp missing piwik_ids 630 in openaire_downloads_stats_tmp table");
|
||||||
|
|
||||||
|
logger.info("Insert temp missing piwik_ids 662 in openaire_downloads_stats_tmp table");
|
||||||
|
String create_views_stats_missing_id_662 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_downloads_stats_tmp "
|
||||||
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::4e86eaf2685a67b743a475f86c7c0086' as repository_id, ro.id as result_id, month as date, "
|
||||||
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
||||||
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
||||||
|
+ "WHERE p.source=662 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
||||||
|
+ "GROUP BY ro.id, month "
|
||||||
|
+ "ORDER BY ro.id, month ";
|
||||||
|
stmt.executeUpdate(create_views_stats_missing_id_662);
|
||||||
|
logger.info("Inserted temp missing piwik_ids 662 in openaire_downloads_stats_tmp table");
|
||||||
|
|
||||||
|
logger.info("Insert temp missing piwik_ids 694 in openaire_downloads_stats_tmp table");
|
||||||
|
String create_views_stats_missing_id_694 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_downloads_stats_tmp "
|
||||||
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::f35fd567065af297ae65b621e0a21ae9' as repository_id, ro.id as result_id, month as date, "
|
||||||
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
||||||
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
||||||
|
+ "WHERE p.source=694 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
||||||
|
+ "GROUP BY ro.id, month "
|
||||||
|
+ "ORDER BY ro.id, month ";
|
||||||
|
stmt.executeUpdate(create_views_stats_missing_id_694);
|
||||||
|
logger.info("Inserted temp missing piwik_ids 694 in openaire_downloads_stats_tmp table");
|
||||||
|
|
||||||
|
logger.info("Insert temp missing piwik_ids 725 in openaire_downloads_stats_tmp table");
|
||||||
|
String create_views_stats_missing_id_725 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_downloads_stats_tmp "
|
||||||
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::7180cffd6a8e829dacfc2a31b3f72ece' as repository_id, ro.id as result_id, month as date, "
|
||||||
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
||||||
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
||||||
|
+ "WHERE p.source=725 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
||||||
|
+ "GROUP BY ro.id, month "
|
||||||
|
+ "ORDER BY ro.id, month ";
|
||||||
|
stmt.executeUpdate(create_views_stats_missing_id_725);
|
||||||
|
logger.info("Inserted temp missing piwik_ids 725 in openaire_downloads_stats_tmp table");
|
||||||
|
|
||||||
|
logger.info("Insert temp missing piwik_ids 728 in openaire_downloads_stats_tmp table");
|
||||||
|
String create_views_stats_missing_id_728 = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_downloads_stats_tmp "
|
||||||
|
+ "SELECT 'OpenAIRE' as source, 'opendoar____::8b3bac12926cc1d9fb5d68783376971d' as repository_id, ro.id as result_id, month as date, "
|
||||||
|
+ "max(downloads) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
|
||||||
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
||||||
|
+ "WHERE p.source=728 AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
|
||||||
|
+ "GROUP BY ro.id, month "
|
||||||
|
+ "ORDER BY ro.id, month ";
|
||||||
|
stmt.executeUpdate(create_views_stats_missing_id_728);
|
||||||
|
logger.info("Inserted temp missing piwik_ids 728 in openaire_downloads_stats_tmp table");
|
||||||
|
|
||||||
logger.info("Dropping openaire_result_downloads_monthly_tmp view");
|
logger.info("Dropping openaire_result_downloads_monthly_tmp view");
|
||||||
sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp";
|
sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".openaire_result_downloads_monthly_tmp";
|
||||||
logger.info("Dropped openaire_result_downloads_monthly_tmp view ");
|
logger.info("Dropped openaire_result_downloads_monthly_tmp view ");
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
@ -298,6 +465,44 @@ public class PiwikStatsDB {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void uploadPangaeaLogs() throws Exception {
|
||||||
|
stmt = ConnectDB.getHiveConnection().createStatement();
|
||||||
|
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||||
|
|
||||||
|
// Dropping Pangaea pangaea_views_stats_tmp table
|
||||||
|
logger.info("Dropping pangaea_views_stats_tmp table");
|
||||||
|
String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_views_stats_tmp";
|
||||||
|
logger.info("Dropped pangaea_views_stats_tmp table ");
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
// Dropping Pangaea pangaea_downloads_stats table
|
||||||
|
logger.info("Dropping pangaea_downloads_stats table");
|
||||||
|
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_downloads_stats";
|
||||||
|
logger.info("Dropped pangaea_downloads_stats table ");
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
// Creating Pangaea pangaea_views_stats_tmp table
|
||||||
|
logger.info("Creating Pangaea pangaea_views_stats_tmp table");
|
||||||
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_views_stats_tmp AS "
|
||||||
|
+ "SELECT 'PANGAEA' as source, 're3data_____::9633d1e8c4309c833c2c442abeb0cfeb' as repository_id,"
|
||||||
|
+ "r.id as result_id,date, cast(count as BIGINT) as count, 0 as openaire "
|
||||||
|
+ "FROM default.pangaeaviews p, " + ConnectDB.getStatsDBSchema()
|
||||||
|
+ ".result_oids r where r.oid=p.result_id";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Created pangaea_views_stats_tmp table ");
|
||||||
|
|
||||||
|
// Creating Pangaea pangaea_downloads_stats_tmp table
|
||||||
|
logger.info("Creating Pedocs pangaea_downloads_stats_tmp table");
|
||||||
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_downloads_stats_tmp AS "
|
||||||
|
+ "SELECT 'PANGAEA' as source, 're3data_____::9633d1e8c4309c833c2c442abeb0cfeb' as repository_id,"
|
||||||
|
+ "r.id as result_id, date, cast(count as BIGINT) as count, 0 as openaire "
|
||||||
|
+ "FROM default.pangaeadownloads p, " + ConnectDB.getStatsDBSchema()
|
||||||
|
+ ".result_oids r where r.oid=p.result_id";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Created pangaea_downloads_stats_tmp table ");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public void uploadTUDELFTStats() throws Exception {
|
public void uploadTUDELFTStats() throws Exception {
|
||||||
stmt = ConnectDB.getHiveConnection().createStatement();
|
stmt = ConnectDB.getHiveConnection().createStatement();
|
||||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||||
|
@ -499,7 +704,8 @@ public class PiwikStatsDB {
|
||||||
|
|
||||||
String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
|
String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
|
||||||
+ ConnectDB.getStatsDBSchema() +
|
+ ConnectDB.getStatsDBSchema() +
|
||||||
".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true)";
|
".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true) "
|
||||||
|
+ "and websiteurl!='https://episciences.org/'";
|
||||||
|
|
||||||
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
||||||
.prepareStatement(returnEpisciencesJournals);
|
.prepareStatement(returnEpisciencesJournals);
|
||||||
|
@ -525,12 +731,11 @@ public class PiwikStatsDB {
|
||||||
+ "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
|
+ "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
|
||||||
+ "AS openaire_referrer, "
|
+ "AS openaire_referrer, "
|
||||||
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
||||||
+ "FROM " + ConnectDB.getUsageRawDataDBSchema()
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema()
|
||||||
+ ".episcienceslog where action='action' and (source_item_type='oaItem' or "
|
+ ".episcienceslogdistinct where action='action' and (source_item_type='oaItem' or "
|
||||||
+ "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%'"
|
+ "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%'"
|
||||||
+ "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
|
+ "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
|
||||||
+ "source ORDER BY source, entity_id";
|
+ "source ORDER BY source, entity_id";
|
||||||
|
|
||||||
stmt.executeUpdate(create_result_views_monthly);
|
stmt.executeUpdate(create_result_views_monthly);
|
||||||
logger.info("Created episciencesSuffix_result_views_monthly_tmp table");
|
logger.info("Created episciencesSuffix_result_views_monthly_tmp table");
|
||||||
|
|
||||||
|
@ -547,6 +752,23 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(insertIntoEpisciencesViewsTable);
|
stmt.executeUpdate(insertIntoEpisciencesViewsTable);
|
||||||
logger.info("Inserted episciencesSuffix_result_views_monthly_tmp into EpisciencesViews Table");
|
logger.info("Inserted episciencesSuffix_result_views_monthly_tmp into EpisciencesViews Table");
|
||||||
|
|
||||||
|
logger
|
||||||
|
.info(
|
||||||
|
"Inserting episciencesSuffix_result_views_monthly_tmp for Episciences into EpisciencesViews Table");
|
||||||
|
String insertIntoEpisciencesViewsAllTable = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".episciencesviews SELECT 'Episciences' as source, "
|
||||||
|
+ " 'openaire____::6824b298c96ba906a3e6a70593affbf5' as repository_id, ro.id as result_id, month as date,"
|
||||||
|
+ " max(views) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ "." + episciencesSuffix.replace("-", "_") + "_result_views_monthly_tmp p,"
|
||||||
|
+ ConnectDB.getStatsDBSchema()
|
||||||
|
+ ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
|
||||||
|
logger
|
||||||
|
.info(
|
||||||
|
"Inserted episciencesSuffix_result_views_monthly_tmp for Episciences into EpisciencesViews Table");
|
||||||
|
|
||||||
|
stmt.executeUpdate(insertIntoEpisciencesViewsAllTable);
|
||||||
|
|
||||||
stmt.executeUpdate(dropepisciencesSuffixView);
|
stmt.executeUpdate(dropepisciencesSuffixView);
|
||||||
logger.info("Dropped episciencesSuffix_result_views_monthly_tmp view");
|
logger.info("Dropped episciencesSuffix_result_views_monthly_tmp view");
|
||||||
}
|
}
|
||||||
|
@ -576,7 +798,8 @@ public class PiwikStatsDB {
|
||||||
|
|
||||||
String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
|
String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
|
||||||
+ ConnectDB.getStatsDBSchema() +
|
+ ConnectDB.getStatsDBSchema() +
|
||||||
".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true)";
|
".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true) "
|
||||||
|
+ "and websiteurl!='https://episciences.org/'";
|
||||||
|
|
||||||
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
||||||
.prepareStatement(returnEpisciencesJournals);
|
.prepareStatement(returnEpisciencesJournals);
|
||||||
|
@ -600,8 +823,8 @@ public class PiwikStatsDB {
|
||||||
+ "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
|
+ "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
|
||||||
+ "AS openaire_referrer, "
|
+ "AS openaire_referrer, "
|
||||||
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
+ "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
|
||||||
+ "FROM " + ConnectDB.getUsageRawDataDBSchema()
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema()
|
||||||
+ ".episcienceslog where action='download' and (source_item_type='oaItem' or "
|
+ ".episcienceslogdistinct where action='download' and (source_item_type='oaItem' or "
|
||||||
+ "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%'"
|
+ "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%'"
|
||||||
+ "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
|
+ "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
|
||||||
+ "source ORDER BY source, entity_id";
|
+ "source ORDER BY source, entity_id";
|
||||||
|
@ -622,6 +845,23 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(insertIntoEpisciencesDownloadsTable);
|
stmt.executeUpdate(insertIntoEpisciencesDownloadsTable);
|
||||||
logger.info("Inserted episciencesSuffix_result_downloads_monthly_tmp into EpisciencesDownloadsTable");
|
logger.info("Inserted episciencesSuffix_result_downloads_monthly_tmp into EpisciencesDownloadsTable");
|
||||||
|
|
||||||
|
logger
|
||||||
|
.info(
|
||||||
|
"Inserting episciencesSuffix_result_downloads_monthly_tmp for Episciences into EpisciencesDownloadsTable");
|
||||||
|
String insertIntoEpisciencesDownloadsAllTable = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ ".episciencesdownloads SELECT 'Episciences' as source, "
|
||||||
|
+ " 'openaire____::6824b298c96ba906a3e6a70593affbf5' as repository_id, ro.id as result_id, month as date,"
|
||||||
|
+ " max(views) AS count, max(openaire_referrer) AS openaire "
|
||||||
|
+ "FROM " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
+ "." + episciencesSuffix.replace("-", "_") + "_result_downloads_monthly_tmp p,"
|
||||||
|
+ ConnectDB.getStatsDBSchema()
|
||||||
|
+ ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
|
||||||
|
|
||||||
|
stmt.executeUpdate(insertIntoEpisciencesDownloadsAllTable);
|
||||||
|
logger
|
||||||
|
.info(
|
||||||
|
"Inserted episciencesSuffix_result_downloads_monthly_tmp for Episciences into EpisciencesDownloadsTable");
|
||||||
|
|
||||||
stmt.executeUpdate(dropepisciencesSuffixDownloads);
|
stmt.executeUpdate(dropepisciencesSuffixDownloads);
|
||||||
logger.info("Dropped episciencesSuffix_result_downloads_monthly_tmp view");
|
logger.info("Dropped episciencesSuffix_result_downloads_monthly_tmp view");
|
||||||
|
|
||||||
|
@ -767,9 +1007,15 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("Dropped Table tbl_all_r5_metrics");
|
logger.info("Dropped Table tbl_all_r5_metrics");
|
||||||
|
|
||||||
logger.info("Create Table tbl_all_r5_metrics");
|
// All CoP R5 metrics Table
|
||||||
|
logger.info("Drop Table counter_r5_stats_with_metrics");
|
||||||
|
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".counter_r5_stats_with_metrics ";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Dropped Table counter_r5_stats_with_metrics");
|
||||||
|
|
||||||
|
logger.info("Create Table counter_r5_stats_with_metrics");
|
||||||
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||||
+ ".tbl_all_r5_metrics as "
|
+ ".counter_r5_stats_with_metrics as "
|
||||||
+ "WITH tmp1 as (SELECT coalesce(ds.repository_id, vs.repository_id) as repository_id, "
|
+ "WITH tmp1 as (SELECT coalesce(ds.repository_id, vs.repository_id) as repository_id, "
|
||||||
+ "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
|
+ "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
|
||||||
+ "coalesce(vs.unique_item_investigations, 0) as unique_item_investigations, "
|
+ "coalesce(vs.unique_item_investigations, 0) as unique_item_investigations, "
|
||||||
|
@ -793,7 +1039,7 @@ public class PiwikStatsDB {
|
||||||
+ "FROM tmp2 AS ds FULL OUTER JOIN " + ConnectDB.getUsageStatsDBSchema() + ".tbl_total_item_requests "
|
+ "FROM tmp2 AS ds FULL OUTER JOIN " + ConnectDB.getUsageStatsDBSchema() + ".tbl_total_item_requests "
|
||||||
+ "AS vs ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date";
|
+ "AS vs ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("Created Table tbl_all_r5_metrics");
|
logger.info("Created Table counter_r5_stats_with_metrics");
|
||||||
stmt.close();
|
stmt.close();
|
||||||
ConnectDB.getHiveConnection().close();
|
ConnectDB.getHiveConnection().close();
|
||||||
|
|
||||||
|
@ -857,6 +1103,13 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("Pedocs views updated to views_stats");
|
logger.info("Pedocs views updated to views_stats");
|
||||||
|
|
||||||
|
// Inserting Pangaea views stats
|
||||||
|
logger.info("Inserting Pangaea old data to views_stats");
|
||||||
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
||||||
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_views_stats_tmp";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Pangaea views updated to views_stats");
|
||||||
|
|
||||||
// Inserting TUDELFT views stats
|
// Inserting TUDELFT views stats
|
||||||
logger.info("Inserting TUDELFT data to views_stats");
|
logger.info("Inserting TUDELFT data to views_stats");
|
||||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
||||||
|
@ -878,6 +1131,12 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("B2SHARE views updated to views_stats");
|
logger.info("B2SHARE views updated to views_stats");
|
||||||
|
|
||||||
|
// Inserting Datacite views stats
|
||||||
|
logger.info("Inserting Datacite views to views_stats");
|
||||||
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
||||||
|
+ "SELECT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".datacite_views";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
logger.info("Creating downloads_stats table");
|
logger.info("Creating downloads_stats table");
|
||||||
String createDownloadsStats = "CREATE TABLE IF NOT EXISTS "
|
String createDownloadsStats = "CREATE TABLE IF NOT EXISTS "
|
||||||
+ ConnectDB.getUsageStatsDBSchema()
|
+ ConnectDB.getUsageStatsDBSchema()
|
||||||
|
@ -893,7 +1152,7 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("Inserted OpenAIRE data to downloads_stats");
|
logger.info("Inserted OpenAIRE data to downloads_stats");
|
||||||
|
|
||||||
// Inserting Episciences views stats
|
// Inserting Episciences downloads stats
|
||||||
logger.info("Inserting Episciences data to downloads_stats");
|
logger.info("Inserting Episciences data to downloads_stats");
|
||||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
||||||
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".episciencesdownloads";
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".episciencesdownloads";
|
||||||
|
@ -907,6 +1166,13 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("Inserted Pedocs data to downloads_stats");
|
logger.info("Inserted Pedocs data to downloads_stats");
|
||||||
|
|
||||||
|
// Inserting Pangaea downloads stats
|
||||||
|
logger.info("Inserting Pangaea old data to downloads_stats");
|
||||||
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
||||||
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pangaea_downloads_stats_tmp";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Pangaea downloads updated to downloads_stats");
|
||||||
|
|
||||||
// Inserting TUDELFT downloads stats
|
// Inserting TUDELFT downloads stats
|
||||||
logger.info("Inserting TUDELFT data to downloads_stats");
|
logger.info("Inserting TUDELFT data to downloads_stats");
|
||||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
||||||
|
@ -920,6 +1186,7 @@ public class PiwikStatsDB {
|
||||||
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".b2share_downloads_stats_tmp";
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".b2share_downloads_stats_tmp";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("Inserted B2SHARE data to downloads_stats");
|
logger.info("Inserted B2SHARE data to downloads_stats");
|
||||||
|
|
||||||
// Inserting Lareferencia downloads stats
|
// Inserting Lareferencia downloads stats
|
||||||
logger.info("Inserting LaReferencia data to downloads_stats");
|
logger.info("Inserting LaReferencia data to downloads_stats");
|
||||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
||||||
|
@ -934,14 +1201,13 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("IRUS downloads updated to downloads_stats");
|
logger.info("IRUS downloads updated to downloads_stats");
|
||||||
|
|
||||||
// Inserting IRUS_R5 downloads stats
|
// Inserting IRUS_R5 views stats
|
||||||
logger.info("Inserting IRUS_R5 views to views_stats");
|
logger.info("Inserting IRUS_R5 views to views_stats");
|
||||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
||||||
+ "SELECT source, repository_id, result_id, `date`, views, openaire FROM "
|
+ "SELECT source, repository_id, result_id, `date`, views, openaire FROM "
|
||||||
+ ConnectDB.getUsageStatsDBSchema()
|
+ ConnectDB.getUsageStatsDBSchema()
|
||||||
+ ".irus_R5_stats_tmp";
|
+ ".irus_R5_stats_tmp";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("IRUS_R5 views updated to views_stats");
|
|
||||||
|
|
||||||
// Inserting IRUS_R5 downloads stats
|
// Inserting IRUS_R5 downloads stats
|
||||||
logger.info("Inserting IRUS_R5 data to downloads_stats");
|
logger.info("Inserting IRUS_R5 data to downloads_stats");
|
||||||
|
@ -950,7 +1216,6 @@ public class PiwikStatsDB {
|
||||||
+ ConnectDB.getUsageStatsDBSchema()
|
+ ConnectDB.getUsageStatsDBSchema()
|
||||||
+ ".irus_R5_stats_tmp";
|
+ ".irus_R5_stats_tmp";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("IRUS_R5 downloads updated to downloads_stats");
|
|
||||||
|
|
||||||
// Inserting SARC-OJS downloads stats
|
// Inserting SARC-OJS downloads stats
|
||||||
logger.info("Inserting SARC data to downloads_stats");
|
logger.info("Inserting SARC data to downloads_stats");
|
||||||
|
@ -959,19 +1224,11 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("SARC-OJS downloads updated to downloads_stats");
|
logger.info("SARC-OJS downloads updated to downloads_stats");
|
||||||
|
|
||||||
// Inserting Datacite views stats
|
|
||||||
logger.info("Inserting Datacite views to views_stats");
|
|
||||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
|
||||||
+ "SELECT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".datacite_views";
|
|
||||||
stmt.executeUpdate(sql);
|
|
||||||
logger.info("Datacite views updated to views_stats");
|
|
||||||
|
|
||||||
// Inserting Datacite downloads stats
|
// Inserting Datacite downloads stats
|
||||||
logger.info("Inserting Datacite downloads to downloads_stats");
|
logger.info("Inserting Datacite downloads to downloads_stats");
|
||||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
||||||
+ "SELECT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".datacite_downloads";
|
+ "SELECT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".datacite_downloads";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("Datacite downloads updated to downloads_stats");
|
|
||||||
|
|
||||||
logger.info("Creating pageviews_stats table");
|
logger.info("Creating pageviews_stats table");
|
||||||
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
@ -986,28 +1243,6 @@ public class PiwikStatsDB {
|
||||||
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_pageviews_stats_tmp";
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_pageviews_stats_tmp";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
logger.info("Dropping full_dates table");
|
|
||||||
String dropFullDates = "DROP TABLE IF EXISTS "
|
|
||||||
+ ConnectDB.getUsageStatsDBSchema()
|
|
||||||
+ ".full_dates";
|
|
||||||
stmt.executeUpdate(dropFullDates);
|
|
||||||
logger.info("Dropped full_dates table");
|
|
||||||
|
|
||||||
Calendar startCalendar = Calendar.getInstance();
|
|
||||||
startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
|
|
||||||
Calendar endCalendar = Calendar.getInstance();
|
|
||||||
int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
|
|
||||||
int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
|
|
||||||
|
|
||||||
logger.info("Creating full_dates table");
|
|
||||||
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".full_dates AS "
|
|
||||||
+ "SELECT from_unixtime(unix_timestamp(cast(add_months(from_date,i) AS DATE)), 'yyyy/MM') AS txn_date "
|
|
||||||
+ "FROM (SELECT DATE '2016-01-01' AS from_date) p "
|
|
||||||
+ "LATERAL VIEW "
|
|
||||||
+ "posexplode(split(space(" + diffMonth + "),' ')) pe AS i,x";
|
|
||||||
stmt.executeUpdate(sql);
|
|
||||||
logger.info("Created full_dates table");
|
|
||||||
|
|
||||||
logger.info("Inserting data to usage_stats");
|
logger.info("Inserting data to usage_stats");
|
||||||
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats AS "
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats AS "
|
||||||
+ "SELECT coalesce(ds.source, vs.source) as source, "
|
+ "SELECT coalesce(ds.source, vs.source) as source, "
|
||||||
|
@ -1022,15 +1257,77 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("Inserted data to usage_stats");
|
logger.info("Inserted data to usage_stats");
|
||||||
|
|
||||||
|
// Dropping project_stats table
|
||||||
|
logger.info("Dropping project_stats table");
|
||||||
|
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".project_stats";
|
||||||
|
logger.info("Dropped project_stats table ");
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
// Dropping datasource_stats table
|
||||||
|
logger.info("Dropping datasource_stats table");
|
||||||
|
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".datasource_stats";
|
||||||
|
logger.info("Dropped datasource_stats table ");
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
logger.info("Inserting data to project_downloads");
|
||||||
|
sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".project_downloads as "
|
||||||
|
+ " select pr.id, sum(count) downloads, sum(openaire) openaire_downloads,`date` "
|
||||||
|
+ " from " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
|
||||||
|
+ " join " + ConnectDB.getStatsDBSchema() + ".project_results pr on result_id=pr.result "
|
||||||
|
+ " join " + ConnectDB.getStatsDBSchema() + ".project p on p.id=pr.id "
|
||||||
|
+ " group by pr.id,`date`";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Inserted data to projects_downloads");
|
||||||
|
|
||||||
|
logger.info("Inserting data to project_views");
|
||||||
|
sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".project_views as "
|
||||||
|
+ " select pr.id, sum(count) views, sum(openaire) openaire_views,`date` "
|
||||||
|
+ " from " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
|
||||||
|
+ " join " + ConnectDB.getStatsDBSchema() + ".project_results pr on result_id=pr.result "
|
||||||
|
+ " join " + ConnectDB.getStatsDBSchema() + ".project p on p.id=pr.id "
|
||||||
|
+ " group by pr.id,`date`";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Inserted data to project_views");
|
||||||
|
|
||||||
|
logger.info("Inserting data to project_stats");
|
||||||
|
sql = " CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".project_stats as "
|
||||||
|
+ " SELECT coalesce(pv.id, pd.id) as id, coalesce(pd.`date`, pv.`date`) as `date`, "
|
||||||
|
+ " coalesce(pv.views, 0) as views, coalesce(pd.downloads, 0) as downloads, "
|
||||||
|
+ " coalesce(pv.openaire_views,0) as openaire_views,coalesce(pd.openaire_downloads, 0) as openaire_downloads "
|
||||||
|
+ " FROM " + ConnectDB.getUsageStatsDBSchema() + ".project_downloads pd "
|
||||||
|
+ " FULL OUTER JOIN " + ConnectDB.getUsageStatsDBSchema() + ".project_views pv "
|
||||||
|
+ " ON pd.id=pv.id WHERE pd.`date`=pv.`date`";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
logger.info("Inserted data to project_stats");
|
||||||
|
|
||||||
|
logger.info("Inserting data to datasource_stats");
|
||||||
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".datasource_stats AS "
|
||||||
|
+ " with datasource_views as "
|
||||||
|
+ " (select repository_id, sum(views) views, sum(openaire_views) openaire_views,`date` "
|
||||||
|
+ " from " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats group by repository_id,`date`), "
|
||||||
|
+ " datasource_downloads as "
|
||||||
|
+ " (select repository_id, sum(downloads) downloads,sum(openaire_downloads) openaire_downloads,`date` "
|
||||||
|
+ " from " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats group by repository_id,`date`)"
|
||||||
|
+ " SELECT coalesce(dv.repository_id, dd.repository_id) as repository_id, coalesce(dd.`date`, dv.`date`) as `date`,"
|
||||||
|
+ " coalesce(dv.views, 0) as views, coalesce(dd.downloads, 0) as downloads, "
|
||||||
|
+ " coalesce(dv.openaire_views) as openaire_views,coalesce(dd.openaire_downloads, 0) as openaire_downloads "
|
||||||
|
+ " FROM datasource_downloads dd "
|
||||||
|
+ " FULL OUTER JOIN "
|
||||||
|
+ " datasource_views dv ON dd.repository_id=dv.repository_id WHERE dd.`date`=dv.`date`";
|
||||||
|
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Inserted data to datasource_stats");
|
||||||
|
|
||||||
// Inserting LaReferencia CoP R5 Metrics
|
// Inserting LaReferencia CoP R5 Metrics
|
||||||
logger.info("Inserting Lareferencia data to tbl_all_r5_metrics");
|
logger.info("Inserting Lareferencia data to counter_r5_stats_with_metrics");
|
||||||
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".tbl_all_r5_metrics "
|
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".counter_r5_stats_with_metrics "
|
||||||
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_all_r5_metrics";
|
+ "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_all_r5_metrics";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
// Inserting IRUS-UK CoP R5 Metrics
|
// Inserting IRUS-UK CoP R5 Metrics
|
||||||
logger.info("Inserting IRUS-UK data into tbl_all_r5_metrics");
|
logger.info("Inserting IRUS-UK data into counter_r5_stats_with_metrics");
|
||||||
String insertΡ5Stats = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".tbl_all_r5_metrics "
|
String insertΡ5Stats = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".counter_r5_stats_with_metrics "
|
||||||
+ "SELECT s.source, d.id AS repository_id, "
|
+ "SELECT s.source, d.id AS repository_id, "
|
||||||
+ "ro.id as result_id, CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) as date, "
|
+ "ro.id as result_id, CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) as date, "
|
||||||
+ "s.unique_item_investigations , s.total_item_investigations, "
|
+ "s.unique_item_investigations , s.total_item_investigations, "
|
||||||
|
@ -1040,7 +1337,7 @@ public class PiwikStatsDB {
|
||||||
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
+ ConnectDB.getStatsDBSchema() + ".result_oids ro "
|
||||||
+ "WHERE s.repository=d.oid AND s.rid=ro.oid AND s.source='IRUS-UK'";
|
+ "WHERE s.repository=d.oid AND s.rid=ro.oid AND s.source='IRUS-UK'";
|
||||||
stmt.executeUpdate(insertΡ5Stats);
|
stmt.executeUpdate(insertΡ5Stats);
|
||||||
logger.info("Inserted IRUS-UK data into tbl_all_r5_metrics");
|
logger.info("Inserted IRUS-UK data into counter_r5_stats_with_metrics");
|
||||||
|
|
||||||
logger.info("Building views at permanent DB starts at: " + new Timestamp(System.currentTimeMillis()));
|
logger.info("Building views at permanent DB starts at: " + new Timestamp(System.currentTimeMillis()));
|
||||||
|
|
||||||
|
@ -1088,6 +1385,28 @@ public class PiwikStatsDB {
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("Created view on usage_stats on permanent usagestats DB");
|
logger.info("Created view on usage_stats on permanent usagestats DB");
|
||||||
|
|
||||||
|
logger.info("Dropping view project_stats on permanent usagestats DB");
|
||||||
|
sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".project_stats";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Dropped view on project_stats on permanent usagestats DB");
|
||||||
|
|
||||||
|
logger.info("Create view on project_stats on permanent usagestats DB");
|
||||||
|
sql = "CREATE VIEW IF NOT EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".project_stats"
|
||||||
|
+ " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".project_stats";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Created view on project_stats on permanent usagestats DB");
|
||||||
|
|
||||||
|
logger.info("Dropping view datasource_stats on permanent usagestats DB");
|
||||||
|
sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".datasource_stats";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Dropped view on datasource_stats on permanent usagestats DB");
|
||||||
|
|
||||||
|
logger.info("Create view on datasource_stats on permanent usagestats DB");
|
||||||
|
sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsagestatsPermanentDBSchema() + ".datasource_stats"
|
||||||
|
+ " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".datasource_stats";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
logger.info("Created view on datasource_stats on permanent usagestats DB");
|
||||||
|
|
||||||
logger.info("Dropping view COUNTER_R5_Metrics on permanent usagestats DB");
|
logger.info("Dropping view COUNTER_R5_Metrics on permanent usagestats DB");
|
||||||
sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".counter_r5_stats_with_metrics";
|
sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".counter_r5_stats_with_metrics";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
|
@ -1096,7 +1415,7 @@ public class PiwikStatsDB {
|
||||||
logger.info("Create view on COUNTER_R5_Metrics on permanent usagestats DB");
|
logger.info("Create view on COUNTER_R5_Metrics on permanent usagestats DB");
|
||||||
sql = "CREATE VIEW IF NOT EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema()
|
sql = "CREATE VIEW IF NOT EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema()
|
||||||
+ ".counter_r5_stats_with_metrics"
|
+ ".counter_r5_stats_with_metrics"
|
||||||
+ " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".tbl_all_r5_metrics";
|
+ " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".counter_r5_stats_with_metrics";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
logger.info("Created view on COUNTER_R5_Metrics on permanent usagestats DB");
|
logger.info("Created view on COUNTER_R5_Metrics on permanent usagestats DB");
|
||||||
|
|
||||||
|
|
|
@ -29,21 +29,16 @@ public class UsageStatsExporter {
|
||||||
logger.info("Initialising DB properties");
|
logger.info("Initialising DB properties");
|
||||||
ConnectDB.init();
|
ConnectDB.init();
|
||||||
|
|
||||||
// runImpalaQuery();
|
|
||||||
PiwikStatsDB piwikstatsdb = new PiwikStatsDB();
|
PiwikStatsDB piwikstatsdb = new PiwikStatsDB();
|
||||||
logger.info("Re-creating database and tables");
|
//to remove logger.info("Re-creating database and tables");
|
||||||
if (ExecuteWorkflow.recreateDbAndTables) {
|
// if (ExecuteWorkflow.recreateDbAndTables) {
|
||||||
piwikstatsdb.recreateDBAndTables();
|
// piwikstatsdb.recreateDBAndTables();
|
||||||
logger.info("DB-Tables are created ");
|
// logger.info("DB-Tables are created ");
|
||||||
}
|
//to remove }
|
||||||
// else {
|
|
||||||
// piwikstatsdb.createTmpTables();
|
|
||||||
// logger.info("TmpTables are created ");
|
|
||||||
// }
|
|
||||||
if (ExecuteWorkflow.processPiwikLogs) {
|
if (ExecuteWorkflow.processPiwikLogs) {
|
||||||
logger.info("Creating distinct piwik log");
|
//to remove logger.info("Creating distinct piwik log");
|
||||||
piwikstatsdb.createDistinctPiwikLog();
|
// piwikstatsdb.createDistinctPiwikLog();
|
||||||
logger.info("Processing OpenAIRE logs");
|
//to remove logger.info("Processing OpenAIRE logs");
|
||||||
piwikstatsdb.processLogs();
|
piwikstatsdb.processLogs();
|
||||||
logger.info("OpenAIRE logs Done");
|
logger.info("OpenAIRE logs Done");
|
||||||
logger.info("Processing Episciences logs");
|
logger.info("Processing Episciences logs");
|
||||||
|
@ -52,6 +47,9 @@ public class UsageStatsExporter {
|
||||||
logger.info("Processing Pedocs Old Stats");
|
logger.info("Processing Pedocs Old Stats");
|
||||||
piwikstatsdb.uploadOldPedocs();
|
piwikstatsdb.uploadOldPedocs();
|
||||||
logger.info("Processing Pedocs Old Stats Done");
|
logger.info("Processing Pedocs Old Stats Done");
|
||||||
|
logger.info("Processing Pangaea Stats");
|
||||||
|
piwikstatsdb.uploadPangaeaLogs();
|
||||||
|
logger.info("Processing Pangaea Stats Done");
|
||||||
logger.info("Processing TUDELFT Stats");
|
logger.info("Processing TUDELFT Stats");
|
||||||
piwikstatsdb.uploadTUDELFTStats();
|
piwikstatsdb.uploadTUDELFTStats();
|
||||||
logger.info("Processing TUDELFT Stats Done");
|
logger.info("Processing TUDELFT Stats Done");
|
||||||
|
@ -116,6 +114,18 @@ public class UsageStatsExporter {
|
||||||
sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats";
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".project_stats";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".datasource_stats";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".counter_r5_stats_with_metrics";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".downloads_stats";
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".downloads_stats";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
@ -125,6 +135,12 @@ public class UsageStatsExporter {
|
||||||
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".usage_stats";
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".usage_stats";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".project_stats";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".datasource_stats";
|
||||||
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".pageviews_stats";
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".pageviews_stats";
|
||||||
stmt.executeUpdate(sql);
|
stmt.executeUpdate(sql);
|
||||||
|
|
||||||
|
|
|
@ -9,8 +9,34 @@ fi
|
||||||
export SOURCE=$1
|
export SOURCE=$1
|
||||||
export PRODUCTION=$2
|
export PRODUCTION=$2
|
||||||
|
|
||||||
|
#echo "Updating ${PRODUCTION} database"
|
||||||
|
#impala-shell -q "create database if not exists ${PRODUCTION}"
|
||||||
|
#impala-shell -d ${PRODUCTION} -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -c -f -
|
||||||
|
#impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -
|
||||||
|
#echo "Production db ready!"
|
||||||
|
|
||||||
echo "Updating ${PRODUCTION} database"
|
echo "Updating ${PRODUCTION} database"
|
||||||
impala-shell -q "create database if not exists ${PRODUCTION}"
|
impala-shell -q "create database if not exists ${PRODUCTION}"
|
||||||
impala-shell -d ${PRODUCTION} -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -c -f -
|
impala-shell -d ${PRODUCTION} -q "DROP VIEW IF EXISTS ${PRODUCTION}.views_stats;"
|
||||||
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -
|
impala-shell -d ${PRODUCTION} -q "CREATE VIEW ${PRODUCTION}.views_stats as SELECT * from ${SOURCE}.views_stats;"
|
||||||
echo "Production db ready!"
|
impala-shell -d ${PRODUCTION} -q "INVALIDATE METADATA ${PRODUCTION}.views_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "DROP VIEW IF EXISTS ${PRODUCTION}.pageviews_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "CREATE VIEW ${PRODUCTION}.pageviews_stats as SELECT * from ${SOURCE}.pageviews_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "CREATE VIEW ${PRODUCTION}.pageviews_stats as SELECT * from ${SOURCE}.pageviews_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "INVALIDATE METADATA ${PRODUCTION}.pageviews_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "DROP VIEW IF EXISTS ${PRODUCTION}.downloads_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "CREATE VIEW ${PRODUCTION}.downloads_stats as SELECT * from ${SOURCE}.downloads_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "INVALIDATE METADATA ${PRODUCTION}.downloads_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "DROP VIEW IF EXISTS ${PRODUCTION}.usage_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "CREATE VIEW ${PRODUCTION}.usage_stats as SELECT * from ${SOURCE}.usage_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "INVALIDATE METADATA ${PRODUCTION}.usage_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "DROP VIEW IF EXISTS ${PRODUCTION}.project_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "CREATE VIEW ${PRODUCTION}.project_stats as SELECT * from ${SOURCE}.project_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "INVALIDATE METADATA ${PRODUCTION}.project_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "DROP VIEW IF EXISTS ${PRODUCTION}.datasource_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "CREATE VIEW ${PRODUCTION}.datasource_stats as SELECT * from ${SOURCE}.datasource_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "INVALIDATE METADATA ${PRODUCTION}.datasource_stats;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "DROP VIEW IF EXISTS ${PRODUCTION}.counter_r5_stats_with_metrics;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "CREATE VIEW ${PRODUCTION}.counter_r5_stats_with_metrics as SELECT * from ${SOURCE}.counter_r5_stats_with_metrics;"
|
||||||
|
impala-shell -d ${PRODUCTION} -q "INVALIDATE METADATA ${PRODUCTION}.counter_r5_stats_with_metrics;"
|
||||||
|
echo "Production db ready!"
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
<workflow-app name="Usage Stats" xmlns="uri:oozie:workflow:0.5">
|
<workflow-app name="Usage Stats" xmlns="uri:oozie:workflow:0.5">
|
||||||
<parameters>
|
<parameters>
|
||||||
<property>
|
<property>
|
||||||
<name>usage_stats_db_name</name>
|
<name>usage_stats_db</name>
|
||||||
<description>the target usage stats database name</description>
|
<description>the target usage stats database name</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>usage_stats_db_production_name</name>
|
<name>usage_stats_db_production</name>
|
||||||
<description>the name of the public production usage stats database</description>
|
<description>the name of the public production usage stats database</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
|
@ -48,8 +48,8 @@
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
<name-node>${nameNode}</name-node>
|
<name-node>${nameNode}</name-node>
|
||||||
<exec>updateProductionViews.sh</exec>
|
<exec>updateProductionViews.sh</exec>
|
||||||
<argument>${usage_stats_db_name}</argument>
|
<argument>${usage_stats_db}</argument>
|
||||||
<argument>${usage_stats_db_production_name}</argument>
|
<argument>${usage_stats_db_production}</argument>
|
||||||
<file>updateProductionViews.sh</file>
|
<file>updateProductionViews.sh</file>
|
||||||
</shell>
|
</shell>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
|
|
|
@ -1,91 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
||||||
<parent>
|
|
||||||
<artifactId>dhp-workflows</artifactId>
|
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
|
||||||
<version>1.2.4-SNAPSHOT</version>
|
|
||||||
</parent>
|
|
||||||
<modelVersion>4.0.0</modelVersion>
|
|
||||||
<artifactId>dhp-usage-stats-build</artifactId>
|
|
||||||
<build>
|
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>pl.project13.maven</groupId>
|
|
||||||
<artifactId>git-commit-id-plugin</artifactId>
|
|
||||||
<version>2.1.15</version>
|
|
||||||
<executions>
|
|
||||||
<execution>
|
|
||||||
<goals>
|
|
||||||
<goal>revision</goal>
|
|
||||||
</goals>
|
|
||||||
</execution>
|
|
||||||
</executions>
|
|
||||||
<configuration>
|
|
||||||
<dotGitDirectory>${project.basedir}/../.git</dotGitDirectory>
|
|
||||||
<!-- more config here as you see fit -->
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-compiler-plugin</artifactId>
|
|
||||||
<version>3.6.1</version>
|
|
||||||
<configuration>
|
|
||||||
<source>1.8</source>
|
|
||||||
<target>1.8</target>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
|
||||||
<properties>
|
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
|
||||||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
|
||||||
<cdh.hive.version>0.13.1-cdh5.2.1</cdh.hive.version>
|
|
||||||
<cdh.hadoop.version>2.5.0-cdh5.2.1</cdh.hadoop.version>
|
|
||||||
</properties>
|
|
||||||
|
|
||||||
<dependencies>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
|
||||||
<version>2.2.0</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-sql_2.11</artifactId>
|
|
||||||
<version>2.4.5</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.googlecode.json-simple</groupId>
|
|
||||||
<artifactId>json-simple</artifactId>
|
|
||||||
<version>1.1.1</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.json</groupId>
|
|
||||||
<artifactId>json</artifactId>
|
|
||||||
<version>20180130</version>
|
|
||||||
<type>jar</type>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hive</groupId>
|
|
||||||
<artifactId>hive-jdbc</artifactId>
|
|
||||||
<version>${cdh.hive.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-common</artifactId>
|
|
||||||
<version>${cdh.hadoop.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
|
||||||
<artifactId>dhp-common</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>c3p0</groupId>
|
|
||||||
<artifactId>c3p0</artifactId>
|
|
||||||
<version>0.9.1.2</version>
|
|
||||||
<type>jar</type>
|
|
||||||
</dependency>
|
|
||||||
</dependencies>
|
|
||||||
<name>dhp-usage-stats-build</name>
|
|
||||||
</project>
|
|
Loading…
Reference in New Issue