Adding correct logger everywhere, cleaning code, removing sysouts

Spyros Zoupanos 2020-10-02 16:25:21 +03:00
parent c6bbe215e1
commit 07e750939f
11 changed files with 300 additions and 371 deletions
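The same pattern is applied in every file below: the per-instance log4j logger (Logger.getLogger(this.getClass())) and the ad-hoc System.out.println("====> ...") tracing are replaced by a single static slf4j logger per class. A minimal before/after sketch of that pattern, written against a hypothetical UsageStatsJob class rather than any of the real classes in this diff:

// Before (what this commit removes): log4j instance logger plus raw stdout tracing
//
//   import org.apache.log4j.Logger;
//
//   private final Logger log = Logger.getLogger(this.getClass());
//   ...
//   System.out.println("====> Creating tables");
//   log.info("Tables Created");

// After (the pattern this commit introduces): one static slf4j logger per class
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class UsageStatsJob {

	private static final Logger logger = LoggerFactory.getLogger(UsageStatsJob.class);

	void createTables() {
		logger.info("Creating tables");
		// slf4j also allows parameterized messages instead of the string
		// concatenation kept in this diff, e.g.:
		logger.info("Getting report: {}", "https://example.org/report");
		logger.info("Created tables");
	}
}

At runtime slf4j binds to whichever backend is on the classpath (for example log4j via the slf4j-log4j12 bridge), so the workflow's existing logging configuration keeps working.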

View File: ConnectDB.java

@ -6,11 +6,14 @@
package eu.dnetlib.oa.graph.usagestats.export;
/*
@author dpie
/**
*
* @author D. Pierrakos, S. Zoupanos
*
*/
/*
@author dpie
/**
* @author D. Pierrakos, S. Zoupanos
*/
import java.sql.Connection;
import java.sql.DriverManager;
@ -35,22 +38,7 @@ public abstract class ConnectDB {
static void init(Properties properties) throws ClassNotFoundException {
// To be initialized by a property file
// dbURL = properties.getProperty("Stats_db_Url");
// dbUsername = properties.getProperty("Stats_db_User");
// dbPassword = properties.getProperty("Stats_db_Pass");
// defaultDBSchema = properties.getProperty("Stats_db_Schema");
//
// Class.forName(properties.getProperty("Stats_db_Driver"));
dbHiveUrl = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1";
// dbImpalaUrl = "jdbc:impala://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/;UseNativeQuery=1";
// dbImpalaUrl = "jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/;UseNativeQuery=1";
// dbImpalaUrl = "jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/openaire_prod_stats_20200731;UID=spyros;PWD=RU78N9sqQndnH3SQ;UseNativeQuery=1";
// dbImpalaUrl = "jdbc:impala://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/openaire_prod_stats_20200731;UID=spyros;PWD=RU78N9sqQndnH3SQ;UseNativeQuery=1";
// dbImpalaUrl = "jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/openaire_prod_stats_shadow_20200821;user=spyros;PWD=RU78N9sqQndnH3SQ";
// dbImpalaUrl = "jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:28000/;transportMode=http";
dbImpalaUrl = "jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/;auth=noSasl";
usageStatsDBSchema = "usagestats_20200913";
statsDBSchema = "openaire_prod_stats_shadow_20200821";
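For context, the rest of this diff calls ConnectDB.getHiveConnection() and ConnectDB.getUsageStatsDBSchema(), which lie outside the hunk shown above. Below is a hedged sketch of what such helpers typically look like with the standard Hive JDBC driver; the method names mirror the calls in this diff, but the bodies are an assumption, not the project's actual implementation:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public abstract class ConnectDBSketch {

	// Values corresponding to what init() sets in the hunk above
	private static String dbHiveUrl = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1";
	private static String usageStatsDBSchema = "usagestats_20200913";

	private static Connection hiveConnection;

	// Lazily opens (and reuses) the Hive connection that the calling code expects
	public static synchronized Connection getHiveConnection() throws SQLException {
		if (hiveConnection == null || hiveConnection.isClosed()) {
			// Requires the Hive JDBC driver (org.apache.hive:hive-jdbc) on the classpath
			hiveConnection = DriverManager.getConnection(dbHiveUrl);
		}
		return hiveConnection;
	}

	public static String getUsageStatsDBSchema() {
		return usageStatsDBSchema;
	}
}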

View File: ExecuteWorkflow.java

@ -11,7 +11,7 @@ import org.apache.commons.io.IOUtils;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
* @author dpie, Spyros Zoupanos
* @author D. Pierrakos, S. Zoupanos
*/
public class ExecuteWorkflow {

View File: IrusStats.java

@ -1,18 +1,9 @@
package eu.dnetlib.oa.graph.usagestats.export;
/**
* @author dpie
*/
/**
* @author dpie
*/
import java.io.*;
// import java.io.BufferedReader;
// import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
@ -24,28 +15,26 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Created by dpie on 20/01/2020.
* @author D. Pierrakos, S. Zoupanos
*/
public class IrusStats {
private String irusUKURL;
// private Connection conn = null;
// private Statement stmt = null;
private final Logger log = Logger.getLogger(this.getClass());
private static final Logger logger = LoggerFactory.getLogger(IrusStats.class);
public IrusStats(String irusUKURL) throws Exception {
this.irusUKURL = irusUKURL;
System.out.println("====> Creating Irus Stats tables");
logger.info("Creating Irus Stats tables");
createTables();
System.out.println("====> Created Irus Stats tables");
logger.info("Created Irus Stats tables");
// The following may not be needed - It will be created when JSON tables are created
// createTmpTables();
}
@ -53,14 +42,14 @@ public class IrusStats {
private void createTables() throws Exception {
try {
System.out.println("====> Creating sushilog");
logger.info("Creating sushilog");
Statement stmt = ConnectDB.getHiveConnection().createStatement();
String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".sushilog(source STRING, " +
"repository STRING, rid STRING, date STRING, metric_type STRING, count INT) clustered by (source, " +
"repository, rid, date, metric_type) into 100 buckets stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(sqlCreateTableSushiLog);
System.out.println("====> Created sushilog");
logger.info("Created sushilog");
// To see how to apply to the ignore duplicate rules and indexes
// stmt.executeUpdate(sqlCreateTableSushiLog);
@ -76,9 +65,9 @@ public class IrusStats {
stmt.close();
ConnectDB.getHiveConnection().close();
log.info("Sushi Tables Created");
logger.info("Sushi Tables Created");
} catch (Exception e) {
log.error("Failed to create tables: " + e);
logger.error("Failed to create tables: " + e);
throw new Exception("Failed to create tables: " + e.toString(), e);
}
}
@ -115,18 +104,18 @@ public class IrusStats {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Adding JSON Serde jar");
logger.info("Adding JSON Serde jar");
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
System.out.println("====> Added JSON Serde jar");
logger.info("Added JSON Serde jar");
System.out.println("====> Dropping sushilogtmp_json table");
logger.info("Dropping sushilogtmp_json table");
String drop_sushilogtmp_json = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".sushilogtmp_json";
stmt.executeUpdate(drop_sushilogtmp_json);
System.out.println("====> Dropped sushilogtmp_json table");
logger.info("Dropped sushilogtmp_json table");
System.out.println("====> Creating sushilogtmp_json table");
logger.info("Creating sushilogtmp_json table");
String create_sushilogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp_json(\n" +
" `ItemIdentifier` ARRAY<\n" +
@ -152,25 +141,25 @@ public class IrusStats {
"LOCATION '" + ExecuteWorkflow.irusUKReportPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_sushilogtmp_json);
System.out.println("====> Created sushilogtmp_json table");
logger.info("Created sushilogtmp_json table");
System.out.println("====> Dropping sushilogtmp table");
logger.info("Dropping sushilogtmp table");
String drop_sushilogtmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".sushilogtmp";
stmt.executeUpdate(drop_sushilogtmp);
System.out.println("====> Dropped sushilogtmp table");
logger.info("Dropped sushilogtmp table");
System.out.println("====> Creating sushilogtmp table");
logger.info("Creating sushilogtmp table");
String create_sushilogtmp = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema()
+ ".sushilogtmp(source STRING, repository STRING, " +
"rid STRING, date STRING, metric_type STRING, count INT) clustered by (source) into 100 buckets stored as orc "
+
"tblproperties('transactional'='true')";
stmt.executeUpdate(create_sushilogtmp);
System.out.println("====> Created sushilogtmp table");
logger.info("Created sushilogtmp table");
System.out.println("====> Inserting to sushilogtmp table");
logger.info("Inserting to sushilogtmp table");
String insert_sushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp " +
"SELECT 'IRUS-UK', 'opendoar____::', `ItemIdent`.`Value`, `ItemPerf`.`Period`.`Begin`, " +
"`ItemPerf`.`Instance`.`MetricType`, `ItemPerf`.`Instance`.`Count` " +
@ -179,7 +168,7 @@ public class IrusStats {
"LATERAL VIEW posexplode(ItemPerformance) ItemPerformanceTable AS seqp, ItemPerf " +
"WHERE `ItemIdent`.`Type`= 'OAI'";
stmt.executeUpdate(insert_sushilogtmp);
System.out.println("====> Inserted to sushilogtmp table");
logger.info("Inserted to sushilogtmp table");
ConnectDB.getHiveConnection().close();
@ -214,8 +203,7 @@ public class IrusStats {
+ simpleDateFormat.format(new Date())
+ "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
System.out.println("====> (processIrusRRReport) Getting report: " + reportUrl);
log.info("Getting Irus report: " + reportUrl);
logger.info("(processIrusRRReport) Getting report: " + reportUrl);
String text = getJson(reportUrl, "", "");
@ -235,8 +223,6 @@ public class IrusStats {
for (Object identifier : itemIdentifier) {
JSONObject opendoar = (JSONObject) identifier;
if (opendoar.get("Type").toString().equals("OpenDOAR")) {
// System.out.println(i + ": " + opendoar.get("Value").toString());
log.info(i + ": " + opendoar.get("Value").toString());
i++;
getIrusIRReport(opendoar.get("Value").toString(), irusUKReportPath);
break;
@ -245,12 +231,12 @@ public class IrusStats {
// break;
}
System.out.println("====> (processIrusRRReport) Finished with report: " + reportUrl);
logger.info("(processIrusRRReport) Finished with report: " + reportUrl);
}
private void getIrusIRReport(String opendoar, String irusUKReportPath) throws Exception {
System.out.println("====> (processIrusIRReport) Getting report(s) with opendoar: " + opendoar);
logger.info("(processIrusIRReport) Getting report(s) with opendoar: " + opendoar);
ConnectDB.getHiveConnection().setAutoCommit(false);
@ -327,7 +313,7 @@ public class IrusStats {
preparedStatement.executeBatch();
ConnectDB.getHiveConnection().close();
System.out.println("====> (processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar);
logger.info("(processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar);
}
private String getJson(String url) throws Exception {
@ -352,7 +338,7 @@ public class IrusStats {
return response.toString();
} catch (Exception e) {
log.error("Failed to get URL: " + e);
logger.error("Failed to get URL: " + e);
System.out.println("Failed to get URL: " + e);
throw new Exception("Failed to get URL: " + e.toString(), e);
}
@ -376,7 +362,7 @@ public class IrusStats {
}
return response.toString();
} catch (Exception e) {
log.error("Failed to get URL", e);
logger.error("Failed to get URL", e);
return null;
}
}

View File: LaReferenciaDownloadLogs.java

@ -15,11 +15,15 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author D. Pierrakos, S. Zoupanos
*/
public class LaReferenciaDownloadLogs {
private final String piwikUrl;
@ -33,7 +37,7 @@ public class LaReferenciaDownloadLogs {
private final String format = "&format=json";
private final String ApimethodGetAllSites = "?module=API&method=SitesManager.getSitesWithViewAccess";
private final Logger log = Logger.getLogger(this.getClass());
private static final Logger logger = LoggerFactory.getLogger(LaReferenciaDownloadLogs.class);
public LaReferenciaDownloadLogs(String piwikUrl, String tokenAuth) throws Exception {
this.piwikUrl = piwikUrl;
@ -46,7 +50,7 @@ public class LaReferenciaDownloadLogs {
try {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
System.out.println("====> Creating LaReferencia tables");
logger.info("Creating LaReferencia tables");
String sqlCreateTableLareferenciaLog = "CREATE TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".lareferencialog(matomoid INT, " +
"source STRING, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, " +
@ -54,7 +58,7 @@ public class LaReferenciaDownloadLogs {
"clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets " +
"stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(sqlCreateTableLareferenciaLog);
System.out.println("====> Created LaReferencia tables");
logger.info("Created LaReferencia tables");
// String sqlcreateRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
// + " ON INSERT TO lareferencialog "
// + " WHERE (EXISTS ( SELECT lareferencialog.matomoid, lareferencialog.source, lareferencialog.id_visit,"
@ -67,10 +71,10 @@ public class LaReferenciaDownloadLogs {
stmt.close();
ConnectDB.getHiveConnection().close();
log.info("Lareferencia Tables Created");
logger.info("Lareferencia Tables Created");
} catch (Exception e) {
log.error("Failed to create tables: " + e);
logger.error("Failed to create tables: " + e);
throw new Exception("Failed to create tables: " + e.toString(), e);
// System.exit(0);
}
@ -121,7 +125,7 @@ public class LaReferenciaDownloadLogs {
return response.toString();
} catch (Exception e) {
log.error("Failed to get URL: " + e);
logger.error("Failed to get URL: " + e);
throw new Exception("Failed to get URL: " + e.toString(), e);
}
}
@ -144,7 +148,7 @@ public class LaReferenciaDownloadLogs {
public void GetLaReFerenciaLogs(String repoLogsPath,
int laReferencialMatomoID) throws Exception {
System.out.println("====> Downloading logs for LaReferencia repoid " + laReferencialMatomoID);
logger.info("Downloading logs for LaReferencia repoid " + laReferencialMatomoID);
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
@ -174,14 +178,10 @@ public class LaReferenciaDownloadLogs {
rs_date.close();
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
log
logger
.info(
"Downloading logs for LaReferencia repoid " + laReferencialMatomoID + " and for "
+ sdf.format(date));
System.out
.println(
"====> Downloading logs for LaReferencia repoid " + laReferencialMatomoID + " and for "
+ sdf.format(date));
String period = "&period=day&date=" + sdf.format(date);
String outFolder = "";
@ -215,9 +215,9 @@ public class LaReferenciaDownloadLogs {
fin.writeChar('\n');
}
System.out
.println(
"====> Downloaded part " + i + " of logs for LaReferencia repoid " + laReferencialMatomoID
logger
.info(
"Downloaded part " + i + " of logs for LaReferencia repoid " + laReferencialMatomoID
+ " and for "
+ sdf.format(date));
i++;

View File: LaReferenciaStats.java

@ -18,18 +18,23 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author D. Pierrakos, S. Zoupanos
*/
public class LaReferenciaStats {
private static final Logger logger = LoggerFactory.getLogger(LaReferenciaStats.class);
private String logRepoPath;
private Statement stmt = null;
private final Logger log = Logger.getLogger(this.getClass());
private String CounterRobotsURL;
private ArrayList robotsList;
@ -47,7 +52,7 @@ public class LaReferenciaStats {
try {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
System.out.println("====> Creating LaReferencia tables");
logger.info("Creating LaReferencia tables");
String sqlCreateTableLareferenciaLog = "CREATE TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".lareferencialog(matomoid INT, " +
"source STRING, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, " +
@ -55,7 +60,7 @@ public class LaReferenciaStats {
"clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets " +
"stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(sqlCreateTableLareferenciaLog);
System.out.println("====> Created LaReferencia tables");
logger.info("Created LaReferencia tables");
// String sqlcreateRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
// + " ON INSERT TO lareferencialog "
// + " WHERE (EXISTS ( SELECT lareferencialog.matomoid, lareferencialog.source, lareferencialog.id_visit,"
@ -68,10 +73,10 @@ public class LaReferenciaStats {
stmt.close();
ConnectDB.getHiveConnection().close();
log.info("Lareferencia Tables Created");
logger.info("Lareferencia Tables Created");
} catch (Exception e) {
log.error("Failed to create tables: " + e);
logger.error("Failed to create tables: " + e);
throw new Exception("Failed to create tables: " + e.toString(), e);
// System.exit(0);
}
@ -103,31 +108,26 @@ public class LaReferenciaStats {
public void processLogs() throws Exception {
try {
System.out.println("====> Processing LaReferencia repository logs");
logger.info("Processing LaReferencia repository logs");
// processlaReferenciaLog();
System.out.println("====> LaReferencia repository logs process done");
log.info("LaReferencia repository process done");
logger.info("LaReferencia repository logs process done");
System.out.println("====> LaReferencia removing double clicks");
logger.info("LaReferencia removing double clicks");
// removeDoubleClicks();
System.out.println("====> LaReferencia removed double clicks");
log.info("LaReferencia removing double clicks done");
logger.info("LaReferencia removed double clicks");
System.out.println("====> LaReferencia creating viewsStats");
logger.info("LaReferencia creating viewsStats");
// viewsStats();
System.out.println("====> LaReferencia created viewsStats");
log.info("LaReferencia views done");
System.out.println("====> LaReferencia creating downloadsStats");
logger.info("LaReferencia created viewsStats");
logger.info("LaReferencia creating downloadsStats");
// downloadsStats();
System.out.println("====> LaReferencia created downloadsStats");
log.info("LaReferencia downloads done");
System.out.println("====> LaReferencia updating Production Tables");
logger.info("LaReferencia created downloadsStats");
logger.info("LaReferencia updating Production Tables");
updateProdTables();
System.out.println("====> LaReferencia updated Production Tables");
log.info("LaReferencia update productions tables done");
logger.info("LaReferencia updated Production Tables");
} catch (Exception e) {
log.error("Failed to process logs: " + e);
logger.error("Failed to process logs: " + e);
throw new Exception("Failed to process logs: " + e.toString(), e);
}
}
@ -136,18 +136,18 @@ public class LaReferenciaStats {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Adding JSON Serde jar");
logger.info("Adding JSON Serde jar");
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
System.out.println("====> Added JSON Serde jar");
logger.info("Added JSON Serde jar");
System.out.println("====> Dropping lareferencialogtmp_json table");
logger.info("Dropping lareferencialogtmp_json table");
String drop_lareferencialogtmp_json = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".lareferencialogtmp_json";
stmt.executeUpdate(drop_lareferencialogtmp_json);
System.out.println("====> Dropped lareferencialogtmp_json table");
logger.info("Dropped lareferencialogtmp_json table");
System.out.println("====> Creating lareferencialogtmp_json");
logger.info("Creating lareferencialogtmp_json");
String create_lareferencialogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".lareferencialogtmp_json(\n" +
@ -177,16 +177,16 @@ public class LaReferenciaStats {
"LOCATION '" + ExecuteWorkflow.lareferenciaLogPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_lareferencialogtmp_json);
System.out.println("====> Created lareferencialogtmp_json");
logger.info("Created lareferencialogtmp_json");
System.out.println("====> Dropping lareferencialogtmp table");
logger.info("Dropping lareferencialogtmp table");
String drop_lareferencialogtmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".lareferencialogtmp";
stmt.executeUpdate(drop_lareferencialogtmp);
System.out.println("====> Dropped lareferencialogtmp table");
logger.info("Dropped lareferencialogtmp table");
System.out.println("====> Creating lareferencialogtmp");
logger.info("Creating lareferencialogtmp");
String create_lareferencialogtmp = "CREATE TABLE " +
ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp(matomoid INT, " +
"source STRING, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, " +
@ -194,9 +194,9 @@ public class LaReferenciaStats {
"clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets " +
"stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(create_lareferencialogtmp);
System.out.println("====> Created lareferencialogtmp");
logger.info("Created lareferencialogtmp");
System.out.println("====> Inserting into lareferencialogtmp");
logger.info("Inserting into lareferencialogtmp");
String insert_lareferencialogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp " +
"SELECT DISTINCT cast(idSite as INT) as matomoid, CONCAT('opendoar____::', " +
"actiondetail.customVariables.`2`.customVariablePageValue2) as source, idVisit as id_Visit, country, " +
@ -207,7 +207,7 @@ public class LaReferenciaStats {
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp_json " +
"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
stmt.executeUpdate(insert_lareferencialogtmp);
System.out.println("====> Inserted into lareferencialogtmp");
logger.info("Inserted into lareferencialogtmp");
stmt.close();
}
@ -217,7 +217,7 @@ public class LaReferenciaStats {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Cleaning download double clicks");
logger.info("Cleaning download double clicks");
// clean download double clicks
String sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp WHERE EXISTS (" +
"SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp " +
@ -231,10 +231,10 @@ public class LaReferenciaStats {
"AND lareferencialogtmp.timestamp=p1.timestamp)";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaned download double clicks");
logger.info("Cleaned download double clicks");
stmt = ConnectDB.getHiveConnection().createStatement();
System.out.println("====> Cleaning action double clicks");
logger.info("Cleaning action double clicks");
// clean view double clicks
sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp WHERE EXISTS (" +
"SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp " +
@ -248,7 +248,7 @@ public class LaReferenciaStats {
"AND lareferencialogtmp.timestamp=p1.timestamp)";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaned action double clicks");
logger.info("Cleaned action double clicks");
// conn.close();
}
@ -257,7 +257,7 @@ public class LaReferenciaStats {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Creating la_result_views_monthly_tmp view");
logger.info("Creating la_result_views_monthly_tmp view");
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".la_result_views_monthly_tmp AS "
+
"SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
@ -268,16 +268,16 @@ public class LaReferenciaStats {
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
"source ORDER BY source, entity_id";
stmt.executeUpdate(sql);
System.out.println("====> Created la_result_views_monthly_tmp view");
logger.info("Created la_result_views_monthly_tmp view");
System.out.println("====> Dropping la_views_stats_tmp table");
logger.info("Dropping la_views_stats_tmp table");
sql = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".la_views_stats_tmp";
stmt.executeUpdate(sql);
System.out.println("====> Dropped la_views_stats_tmp table");
logger.info("Dropped la_views_stats_tmp table");
System.out.println("====> Creating la_views_stats_tmp table");
logger.info("Creating la_views_stats_tmp table");
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".la_views_stats_tmp " +
"AS SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date, " +
"max(views) AS count, max(openaire_referrer) AS openaire " +
@ -287,7 +287,7 @@ public class LaReferenciaStats {
"GROUP BY d.id, ro.id, month " +
"ORDER BY d.id, ro.id, month";
stmt.executeUpdate(sql);
System.out.println("====> Created la_views_stats_tmp table");
logger.info("Created la_views_stats_tmp table");
stmt.close();
ConnectDB.getHiveConnection().close();
@ -298,7 +298,7 @@ public class LaReferenciaStats {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Creating la_result_downloads_monthly_tmp view");
logger.info("Creating la_result_downloads_monthly_tmp view");
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
+ ".la_result_downloads_monthly_tmp AS " +
"SELECT entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
@ -309,16 +309,16 @@ public class LaReferenciaStats {
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
"source ORDER BY source, entity_id";
stmt.executeUpdate(sql);
System.out.println("====> Created la_result_downloads_monthly_tmp view");
logger.info("Created la_result_downloads_monthly_tmp view");
System.out.println("====> Dropping la_views_stats_tmp table");
logger.info("Dropping la_views_stats_tmp table");
sql = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".la_views_stats_tmp";
stmt.executeUpdate(sql);
System.out.println("====> Dropped la_views_stats_tmp table");
logger.info("Dropped la_views_stats_tmp table");
System.out.println("====> Creating la_downloads_stats_tmp table");
logger.info("Creating la_downloads_stats_tmp table");
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".la_downloads_stats_tmp " +
"AS SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date, " +
"max(downloads) AS count, max(openaire_referrer) AS openaire " +
@ -328,7 +328,7 @@ public class LaReferenciaStats {
"GROUP BY d.id, ro.id, month " +
"ORDER BY d.id, ro.id, month";
stmt.executeUpdate(sql);
System.out.println("====> Created la_downloads_stats_tmp table");
logger.info("Created la_downloads_stats_tmp table");
stmt.close();
ConnectDB.getHiveConnection().close();
@ -339,12 +339,12 @@ public class LaReferenciaStats {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Updating lareferencialog");
logger.info("Updating lareferencialog");
String sql = "insert into " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialog " +
"select * from " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp";
stmt.executeUpdate(sql);
System.out.println("====> Updating views_stats");
logger.info("Updating views_stats");
sql = "insert into " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
"select * from " + ConnectDB.getUsageStatsDBSchema() + ".la_views_stats_tmp";
stmt.executeUpdate(sql);
@ -352,7 +352,7 @@ public class LaReferenciaStats {
// sql = "insert into public.views_stats select * from la_views_stats_tmp;";
// stmt.executeUpdate(sql);
System.out.println("====> Updating downloads_stats");
logger.info("Updating downloads_stats");
sql = "insert into " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
"select * from " + ConnectDB.getUsageStatsDBSchema() + ".la_downloads_stats_tmp";
stmt.executeUpdate(sql);
@ -380,7 +380,7 @@ public class LaReferenciaStats {
}
// hdfs.close();
} catch (Exception e) {
log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logRepoPath));
logger.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logRepoPath));
throw new Exception("HDFS file path with exported data does not exist : " + logRepoPath, e);
}
@ -413,7 +413,7 @@ public class LaReferenciaStats {
// fs.close();
} catch (Exception e) {
log.error(e);
logger.error(e.getMessage());
throw new Exception(e);
}

View File: PiwikDownloadLogs.java

@ -25,11 +25,15 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author D. Pierrakos, S. Zoupanos
*/
public class PiwikDownloadLogs {
private final String piwikUrl;
@ -42,7 +46,7 @@ public class PiwikDownloadLogs {
private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
private final String format = "&format=json";
private final Logger log = Logger.getLogger(this.getClass());
private static final Logger logger = LoggerFactory.getLogger(PiwikDownloadLogs.class);
public PiwikDownloadLogs(String piwikUrl, String tokenAuth) {
this.piwikUrl = piwikUrl;
@ -75,7 +79,7 @@ public class PiwikDownloadLogs {
return response.toString();
} catch (Exception e) {
log.error("Failed to get URL: " + e);
logger.error("Failed to get URL: " + e);
throw new Exception("Failed to get URL: " + e.toString(), e);
}
}
@ -117,7 +121,7 @@ public class PiwikDownloadLogs {
rs_date.close();
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
logger.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
String period = "&period=day&date=" + sdf.format(date);
String outFolder = "";

View File: PiwikStatsDB.java

@ -19,11 +19,15 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author D. Pierrakos, S. Zoupanos
*/
public class PiwikStatsDB {
private String logPath;
@ -32,7 +36,8 @@ public class PiwikStatsDB {
private Statement stmt = null;
private final Logger log = Logger.getLogger(this.getClass());
private static final Logger logger = LoggerFactory.getLogger(PiwikStatsDB.class);
private String CounterRobotsURL;
private ArrayList robotsList;
@ -77,7 +82,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(createDatabase);
} catch (Exception e) {
log.error("Failed to create database: " + e);
logger.error("Failed to create database: " + e);
throw new Exception("Failed to create database: " + e.toString(), e);
}
}
@ -112,10 +117,9 @@ public class PiwikStatsDB {
stmt.close();
ConnectDB.getHiveConnection().close();
log.info("Usage Tables Created");
} catch (Exception e) {
log.error("Failed to create tables: " + e);
logger.error("Failed to create tables: " + e);
throw new Exception("Failed to create tables: " + e.toString(), e);
}
}
@ -153,10 +157,9 @@ public class PiwikStatsDB {
//////////////////////////////////////////////////
stmt.close();
log.info("Usage Tmp Tables Created");
} catch (Exception e) {
log.error("Failed to create tmptables: " + e);
logger.error("Failed to create tmptables: " + e);
throw new Exception("Failed to create tmp tables: " + e.toString(), e);
// System.exit(0);
}
@ -167,47 +170,41 @@ public class PiwikStatsDB {
ReadCounterRobotsList counterRobots = new ReadCounterRobotsList(this.getCounterRobotsURL());
this.robotsList = counterRobots.getRobotsPatterns();
System.out.println("====> Processing repository logs");
logger.info("Processing repository logs");
// processRepositoryLog();
System.out.println("====> Repository logs process done");
log.info("repository process done");
logger.info("Repository logs process done");
System.out.println("====> Removing double clicks");
logger.info("Removing double clicks");
// removeDoubleClicks();
System.out.println("====> Removing double clicks done");
log.info("removing double clicks done");
logger.info("Removing double clicks done");
System.out.println("====> Cleaning oai");
logger.info("Cleaning oai");
// cleanOAI();
System.out.println("====> Cleaning oai done");
log.info("cleaning oai done");
logger.info("Cleaning oai done");
System.out.println("====> ViewsStats processing starts");
logger.info("ViewsStats processing starts");
// viewsStats();
System.out.println("====> ViewsStats processing ends");
logger.info("ViewsStats processing ends");
System.out.println("====> DownloadsStats processing starts");
logger.info("DownloadsStats processing starts");
// downloadsStats();
System.out.println("====> DownloadsStats processing starts");
logger.info("DownloadsStats processing starts");
System.out.println("====> Processing portal logs");
logger.info("Processing portal logs");
// processPortalLog();
System.out.println("====> Portal logs process done");
log.info("portal process done");
logger.info("Portal logs process done");
System.out.println("====> Processing portal usagestats");
logger.info("Processing portal usagestats");
// To see why this never ends
portalStats();
log.info("portal usagestats done");
System.out.println("====> Portal usagestats process done");
logger.info("Portal usagestats process done");
System.out.println("====> Updating Production Tables");
logger.info("Updating Production Tables");
// updateProdTables();
System.out.println("====> Updated Production Tables");
log.info("updateProdTables done");
logger.info("Updated Production Tables");
} catch (Exception e) {
log.error("Failed to process logs: " + e);
logger.error("Failed to process logs: " + e);
throw new Exception("Failed to process logs: " + e.toString(), e);
}
}
@ -228,18 +225,18 @@ public class PiwikStatsDB {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Adding JSON Serde jar");
logger.info("Adding JSON Serde jar");
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
System.out.println("====> Added JSON Serde jar");
logger.info("Added JSON Serde jar");
System.out.println("====> Dropping piwiklogtmp_json table");
logger.info("Dropping piwiklogtmp_json table");
String drop_piwiklogtmp_json = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".piwiklogtmp_json";
stmt.executeUpdate(drop_piwiklogtmp_json);
System.out.println("====> Dropped piwiklogtmp_json table");
logger.info("Dropped piwiklogtmp_json table");
System.out.println("====> Creating piwiklogtmp_json");
logger.info("Creating piwiklogtmp_json");
String create_piwiklogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".piwiklogtmp_json(\n" +
@ -265,25 +262,25 @@ public class PiwikStatsDB {
"LOCATION '" + ExecuteWorkflow.repoLogPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_piwiklogtmp_json);
System.out.println("====> Created piwiklogtmp_json");
logger.info("Created piwiklogtmp_json");
System.out.println("====> Dropping piwiklogtmp table");
logger.info("Dropping piwiklogtmp table");
String drop_piwiklogtmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".piwiklogtmp";
stmt.executeUpdate(drop_piwiklogtmp);
System.out.println("====> Dropped piwiklogtmp");
logger.info("Dropped piwiklogtmp");
System.out.println("====> Creating piwiklogtmp");
logger.info("Creating piwiklogtmp");
String create_piwiklogtmp = "CREATE TABLE " +
ConnectDB.getUsageStatsDBSchema() +
".piwiklogtmp (source BIGINT, id_Visit STRING, country STRING, action STRING, url STRING, " +
"entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) " +
"clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(create_piwiklogtmp);
System.out.println("====> Created piwiklogtmp");
logger.info("Created piwiklogtmp");
System.out.println("====> Inserting into piwiklogtmp");
logger.info("Inserting into piwiklogtmp");
String insert_piwiklogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT cast(idSite as BIGINT) as source, idVisit as id_Visit, country, " +
"actiondetail.type as action, actiondetail.url as url, " +
@ -293,7 +290,7 @@ public class PiwikStatsDB {
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp_json\n" +
"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
stmt.executeUpdate(insert_piwiklogtmp);
System.out.println("====> Inserted into piwiklogtmp");
logger.info("Inserted into piwiklogtmp");
stmt.close();
}
@ -302,7 +299,7 @@ public class PiwikStatsDB {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Cleaning download double clicks");
logger.info("Cleaning download double clicks");
// clean download double clicks
String sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"WHERE EXISTS (\n" +
@ -316,10 +313,10 @@ public class PiwikStatsDB {
"AND piwiklogtmp.source=p1.source AND piwiklogtmp.id_visit=p1.id_visit \n" +
"AND piwiklogtmp.action=p1.action AND piwiklogtmp.entity_id=p1.entity_id AND piwiklogtmp.timestamp=p1.timestamp)";
stmt.executeUpdate(sql);
System.out.println("====> Cleaned download double clicks");
logger.info("Cleaned download double clicks");
// clean view double clicks
System.out.println("====> Cleaning action double clicks");
logger.info("Cleaning action double clicks");
sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"WHERE EXISTS (\n" +
"SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp \n" +
@ -332,7 +329,7 @@ public class PiwikStatsDB {
"AND piwiklogtmp.source=p1.source AND piwiklogtmp.id_visit=p1.id_visit \n" +
"AND piwiklogtmp.action=p1.action AND piwiklogtmp.entity_id=p1.entity_id AND piwiklogtmp.timestamp=p1.timestamp)";
stmt.executeUpdate(sql);
System.out.println("====> Cleaned action double clicks");
logger.info("Cleaned action double clicks");
stmt.close();
}
@ -340,14 +337,14 @@ public class PiwikStatsDB {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Dropping result_views_monthly_tmp table");
logger.info("Dropping result_views_monthly_tmp table");
String drop_result_views_monthly_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".result_views_monthly_tmp";
stmt.executeUpdate(drop_result_views_monthly_tmp);
System.out.println("====> Dropped result_views_monthly_tmp table");
logger.info("Dropped result_views_monthly_tmp table");
System.out.println("====> Creating result_views_monthly_tmp table");
logger.info("Creating result_views_monthly_tmp table");
String create_result_views_monthly_tmp = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
+ ".result_views_monthly_tmp " +
"AS SELECT entity_id AS id, " +
@ -359,16 +356,16 @@ public class PiwikStatsDB {
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
"source ORDER BY source, entity_id";
stmt.executeUpdate(create_result_views_monthly_tmp);
System.out.println("====> Created result_views_monthly_tmp table");
logger.info("Created result_views_monthly_tmp table");
System.out.println("====> Dropping views_stats_tmp table");
logger.info("Dropping views_stats_tmp table");
String drop_views_stats_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".views_stats_tmp";
stmt.executeUpdate(drop_views_stats_tmp);
System.out.println("====> Dropped views_stats_tmp table");
logger.info("Dropped views_stats_tmp table");
System.out.println("====> Creating views_stats_tmp table");
logger.info("Creating views_stats_tmp table");
String create_views_stats_tmp = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".views_stats_tmp " +
"AS SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
@ -379,29 +376,29 @@ public class PiwikStatsDB {
"GROUP BY d.id, ro.id, month " +
"ORDER BY d.id, ro.id, month";
stmt.executeUpdate(create_views_stats_tmp);
System.out.println("====> Created views_stats_tmp table");
logger.info("Created views_stats_tmp table");
System.out.println("====> Dropping views_stats table");
logger.info("Dropping views_stats table");
String drop_views_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".views_stats";
stmt.executeUpdate(drop_views_stats);
System.out.println("====> Dropped views_stats table");
logger.info("Dropped views_stats table");
System.out.println("====> Creating views_stats table");
logger.info("Creating views_stats table");
String create_view_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
stmt.executeUpdate(create_view_stats);
System.out.println("====> Created views_stats table");
logger.info("Created views_stats table");
System.out.println("====> Dropping pageviews_stats_tmp table");
logger.info("Dropping pageviews_stats_tmp table");
String drop_pageviews_stats_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".pageviews_stats_tmp";
stmt.executeUpdate(drop_pageviews_stats_tmp);
System.out.println("====> Dropped pageviews_stats_tmp table");
logger.info("Dropped pageviews_stats_tmp table");
System.out.println("====> Creating pageviews_stats_tmp table");
logger.info("Creating pageviews_stats_tmp table");
String create_pageviews_stats_tmp = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".pageviews_stats_tmp AS SELECT " +
"'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count " +
@ -411,21 +408,21 @@ public class PiwikStatsDB {
"GROUP BY d.id, ro.id, month " +
"ORDER BY d.id, ro.id, month";
stmt.executeUpdate(create_pageviews_stats_tmp);
System.out.println("====> Created pageviews_stats_tmp table");
logger.info("Created pageviews_stats_tmp table");
System.out.println("====> Droping pageviews_stats table");
logger.info("Droping pageviews_stats table");
String drop_pageviews_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".pageviews_stats";
stmt.executeUpdate(drop_pageviews_stats);
System.out.println("====> Dropped pageviews_stats table");
logger.info("Dropped pageviews_stats table");
System.out.println("====> Creating pageviews_stats table");
logger.info("Creating pageviews_stats table");
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".pageviews_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
stmt.executeUpdate(create_pageviews_stats);
System.out.println("====> Created pageviews_stats table");
logger.info("Created pageviews_stats table");
stmt.close();
ConnectDB.getHiveConnection().close();
@ -435,14 +432,14 @@ public class PiwikStatsDB {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Dropping result_downloads_monthly_tmp view");
logger.info("Dropping result_downloads_monthly_tmp view");
String drop_result_views_monthly_tmp = "DROP VIEW IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".result_views_monthly_tmp";
stmt.executeUpdate(drop_result_views_monthly_tmp);
System.out.println("====> Dropped result_downloads_monthly_tmp view");
logger.info("Dropped result_downloads_monthly_tmp view");
System.out.println("====> Creating result_views_monthly_tmp view");
logger.info("Creating result_views_monthly_tmp view");
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp " +
"AS SELECT entity_id AS id, COUNT(entity_id) as downloads, " +
"SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, " +
@ -452,16 +449,16 @@ public class PiwikStatsDB {
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source " +
"ORDER BY source, entity_id, month";
stmt.executeUpdate(sql);
System.out.println("====> Created result_views_monthly_tmp view");
logger.info("Created result_views_monthly_tmp view");
System.out.println("====> Dropping downloads_stats_tmp table");
logger.info("Dropping downloads_stats_tmp table");
String drop_views_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".downloads_stats_tmp";
stmt.executeUpdate(drop_views_stats);
System.out.println("====> Dropped downloads_stats_tmp table");
logger.info("Dropped downloads_stats_tmp table");
System.out.println("====> Creating downloads_stats_tmp table");
logger.info("Creating downloads_stats_tmp table");
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " +
"SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
"max(downloads) AS count, max(openaire_referrer) AS openaire " +
@ -470,26 +467,26 @@ public class PiwikStatsDB {
"WHERE p.source=d.piwik_id and p.id=ro.oid " +
"GROUP BY d.id, ro.id, month " +
"ORDER BY d.id, ro.id, month";
System.out.println("====> Created downloads_stats_tmp table");
logger.info("Created downloads_stats_tmp table");
stmt.executeUpdate(sql);
System.out.println("====> Dropping downloads_stats table");
logger.info("Dropping downloads_stats table");
String drop_pageviews_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".downloads_stats";
stmt.executeUpdate(drop_pageviews_stats);
System.out.println("====> Dropped downloads_stats table");
logger.info("Dropped downloads_stats table");
System.out.println("====> Creating downloads_stats table");
logger.info("Creating downloads_stats table");
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".downloads_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
stmt.executeUpdate(create_pageviews_stats);
System.out.println("====> Created downloads_stats table");
logger.info("Created downloads_stats table");
System.out.println("====> Dropping result_downloads_monthly_tmp view");
logger.info("Dropping result_downloads_monthly_tmp view");
sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp";
System.out.println("====> Dropped result_downloads_monthly_tmp view");
logger.info("Dropped result_downloads_monthly_tmp view");
stmt.executeUpdate(sql);
stmt.close();
@ -721,18 +718,18 @@ public class PiwikStatsDB {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Adding JSON Serde jar");
logger.info("Adding JSON Serde jar");
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
System.out.println("====> Added JSON Serde jar");
logger.info("Added JSON Serde jar");
System.out.println("====> Dropping process_portal_log_tmp_json table");
logger.info("Dropping process_portal_log_tmp_json table");
String drop_process_portal_log_tmp_json = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".piwiklogtmp_json";
stmt.executeUpdate(drop_process_portal_log_tmp_json);
System.out.println("====> Dropped process_portal_log_tmp_json table");
logger.info("Dropped process_portal_log_tmp_json table");
System.out.println("====> Creating process_portal_log_tmp_json");
logger.info("Creating process_portal_log_tmp_json");
String create_process_portal_log_tmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp_json(" +
" `idSite` STRING,\n" +
@ -752,25 +749,25 @@ public class PiwikStatsDB {
"LOCATION '" + ExecuteWorkflow.repoLogPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_process_portal_log_tmp_json);
System.out.println("====> Created process_portal_log_tmp_json");
logger.info("Created process_portal_log_tmp_json");
System.out.println("====> Droping process_portal_log_tmp table");
logger.info("Droping process_portal_log_tmp table");
String drop_process_portal_log_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".process_portal_log_tmp";
stmt.executeUpdate(drop_process_portal_log_tmp);
System.out.println("====> Dropped process_portal_log_tmp");
logger.info("Dropped process_portal_log_tmp");
System.out.println("====> Creating process_portal_log_tmp");
logger.info("Creating process_portal_log_tmp");
String create_process_portal_log_tmp = "CREATE TABLE " +
ConnectDB.getUsageStatsDBSchema() +
".process_portal_log_tmp (source BIGINT, id_visit STRING, country STRING, action STRING, url STRING, " +
"entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) " +
"clustered by (source, id_visit, timestamp) into 100 buckets stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(create_process_portal_log_tmp);
System.out.println("====> Created process_portal_log_tmp");
logger.info("Created process_portal_log_tmp");
System.out.println("====> Inserting into process_portal_log_tmp");
logger.info("Inserting into process_portal_log_tmp");
String insert_process_portal_log_tmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
+ ".process_portal_log_tmp " +
"SELECT DISTINCT cast(idSite as BIGINT) as source, idVisit as id_Visit, country, actiondetail.type as action, "
@ -802,7 +799,7 @@ public class PiwikStatsDB {
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp_json " +
"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
stmt.executeUpdate(insert_process_portal_log_tmp);
System.out.println("====> Inserted into process_portal_log_tmp");
logger.info("Inserted into process_portal_log_tmp");
stmt.close();
}
@ -837,7 +834,7 @@ public class PiwikStatsDB {
// IN (SELECT roid.oid FROM openaire_prod_stats_20200821.result_oids roid WHERE roid.oid IS NOT NULL AND
// roid.oid != '');
System.out.println("====> PortalStats - Step 1");
logger.info("PortalStats - Step 1");
String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'oaItem', `timestamp`, referrer_name, agent "
+
@ -848,7 +845,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> PortalStats - Step 2");
logger.info("PortalStats - Step 2");
stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'datasource', `timestamp`, referrer_name, agent "
@ -860,7 +857,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> PortalStats - Step 3");
logger.info("PortalStats - Step 3");
stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'organization', `timestamp`, referrer_name, agent "
@ -872,7 +869,7 @@ public class PiwikStatsDB {
// stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> PortalStats - Step 4");
logger.info("PortalStats - Step 4");
stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'project', `timestamp`, referrer_name, agent "
@ -890,7 +887,7 @@ public class PiwikStatsDB {
private void cleanOAI() throws Exception {
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Cleaning oai - Step 1");
logger.info("Cleaning oai - Step 1");
stmt = ConnectDB.getHiveConnection().createStatement();
String sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chlc.min-saude.pt/'," +
@ -898,7 +895,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 2");
logger.info("Cleaning oai - Step 2");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hospitaldebraga.pt/'," +
@ -906,7 +903,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 3");
logger.info("Cleaning oai - Step 3");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipl.pt/'," +
@ -914,7 +911,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 4");
logger.info("Cleaning oai - Step 4");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:bibliotecadigital.ipb.pt/'," +
@ -922,7 +919,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 5");
logger.info("Cleaning oai - Step 5");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ismai.pt/'," +
@ -930,7 +927,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 6");
logger.info("Cleaning oai - Step 6");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorioaberto.uab.pt/'," +
@ -938,7 +935,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 7");
logger.info("Cleaning oai - Step 7");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.uac.pt/'," +
@ -946,7 +943,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 8");
logger.info("Cleaning oai - Step 8");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.insa.pt/'," +
@ -954,7 +951,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 9");
logger.info("Cleaning oai - Step 9");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipcb.pt/'," +
@ -962,7 +959,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 10");
logger.info("Cleaning oai - Step 10");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ispa.pt/'," +
@ -970,7 +967,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 11");
logger.info("Cleaning oai - Step 11");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chporto.pt/'," +
@ -978,7 +975,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 12");
logger.info("Cleaning oai - Step 12");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ucp.pt/'," +
@ -986,7 +983,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 13");
logger.info("Cleaning oai - Step 13");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:rihuc.huc.min-saude.pt/'," +
@ -994,7 +991,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 14");
logger.info("Cleaning oai - Step 14");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipv.pt/'," +
@ -1002,7 +999,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 15");
logger.info("Cleaning oai - Step 15");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:www.repository.utl.pt/'," +
@ -1010,7 +1007,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 16");
logger.info("Cleaning oai - Step 16");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:run.unl.pt/'," +
@ -1018,7 +1015,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 17");
logger.info("Cleaning oai - Step 17");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:sapientia.ualg.pt/'," +
@ -1026,7 +1023,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 18");
logger.info("Cleaning oai - Step 18");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipsantarem.pt/'," +
@ -1034,7 +1031,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 19");
logger.info("Cleaning oai - Step 19");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:arca.igc.gulbenkian.pt/'," +
@ -1042,7 +1039,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 20");
logger.info("Cleaning oai - Step 20");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:ubibliorum.ubi.pt/'," +
@ -1050,7 +1047,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 21");
logger.info("Cleaning oai - Step 21");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:digituma.uma.pt/'," +
@ -1058,7 +1055,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 22");
logger.info("Cleaning oai - Step 22");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ul.pt/'," +
@ -1066,7 +1063,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 23");
logger.info("Cleaning oai - Step 23");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hff.min-saude.pt/'," +
@ -1074,7 +1071,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 24");
logger.info("Cleaning oai - Step 24");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorium.sdum.uminho.pt/'," +
@ -1082,7 +1079,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 25");
logger.info("Cleaning oai - Step 25");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:recipp.ipp.pt/'," +
@ -1090,7 +1087,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 26");
logger.info("Cleaning oai - Step 26");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:bdigital.ufp.pt/'," +
@ -1098,7 +1095,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 27");
logger.info("Cleaning oai - Step 27");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.lneg.pt/'," +
@ -1106,7 +1103,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 28");
logger.info("Cleaning oai - Step 28");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:iconline.ipleiria.pt/'," +
@ -1114,7 +1111,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 29");
logger.info("Cleaning oai - Step 29");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:comum.rcaap.pt/'," +
@ -1122,7 +1119,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Done, closing connection");
logger.info("Cleaning oai - Done, closing connection");
ConnectDB.getHiveConnection().close();
}
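Editor's note: all of the numbered oai cleaning steps above issue the same UPDATE against piwiklogtmp and differ only in the repository prefix being stripped. A minimal sketch of the same work driven from a prefix list — the helper name, the prefix array and the replacement argument are illustrative only, since the regexp_replace replacement expression is cut off by the hunk boundaries above:

// Illustrative only: one UPDATE per OAI prefix, assuming the surrounding
// PiwikStatsDB context (ConnectDB, logger). "replacement" stands in for
// the regexp_replace argument elided by the hunks above.
private void cleanOaiPrefixes(Statement stmt, String replacement) throws Exception {
    String[] prefixes = {
        "oai:repositorio.ismai.pt/",
        "oai:repositorioaberto.uab.pt/",
        "oai:comum.rcaap.pt/"
        // ... remaining repository prefixes from the steps above
    };
    for (String prefix : prefixes) {
        logger.info("Cleaning oai prefix " + prefix);
        String sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
            "SET entity_id = regexp_replace(entity_id, '^" + prefix + "', '" + replacement + "')";
        stmt.executeUpdate(sql);
    }
}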
@ -1132,7 +1129,7 @@ public class PiwikStatsDB {
try {
url = URLDecoder.decode(url, "UTF-8");
} catch (Exception e) {
log.info(url);
logger.info("Error when decoding the following URL: " + url);
}
if (url.indexOf("datasourceId=") > 0 && url.substring(url.indexOf("datasourceId=") + 13).length() >= 46) {
url = "datasource|"
@ -1169,46 +1166,45 @@ public class PiwikStatsDB {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Inserting data to piwiklog");
logger.info("Inserting data to piwiklog");
String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklog " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp";
stmt.executeUpdate(sql);
System.out.println("====> Inserting data to views_stats");
logger.info("Inserting data to views_stats");
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
stmt.executeUpdate(sql);
System.out.println("====> Inserting data to downloads_stats");
logger.info("Inserting data to downloads_stats");
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
stmt.executeUpdate(sql);
System.out.println("====> Inserting data to pageviews_stats");
logger.info("Inserting data to pageviews_stats");
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
stmt.executeUpdate(sql);
System.out.println("====> Dropping table views_stats_tmp");
logger.info("Dropping table views_stats_tmp");
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
stmt.executeUpdate(sql);
System.out.println("====> Dropping table downloads_stats_tmp");
logger.info("Dropping table downloads_stats_tmp");
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
stmt.executeUpdate(sql);
System.out.println("====> Dropping table pageviews_stats_tmp");
logger.info("Dropping table pageviews_stats_tmp");
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
stmt.executeUpdate(sql);
System.out.println("====> Dropping table process_portal_log_tmp");
logger.info("Dropping table process_portal_log_tmp");
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp";
stmt.executeUpdate(sql);
stmt.close();
ConnectDB.getHiveConnection().close();
log.info("updateProdTables done");
}
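Editor's note: the method above repeats one pattern — append each *_tmp table into its permanent counterpart, then drop the temporary tables. A sketch of the same promotion expressed as two small loops; the method name and table arrays are illustrative, assuming the surrounding PiwikStatsDB context:

// Illustrative sketch of the tmp-to-prod promotion performed above.
private void promoteTmpTables(Statement stmt) throws Exception {
    String schema = ConnectDB.getUsageStatsDBSchema();
    String[][] copies = {
        {"piwiklogtmp", "piwiklog"},
        {"views_stats_tmp", "views_stats"},
        {"downloads_stats_tmp", "downloads_stats"},
        {"pageviews_stats_tmp", "pageviews_stats"}
    };
    for (String[] pair : copies) {
        logger.info("Inserting data to " + pair[1]);
        stmt.executeUpdate("INSERT INTO " + schema + "." + pair[1] +
            " SELECT * FROM " + schema + "." + pair[0]);
    }
    String[] tmpTables = {
        "views_stats_tmp", "downloads_stats_tmp",
        "pageviews_stats_tmp", "process_portal_log_tmp"
    };
    for (String tmp : tmpTables) {
        logger.info("Dropping table " + tmp);
        stmt.executeUpdate("DROP TABLE IF EXISTS " + schema + "." + tmp);
    }
}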
private ArrayList<String> listHdfsDir(String dir) throws Exception {
@ -1227,7 +1223,7 @@ public class PiwikStatsDB {
hdfs.close();
} catch (Exception e) {
log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logPath));
logger.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logPath));
throw new Exception("HDFS file path with exported data does not exist : " + logPath, e);
}
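Editor's note: listHdfsDir collects the exported log files under an HDFS directory. A minimal sketch of the listing itself, assuming the standard Hadoop FileSystem API and the slf4j logger introduced in this commit; the real method's Configuration handling may differ:

// Illustrative sketch only (requires org.apache.hadoop.conf.Configuration,
// org.apache.hadoop.fs.FileSystem, FileStatus and Path, plus the slf4j logger).
private ArrayList<String> listHdfsDirSketch(String dir) throws Exception {
    ArrayList<String> fileNames = new ArrayList<>();
    FileSystem hdfs = FileSystem.get(new Configuration());
    try {
        // One entry per file under the exported-logs directory
        for (FileStatus fileStatus : hdfs.listStatus(new Path(dir))) {
            fileNames.add(fileStatus.getPath().toString());
        }
        hdfs.close();
    } catch (Exception e) {
        logger.error("HDFS file path with exported data does not exist : " + dir, e);
        throw new Exception("HDFS file path with exported data does not exist : " + dir, e);
    }
    return fileNames;
}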
@ -1260,7 +1256,7 @@ public class PiwikStatsDB {
// fs.close();
} catch (Exception e) {
log.error(e);
logger.error(e.getMessage());
throw new Exception(e);
}
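Editor's note: one slf4j detail relevant to the replacements above — passing the exception as the last argument keeps the full stack trace in the log, whereas logging only e.getMessage() drops it. A minimal illustration:

try {
    // ... read the exported log file ...
} catch (Exception e) {
    // The Throwable overload logs the message plus the stack trace
    logger.error("Failed to read exported log file", e);
    throw new Exception(e);
}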

View File

@ -7,10 +7,13 @@
package eu.dnetlib.oa.graph.usagestats.export;
/**
* @author dpie
*
* @author D. Pierrakos, S. Zoupanos
*
*/
/**
* @author dpie
* @author D. Pierrakos, S. Zoupanos
*/
import java.io.BufferedReader;
import java.io.IOException;

View File

@ -21,20 +21,21 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Created by dpie
* @author D. Pierrakos, S. Zoupanos
*/
public class SarcStats {
private Statement stmt = null;
private final Logger log = Logger.getLogger(this.getClass());
private static final Logger logger = LoggerFactory.getLogger(SarcStats.class);
public SarcStats() throws Exception {
// createTables();
@ -61,9 +62,9 @@ public class SarcStats {
stmt.close();
ConnectDB.getHiveConnection().close();
log.info("Sushi Tables Created");
logger.info("Sushi Tables Created");
} catch (Exception e) {
log.error("Failed to create tables: " + e);
logger.error("Failed to create tables: " + e);
throw new Exception("Failed to create tables: " + e.toString(), e);
}
}
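Editor's note: the classes touched by this commit repeatedly open a Statement, execute, log and close. A small alternative sketch using try-with-resources so the Statement is closed even when the update fails — shown for illustration only, not as the project's code; it assumes the surrounding class context (ConnectDB, logger):

// Illustrative helper: the Statement is closed automatically.
private void runUpdate(String sql) throws Exception {
    try (Statement stmt = ConnectDB.getHiveConnection().createStatement()) {
        logger.info("Executing: " + sql);
        stmt.executeUpdate(sql);
    } catch (Exception e) {
        logger.error("Failed to execute: " + sql, e);
        throw e;
    }
}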
@ -73,18 +74,18 @@ public class SarcStats {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Adding JSON Serde jar");
logger.info("Adding JSON Serde jar");
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
System.out.println("====> Added JSON Serde jar");
logger.info("Added JSON Serde jar");
System.out.println("====> Dropping sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
logger.info("Dropping sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
String drop_sarc_sushilogtmp_json_array = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".sarc_sushilogtmp_json_array_" + issn.replace("-", "_");
stmt.executeUpdate(drop_sarc_sushilogtmp_json_array);
System.out.println("====> Dropped sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
logger.info("Dropped sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
System.out.println("====> Creating sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
logger.info("Creating sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
String create_sarc_sushilogtmp_json_array = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + "(\n" +
" `ItemIdentifier` ARRAY<\n" +
@ -108,16 +109,16 @@ public class SarcStats {
"LOCATION '" + sarcsReportPathArray + "/" + issn + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_sarc_sushilogtmp_json_array);
System.out.println("====> Created sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
logger.info("Created sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
System.out.println("====> Dropping sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
logger.info("Dropping sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
String drop_sarc_sushilogtmp_json_non_array = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_");
stmt.executeUpdate(drop_sarc_sushilogtmp_json_non_array);
System.out.println("====> Dropped sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
logger.info("Dropped sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
System.out.println("====> Creating sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
logger.info("Creating sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
String create_sarc_sushilogtmp_json_non_array = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + "(\n" +
" `ItemIdentifier` struct<\n" +
@ -139,18 +140,18 @@ public class SarcStats {
"LOCATION '" + sarcsReportPathNonArray + "/" + issn + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_sarc_sushilogtmp_json_non_array);
System.out.println("====> Created sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
logger.info("Created sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
System.out.println("====> Creating sarc_sushilogtmp table");
logger.info("Creating sarc_sushilogtmp table");
String create_sarc_sushilogtmp = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".sarc_sushilogtmp(source STRING, repository STRING, " +
"rid STRING, date STRING, metric_type STRING, count INT) clustered by (source) into 100 buckets stored as orc "
+
"tblproperties('transactional'='true')";
stmt.executeUpdate(create_sarc_sushilogtmp);
System.out.println("====> Created sarc_sushilogtmp table");
logger.info("Created sarc_sushilogtmp table");
System.out.println("====> Inserting to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)");
logger.info("Inserting to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)");
String insert_sarc_sushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp " +
"SELECT 'SARC-OJS', '" + issn + "' , `ItemIdent`.`Value`, `ItemPerformance`.`Period`.`Begin`, " +
"`ItemPerformance`.`Instance`.`MetricType`, `ItemPerformance`.`Instance`.`Count` " +
@ -158,15 +159,15 @@ public class SarcStats {
+
"LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent ";
stmt.executeUpdate(insert_sarc_sushilogtmp);
System.out.println("====> Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)");
logger.info("Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)");
System.out.println("====> Inserting to sarc_sushilogtmp table (sarc_sushilogtmp_json_non_array)");
logger.info("Inserting to sarc_sushilogtmp table (sarc_sushilogtmp_json_non_array)");
insert_sarc_sushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp " +
"SELECT 'SARC-OJS', '" + issn + "' , `ItemIdentifier`.`Value`, `ItemPerformance`.`Period`.`Begin`, " +
"`ItemPerformance`.`Instance`.`MetricType`, `ItemPerformance`.`Instance`.`Count` " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_");
stmt.executeUpdate(insert_sarc_sushilogtmp);
System.out.println("====> Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_non_array)");
logger.info("Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_non_array)");
ConnectDB.getHiveConnection().close();
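Editor's note: the statements above lay external JSON tables over the downloaded SUSHI reports (via the hive-hcatalog Serde jar added at the start of the method) and then flatten them into sarc_sushilogtmp. A condensed sketch of the external-table pattern, with a placeholder table name, column list and reportPath, assuming the standard JsonSerDe shipped in that jar:

// Illustrative only: minimal external JSON table over a report directory.
// Table name, columns and reportPath are placeholders.
String createJsonTable = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
    ConnectDB.getUsageStatsDBSchema() + ".example_sushi_report_json(" +
    "`ItemIdentifier` STRING, `ItemPerformance` STRING) " +
    "ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' " +
    "LOCATION '" + reportPath + "' " +
    "TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(createJsonTable);
logger.info("Created example external JSON table");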
@ -179,12 +180,12 @@ public class SarcStats {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Dropping sarc_sushilogtmp table");
logger.info("Dropping sarc_sushilogtmp table");
String drop_sarc_sushilogtmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".sarc_sushilogtmp";
stmt.executeUpdate(drop_sarc_sushilogtmp);
System.out.println("====> Dropped sarc_sushilogtmp table");
logger.info("Dropped sarc_sushilogtmp table");
ConnectDB.getHiveConnection().close();
List<String[]> issnAndUrls = new ArrayList<String[]>();
@ -252,7 +253,7 @@ public class SarcStats {
ConnectDB.getHiveConnection().setAutoCommit(false);
// Insert into downloads_stats
System.out.println("====> Inserting into downloads_stats");
logger.info("Inserting into downloads_stats");
// String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilog s, public.datasource_oids d, public.datasource_results dr, public.result_pids ro WHERE d.orid LIKE '%' || s.repository || '%' AND dr.id=d.id AND dr.result=ro.id AND s.rid=ro.pid AND ro.type='doi' AND metric_type='ft_total' AND s.source='SARC-OJS';";
String insertDStats = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
+ ".downloads_stats SELECT s.source, d.id AS repository_id, " +
@ -264,14 +265,14 @@ public class SarcStats {
"WHERE d.oid LIKE CONCAT('%', s.repository, '%') AND dr.id=d.id AND dr.result=ro.id AND " +
"s.rid=ro.pid AND ro.type='doi' AND metric_type='ft_total' AND s.source='SARC-OJS'";
stmt.executeUpdate(insertDStats);
System.out.println("====> Inserted into downloads_stats");
logger.info("Inserted into downloads_stats");
// Insert into sushilog
System.out.println("====> Inserting into sushilog");
logger.info("Inserting into sushilog");
String insertSushiLog = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
+ ".sushilog SELECT * " + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp";
stmt.executeUpdate(insertSushiLog);
System.out.println("====> Inserted into sushilog");
logger.info("Inserted into sushilog");
stmt.close();
ConnectDB.getHiveConnection().close();
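Editor's note: the join above resolves the SARC repository identifier to an OpenAIRE datasource (datasource_oids) and the DOI to a result (datasource_results, result_pids) before the rows land in downloads_stats. A small sanity-check sketch that could follow it — illustrative, not part of the original flow:

// Illustrative: count the SARC-OJS rows that reached downloads_stats.
ResultSet rs = stmt.executeQuery(
    "SELECT COUNT(*) FROM " + ConnectDB.getUsageStatsDBSchema() +
    ".downloads_stats WHERE source='SARC-OJS'");
if (rs.next()) {
    logger.info("SARC-OJS rows in downloads_stats: " + rs.getLong(1));
}
rs.close();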
@ -279,7 +280,7 @@ public class SarcStats {
public void getARReport(String sarcsReportPathArray, String sarcsReportPathNonArray,
String url, String issn) throws Exception {
log.info("Processing SARC! issn: " + issn + " with url: " + url);
logger.info("Processing SARC! issn: " + issn + " with url: " + url);
ConnectDB.getHiveConnection().setAutoCommit(false);
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
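Editor's note on the date pattern above: in SimpleDateFormat, uppercase 'Y' is the week-based year, which can differ from the calendar year ('y') in the last days of December. A minimal illustration (requires java.text.SimpleDateFormat, java.util.Calendar, java.util.Date and java.util.GregorianCalendar):

SimpleDateFormat weekYear = new SimpleDateFormat("YYYY-MM");
SimpleDateFormat calendarYear = new SimpleDateFormat("yyyy-MM");
Date endOfYear = new GregorianCalendar(2019, Calendar.DECEMBER, 30).getTime();
String a = weekYear.format(endOfYear);     // typically "2020-12" (week-based year, locale-dependent)
String b = calendarYear.format(endOfYear); // "2019-12" (calendar year)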
@ -483,7 +484,7 @@ public class SarcStats {
} catch (Exception e) {
// Logging error and silently continuing
log.error("Failed to get URL: " + e);
logger.error("Failed to get URL: " + e);
System.out.println("Failed to get URL: " + e);
// return null;
// throw new Exception("Failed to get URL: " + e.toString(), e);
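Editor's note: getARReport pages through the SARC SUSHI-Lite endpoint and, as the catch block shows, logs fetch failures and carries on. A minimal sketch of a fetch helper with that log-and-continue behaviour, assuming plain java.net and the slf4j logger; the helper name is illustrative and the real method's connection handling, which is not shown here, may differ:

// Illustrative helper: returns null on failure, matching the "log and
// continue" behaviour above. Requires java.io.BufferedReader,
// java.io.InputStreamReader, java.net.URL and java.net.URLConnection.
private String getJson(String url) {
    try {
        StringBuilder response = new StringBuilder();
        URLConnection connection = new URL(url).openConnection();
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(connection.getInputStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                response.append(line);
            }
        }
        return response.toString();
    } catch (Exception e) {
        logger.error("Failed to get URL: " + url, e);
        return null;
    }
}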

View File

@ -3,21 +3,24 @@ package eu.dnetlib.oa.graph.usagestats.export;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Main class for downloading and processing Usage statistics
*
* @author D. Pierrakos, S. Zoupanos
*/
public class UsageStatsExporter {
private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
private Properties properties;
public void runImpalaQuery() throws Exception {
Statement stmt = ConnectDB.getImpalaConnection().createStatement();
ConnectDB.getImpalaConnection().setAutoCommit(false);
System.out.println("====> Executing Impala query");
logger.info("Executing Impala query");
Statement statement = ConnectDB.getImpalaConnection().createStatement();
ResultSet rs = statement
@ -40,45 +43,36 @@ public class UsageStatsExporter {
public void export() throws Exception {
logger.info("=====> Test of the logger (info)");
logger.debug("=====> Test of the logger (debug)");
logger.error("=====> Test of the logger (error)");
// connect to DB
System.out.println("====> Initialising DB properties");
ConnectDB.init(properties);
runImpalaQuery();
System.exit(0);
// runImpalaQuery();
// Create DB tables - they are also needed to download the statistics too
System.out.println("====> Creating database and tables");
logger.info("Creating database and tables");
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
//
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
// // the moment
System.out.println("====> Initializing the download logs module");
logger.info("Initializing the download logs module");
PiwikDownloadLogs piwd = new PiwikDownloadLogs(ExecuteWorkflow.matomoBaseURL, ExecuteWorkflow.matomoAuthToken);
System.out.println("====> Downloading piwik logs");
logger.info("Downloading piwik logs");
// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
System.out.println("====> Downloaded piwik logs");
logger.info("Downloaded piwik logs");
// Create DB tables, insert/update statistics
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
System.out.println("====> Processing logs");
logger.info("Processing logs");
piwikstatsdb.processLogs();
// log.info("process logs done");
System.out.println("====> Creating LaReferencia tables");
logger.info("Creating LaReferencia tables");
LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(ExecuteWorkflow.lareferenciaBaseURL,
ExecuteWorkflow.lareferenciaAuthToken);
System.out.println("====> Downloading LaReferencia logs");
logger.info("Downloading LaReferencia logs");
// lrf.GetLaReferenciaRepos(lareferenciaLogPath);
System.out.println("====> Downloaded LaReferencia logs");
logger.info("Downloaded LaReferencia logs");
LaReferenciaStats lastats = new LaReferenciaStats(ExecuteWorkflow.lareferenciaLogPath);
System.out.println("====> Processing LaReferencia logs");
logger.info("Processing LaReferencia logs");
// lastats.processLogs();
// log.info("LaReferencia logs done");
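Editor's note: the commented-out COUNTER_robots_Url lookup in the export() body above suggests the robots list URL is meant to come from the properties file. A sketch of reading it with the hard-coded URL as the fallback — the property key is taken from that commented-out line and is an assumption:

// Illustrative: use the configured value when present, otherwise fall back
// to the public COUNTER-Robots list already hard-coded above.
String cRobotsUrl = properties.getProperty(
    "COUNTER_robots_Url",
    "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json");
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
logger.info("Using COUNTER robots list from " + cRobotsUrl);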

View File

@ -1,43 +0,0 @@
<html>
<head>
<title>Revision 58415: /dnet45/modules/dnet-openaire-usage-stats-export-wf/trunk/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export</title>
</head>
<body>
<h2>Revision 58415: /dnet45/modules/dnet-openaire-usage-stats-export-wf/trunk/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export</h2>
<ul>
<li>
<a href="../">..</a>
</li>
<li>
<a href="ConnectDB.java">ConnectDB.java</a>
</li>
<li>
<a href="ExecuteWorkflow.java">ExecuteWorkflow.java</a>
</li>
<li>
<a href="IrusStats.java">IrusStats.java</a>
</li>
<li>
<a href="PiwikDownloadLogs.java">PiwikDownloadLogs.java</a>
</li>
<li>
<a href="PiwikStatsDB.java">PiwikStatsDB.java</a>
</li>
<li>
<a href="ReadCounterRobotsList.java">ReadCounterRobotsList.java</a>
</li>
<li>
<a href="SarcStats.java">SarcStats.java</a>
</li>
<li>
<a href="UsageStatsExporter.java">UsageStatsExporter.java</a>
</li>
</ul>
<hr noshade>
<em>
Powered by
<a href="http://subversion.tigris.org/">Subversion</a>
version 1.4.4 (r25188).
</em>
</body>
</html>