forked from D-Net/dnet-hadoop
Adding correct logger everywhere, cleaning code, removing sysouts
This commit is contained in:
parent
c6bbe215e1
commit
07e750939f
|
@ -6,11 +6,14 @@
|
|||
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
/*
|
||||
@author dpie
|
||||
/**
|
||||
*
|
||||
* @author D. Pierrakos, S. Zoupanos
|
||||
*
|
||||
*/
|
||||
/*
|
||||
@author dpie
|
||||
|
||||
/**
|
||||
* @author D. Pierrakos, S. Zoupanos
|
||||
*/
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
|
@ -35,22 +38,7 @@ public abstract class ConnectDB {
|
|||
|
||||
static void init(Properties properties) throws ClassNotFoundException {
|
||||
|
||||
// To be initialized by a property file
|
||||
|
||||
// dbURL = properties.getProperty("Stats_db_Url");
|
||||
// dbUsername = properties.getProperty("Stats_db_User");
|
||||
// dbPassword = properties.getProperty("Stats_db_Pass");
|
||||
// defaultDBSchema = properties.getProperty("Stats_db_Schema");
|
||||
//
|
||||
// Class.forName(properties.getProperty("Stats_db_Driver"));
|
||||
|
||||
dbHiveUrl = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1";
|
||||
// dbImpalaUrl = "jdbc:impala://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/;UseNativeQuery=1";
|
||||
// dbImpalaUrl = "jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/;UseNativeQuery=1";
|
||||
// dbImpalaUrl = "jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/openaire_prod_stats_20200731;UID=spyros;PWD=RU78N9sqQndnH3SQ;UseNativeQuery=1";
|
||||
// dbImpalaUrl = "jdbc:impala://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/openaire_prod_stats_20200731;UID=spyros;PWD=RU78N9sqQndnH3SQ;UseNativeQuery=1";
|
||||
// dbImpalaUrl = "jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/openaire_prod_stats_shadow_20200821;user=spyros;PWD=RU78N9sqQndnH3SQ";
|
||||
// dbImpalaUrl = "jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:28000/;transportMode=http";
|
||||
dbImpalaUrl = "jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/;auth=noSasl";
|
||||
usageStatsDBSchema = "usagestats_20200913";
|
||||
statsDBSchema = "openaire_prod_stats_shadow_20200821";
|
||||
|
|
|
@ -11,7 +11,7 @@ import org.apache.commons.io.IOUtils;
|
|||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
/**
|
||||
* @author dpie, Spyros Zoupanos
|
||||
* @author D. Pierrakos, S. Zoupanos
|
||||
*/
|
||||
public class ExecuteWorkflow {
|
||||
|
||||
|
|
|
@ -1,18 +1,9 @@
|
|||
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
/**
|
||||
* @author dpie
|
||||
*/
|
||||
/**
|
||||
* @author dpie
|
||||
*/
|
||||
import java.io.*;
|
||||
// import java.io.BufferedReader;
|
||||
// import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.sql.Connection;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.Statement;
|
||||
|
@ -24,28 +15,26 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
import org.json.simple.parser.JSONParser;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Created by dpie on 20/01/2020.
|
||||
* @author D. Pierrakos, S. Zoupanos
|
||||
*/
|
||||
public class IrusStats {
|
||||
|
||||
private String irusUKURL;
|
||||
|
||||
// private Connection conn = null;
|
||||
// private Statement stmt = null;
|
||||
|
||||
private final Logger log = Logger.getLogger(this.getClass());
|
||||
private static final Logger logger = LoggerFactory.getLogger(IrusStats.class);
|
||||
|
||||
public IrusStats(String irusUKURL) throws Exception {
|
||||
this.irusUKURL = irusUKURL;
|
||||
System.out.println("====> Creating Irus Stats tables");
|
||||
logger.info("Creating Irus Stats tables");
|
||||
createTables();
|
||||
System.out.println("====> Created Irus Stats tables");
|
||||
logger.info("Created Irus Stats tables");
|
||||
// The following may not be needed - It will be created when JSON tables are created
|
||||
// createTmpTables();
|
||||
}
|
||||
|
@ -53,14 +42,14 @@ public class IrusStats {
|
|||
private void createTables() throws Exception {
|
||||
try {
|
||||
|
||||
System.out.println("====> Creating sushilog");
|
||||
logger.info("Creating sushilog");
|
||||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||
+ ".sushilog(source STRING, " +
|
||||
"repository STRING, rid STRING, date STRING, metric_type STRING, count INT) clustered by (source, " +
|
||||
"repository, rid, date, metric_type) into 100 buckets stored as orc tblproperties('transactional'='true')";
|
||||
stmt.executeUpdate(sqlCreateTableSushiLog);
|
||||
System.out.println("====> Created sushilog");
|
||||
logger.info("Created sushilog");
|
||||
|
||||
// To see how to apply to the ignore duplicate rules and indexes
|
||||
// stmt.executeUpdate(sqlCreateTableSushiLog);
|
||||
|
@ -76,9 +65,9 @@ public class IrusStats {
|
|||
|
||||
stmt.close();
|
||||
ConnectDB.getHiveConnection().close();
|
||||
log.info("Sushi Tables Created");
|
||||
logger.info("Sushi Tables Created");
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to create tables: " + e);
|
||||
logger.error("Failed to create tables: " + e);
|
||||
throw new Exception("Failed to create tables: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
@ -115,18 +104,18 @@ public class IrusStats {
|
|||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Adding JSON Serde jar");
|
||||
logger.info("Adding JSON Serde jar");
|
||||
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
|
||||
System.out.println("====> Added JSON Serde jar");
|
||||
logger.info("Added JSON Serde jar");
|
||||
|
||||
System.out.println("====> Dropping sushilogtmp_json table");
|
||||
logger.info("Dropping sushilogtmp_json table");
|
||||
String drop_sushilogtmp_json = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".sushilogtmp_json";
|
||||
stmt.executeUpdate(drop_sushilogtmp_json);
|
||||
System.out.println("====> Dropped sushilogtmp_json table");
|
||||
logger.info("Dropped sushilogtmp_json table");
|
||||
|
||||
System.out.println("====> Creating sushilogtmp_json table");
|
||||
logger.info("Creating sushilogtmp_json table");
|
||||
String create_sushilogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp_json(\n" +
|
||||
" `ItemIdentifier` ARRAY<\n" +
|
||||
|
@ -152,25 +141,25 @@ public class IrusStats {
|
|||
"LOCATION '" + ExecuteWorkflow.irusUKReportPath + "'\n" +
|
||||
"TBLPROPERTIES (\"transactional\"=\"false\")";
|
||||
stmt.executeUpdate(create_sushilogtmp_json);
|
||||
System.out.println("====> Created sushilogtmp_json table");
|
||||
logger.info("Created sushilogtmp_json table");
|
||||
|
||||
System.out.println("====> Dropping sushilogtmp table");
|
||||
logger.info("Dropping sushilogtmp table");
|
||||
String drop_sushilogtmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".sushilogtmp";
|
||||
stmt.executeUpdate(drop_sushilogtmp);
|
||||
System.out.println("====> Dropped sushilogtmp table");
|
||||
logger.info("Dropped sushilogtmp table");
|
||||
|
||||
System.out.println("====> Creating sushilogtmp table");
|
||||
logger.info("Creating sushilogtmp table");
|
||||
String create_sushilogtmp = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema()
|
||||
+ ".sushilogtmp(source STRING, repository STRING, " +
|
||||
"rid STRING, date STRING, metric_type STRING, count INT) clustered by (source) into 100 buckets stored as orc "
|
||||
+
|
||||
"tblproperties('transactional'='true')";
|
||||
stmt.executeUpdate(create_sushilogtmp);
|
||||
System.out.println("====> Created sushilogtmp table");
|
||||
logger.info("Created sushilogtmp table");
|
||||
|
||||
System.out.println("====> Inserting to sushilogtmp table");
|
||||
logger.info("Inserting to sushilogtmp table");
|
||||
String insert_sushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp " +
|
||||
"SELECT 'IRUS-UK', 'opendoar____::', `ItemIdent`.`Value`, `ItemPerf`.`Period`.`Begin`, " +
|
||||
"`ItemPerf`.`Instance`.`MetricType`, `ItemPerf`.`Instance`.`Count` " +
|
||||
|
@ -179,7 +168,7 @@ public class IrusStats {
|
|||
"LATERAL VIEW posexplode(ItemPerformance) ItemPerformanceTable AS seqp, ItemPerf " +
|
||||
"WHERE `ItemIdent`.`Type`= 'OAI'";
|
||||
stmt.executeUpdate(insert_sushilogtmp);
|
||||
System.out.println("====> Inserted to sushilogtmp table");
|
||||
logger.info("Inserted to sushilogtmp table");
|
||||
|
||||
ConnectDB.getHiveConnection().close();
|
||||
|
||||
|
@ -214,8 +203,7 @@ public class IrusStats {
|
|||
+ simpleDateFormat.format(new Date())
|
||||
+ "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
|
||||
|
||||
System.out.println("====> (processIrusRRReport) Getting report: " + reportUrl);
|
||||
log.info("Getting Irus report: " + reportUrl);
|
||||
logger.info("(processIrusRRReport) Getting report: " + reportUrl);
|
||||
|
||||
String text = getJson(reportUrl, "", "");
|
||||
|
||||
|
@ -235,8 +223,6 @@ public class IrusStats {
|
|||
for (Object identifier : itemIdentifier) {
|
||||
JSONObject opendoar = (JSONObject) identifier;
|
||||
if (opendoar.get("Type").toString().equals("OpenDOAR")) {
|
||||
// System.out.println(i + ": " + opendoar.get("Value").toString());
|
||||
log.info(i + ": " + opendoar.get("Value").toString());
|
||||
i++;
|
||||
getIrusIRReport(opendoar.get("Value").toString(), irusUKReportPath);
|
||||
break;
|
||||
|
@ -245,12 +231,12 @@ public class IrusStats {
|
|||
// break;
|
||||
}
|
||||
|
||||
System.out.println("====> (processIrusRRReport) Finished with report: " + reportUrl);
|
||||
logger.info("(processIrusRRReport) Finished with report: " + reportUrl);
|
||||
}
|
||||
|
||||
private void getIrusIRReport(String opendoar, String irusUKReportPath) throws Exception {
|
||||
|
||||
System.out.println("====> (processIrusIRReport) Getting report(s) with opendoar: " + opendoar);
|
||||
logger.info("(processIrusIRReport) Getting report(s) with opendoar: " + opendoar);
|
||||
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
|
@ -327,7 +313,7 @@ public class IrusStats {
|
|||
preparedStatement.executeBatch();
|
||||
ConnectDB.getHiveConnection().close();
|
||||
|
||||
System.out.println("====> (processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar);
|
||||
logger.info("(processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar);
|
||||
}
|
||||
|
||||
private String getJson(String url) throws Exception {
|
||||
|
@ -352,7 +338,7 @@ public class IrusStats {
|
|||
|
||||
return response.toString();
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to get URL: " + e);
|
||||
logger.error("Failed to get URL: " + e);
|
||||
System.out.println("Failed to get URL: " + e);
|
||||
throw new Exception("Failed to get URL: " + e.toString(), e);
|
||||
}
|
||||
|
@ -376,7 +362,7 @@ public class IrusStats {
|
|||
}
|
||||
return response.toString();
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to get URL", e);
|
||||
logger.error("Failed to get URL", e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,11 +15,15 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
import org.json.simple.parser.JSONParser;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author D. Pierrakos, S. Zoupanos
|
||||
*/
|
||||
public class LaReferenciaDownloadLogs {
|
||||
|
||||
private final String piwikUrl;
|
||||
|
@ -33,7 +37,7 @@ public class LaReferenciaDownloadLogs {
|
|||
private final String format = "&format=json";
|
||||
private final String ApimethodGetAllSites = "?module=API&method=SitesManager.getSitesWithViewAccess";
|
||||
|
||||
private final Logger log = Logger.getLogger(this.getClass());
|
||||
private static final Logger logger = LoggerFactory.getLogger(LaReferenciaDownloadLogs.class);
|
||||
|
||||
public LaReferenciaDownloadLogs(String piwikUrl, String tokenAuth) throws Exception {
|
||||
this.piwikUrl = piwikUrl;
|
||||
|
@ -46,7 +50,7 @@ public class LaReferenciaDownloadLogs {
|
|||
try {
|
||||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
|
||||
System.out.println("====> Creating LaReferencia tables");
|
||||
logger.info("Creating LaReferencia tables");
|
||||
String sqlCreateTableLareferenciaLog = "CREATE TABLE IF NOT EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() + ".lareferencialog(matomoid INT, " +
|
||||
"source STRING, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, " +
|
||||
|
@ -54,7 +58,7 @@ public class LaReferenciaDownloadLogs {
|
|||
"clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets " +
|
||||
"stored as orc tblproperties('transactional'='true')";
|
||||
stmt.executeUpdate(sqlCreateTableLareferenciaLog);
|
||||
System.out.println("====> Created LaReferencia tables");
|
||||
logger.info("Created LaReferencia tables");
|
||||
// String sqlcreateRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
|
||||
// + " ON INSERT TO lareferencialog "
|
||||
// + " WHERE (EXISTS ( SELECT lareferencialog.matomoid, lareferencialog.source, lareferencialog.id_visit,"
|
||||
|
@ -67,10 +71,10 @@ public class LaReferenciaDownloadLogs {
|
|||
|
||||
stmt.close();
|
||||
ConnectDB.getHiveConnection().close();
|
||||
log.info("Lareferencia Tables Created");
|
||||
logger.info("Lareferencia Tables Created");
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to create tables: " + e);
|
||||
logger.error("Failed to create tables: " + e);
|
||||
throw new Exception("Failed to create tables: " + e.toString(), e);
|
||||
// System.exit(0);
|
||||
}
|
||||
|
@ -121,7 +125,7 @@ public class LaReferenciaDownloadLogs {
|
|||
|
||||
return response.toString();
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to get URL: " + e);
|
||||
logger.error("Failed to get URL: " + e);
|
||||
throw new Exception("Failed to get URL: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
@ -144,7 +148,7 @@ public class LaReferenciaDownloadLogs {
|
|||
public void GetLaReFerenciaLogs(String repoLogsPath,
|
||||
int laReferencialMatomoID) throws Exception {
|
||||
|
||||
System.out.println("====> Downloading logs for LaReferencia repoid " + laReferencialMatomoID);
|
||||
logger.info("Downloading logs for LaReferencia repoid " + laReferencialMatomoID);
|
||||
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
|
||||
|
@ -174,14 +178,10 @@ public class LaReferenciaDownloadLogs {
|
|||
rs_date.close();
|
||||
|
||||
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
|
||||
log
|
||||
logger
|
||||
.info(
|
||||
"Downloading logs for LaReferencia repoid " + laReferencialMatomoID + " and for "
|
||||
+ sdf.format(date));
|
||||
System.out
|
||||
.println(
|
||||
"====> Downloading logs for LaReferencia repoid " + laReferencialMatomoID + " and for "
|
||||
+ sdf.format(date));
|
||||
|
||||
String period = "&period=day&date=" + sdf.format(date);
|
||||
String outFolder = "";
|
||||
|
@ -215,9 +215,9 @@ public class LaReferenciaDownloadLogs {
|
|||
fin.writeChar('\n');
|
||||
}
|
||||
|
||||
System.out
|
||||
.println(
|
||||
"====> Downloaded part " + i + " of logs for LaReferencia repoid " + laReferencialMatomoID
|
||||
logger
|
||||
.info(
|
||||
"Downloaded part " + i + " of logs for LaReferencia repoid " + laReferencialMatomoID
|
||||
+ " and for "
|
||||
+ sdf.format(date));
|
||||
i++;
|
||||
|
|
|
@ -18,18 +18,23 @@ import org.apache.hadoop.fs.FileSystem;
|
|||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
import org.json.simple.parser.JSONParser;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author D. Pierrakos, S. Zoupanos
|
||||
*/
|
||||
public class LaReferenciaStats {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(LaReferenciaStats.class);
|
||||
|
||||
private String logRepoPath;
|
||||
|
||||
private Statement stmt = null;
|
||||
|
||||
private final Logger log = Logger.getLogger(this.getClass());
|
||||
private String CounterRobotsURL;
|
||||
private ArrayList robotsList;
|
||||
|
||||
|
@ -47,7 +52,7 @@ public class LaReferenciaStats {
|
|||
try {
|
||||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
|
||||
System.out.println("====> Creating LaReferencia tables");
|
||||
logger.info("Creating LaReferencia tables");
|
||||
String sqlCreateTableLareferenciaLog = "CREATE TABLE IF NOT EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() + ".lareferencialog(matomoid INT, " +
|
||||
"source STRING, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, " +
|
||||
|
@ -55,7 +60,7 @@ public class LaReferenciaStats {
|
|||
"clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets " +
|
||||
"stored as orc tblproperties('transactional'='true')";
|
||||
stmt.executeUpdate(sqlCreateTableLareferenciaLog);
|
||||
System.out.println("====> Created LaReferencia tables");
|
||||
logger.info("Created LaReferencia tables");
|
||||
// String sqlcreateRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
|
||||
// + " ON INSERT TO lareferencialog "
|
||||
// + " WHERE (EXISTS ( SELECT lareferencialog.matomoid, lareferencialog.source, lareferencialog.id_visit,"
|
||||
|
@ -68,10 +73,10 @@ public class LaReferenciaStats {
|
|||
|
||||
stmt.close();
|
||||
ConnectDB.getHiveConnection().close();
|
||||
log.info("Lareferencia Tables Created");
|
||||
logger.info("Lareferencia Tables Created");
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to create tables: " + e);
|
||||
logger.error("Failed to create tables: " + e);
|
||||
throw new Exception("Failed to create tables: " + e.toString(), e);
|
||||
// System.exit(0);
|
||||
}
|
||||
|
@ -103,31 +108,26 @@ public class LaReferenciaStats {
|
|||
|
||||
public void processLogs() throws Exception {
|
||||
try {
|
||||
System.out.println("====> Processing LaReferencia repository logs");
|
||||
logger.info("Processing LaReferencia repository logs");
|
||||
// processlaReferenciaLog();
|
||||
System.out.println("====> LaReferencia repository logs process done");
|
||||
log.info("LaReferencia repository process done");
|
||||
logger.info("LaReferencia repository logs process done");
|
||||
|
||||
System.out.println("====> LaReferencia removing double clicks");
|
||||
logger.info("LaReferencia removing double clicks");
|
||||
// removeDoubleClicks();
|
||||
System.out.println("====> LaReferencia removed double clicks");
|
||||
log.info("LaReferencia removing double clicks done");
|
||||
logger.info("LaReferencia removed double clicks");
|
||||
|
||||
System.out.println("====> LaReferencia creating viewsStats");
|
||||
logger.info("LaReferencia creating viewsStats");
|
||||
// viewsStats();
|
||||
System.out.println("====> LaReferencia created viewsStats");
|
||||
log.info("LaReferencia views done");
|
||||
System.out.println("====> LaReferencia creating downloadsStats");
|
||||
logger.info("LaReferencia created viewsStats");
|
||||
logger.info("LaReferencia creating downloadsStats");
|
||||
// downloadsStats();
|
||||
System.out.println("====> LaReferencia created downloadsStats");
|
||||
log.info("LaReferencia downloads done");
|
||||
System.out.println("====> LaReferencia updating Production Tables");
|
||||
logger.info("LaReferencia created downloadsStats");
|
||||
logger.info("LaReferencia updating Production Tables");
|
||||
updateProdTables();
|
||||
System.out.println("====> LaReferencia updated Production Tables");
|
||||
log.info("LaReferencia update productions tables done");
|
||||
logger.info("LaReferencia updated Production Tables");
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to process logs: " + e);
|
||||
logger.error("Failed to process logs: " + e);
|
||||
throw new Exception("Failed to process logs: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
@ -136,18 +136,18 @@ public class LaReferenciaStats {
|
|||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Adding JSON Serde jar");
|
||||
logger.info("Adding JSON Serde jar");
|
||||
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
|
||||
System.out.println("====> Added JSON Serde jar");
|
||||
logger.info("Added JSON Serde jar");
|
||||
|
||||
System.out.println("====> Dropping lareferencialogtmp_json table");
|
||||
logger.info("Dropping lareferencialogtmp_json table");
|
||||
String drop_lareferencialogtmp_json = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".lareferencialogtmp_json";
|
||||
stmt.executeUpdate(drop_lareferencialogtmp_json);
|
||||
System.out.println("====> Dropped lareferencialogtmp_json table");
|
||||
logger.info("Dropped lareferencialogtmp_json table");
|
||||
|
||||
System.out.println("====> Creating lareferencialogtmp_json");
|
||||
logger.info("Creating lareferencialogtmp_json");
|
||||
String create_lareferencialogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".lareferencialogtmp_json(\n" +
|
||||
|
@ -177,16 +177,16 @@ public class LaReferenciaStats {
|
|||
"LOCATION '" + ExecuteWorkflow.lareferenciaLogPath + "'\n" +
|
||||
"TBLPROPERTIES (\"transactional\"=\"false\")";
|
||||
stmt.executeUpdate(create_lareferencialogtmp_json);
|
||||
System.out.println("====> Created lareferencialogtmp_json");
|
||||
logger.info("Created lareferencialogtmp_json");
|
||||
|
||||
System.out.println("====> Dropping lareferencialogtmp table");
|
||||
logger.info("Dropping lareferencialogtmp table");
|
||||
String drop_lareferencialogtmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".lareferencialogtmp";
|
||||
stmt.executeUpdate(drop_lareferencialogtmp);
|
||||
System.out.println("====> Dropped lareferencialogtmp table");
|
||||
logger.info("Dropped lareferencialogtmp table");
|
||||
|
||||
System.out.println("====> Creating lareferencialogtmp");
|
||||
logger.info("Creating lareferencialogtmp");
|
||||
String create_lareferencialogtmp = "CREATE TABLE " +
|
||||
ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp(matomoid INT, " +
|
||||
"source STRING, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, " +
|
||||
|
@ -194,9 +194,9 @@ public class LaReferenciaStats {
|
|||
"clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets " +
|
||||
"stored as orc tblproperties('transactional'='true')";
|
||||
stmt.executeUpdate(create_lareferencialogtmp);
|
||||
System.out.println("====> Created lareferencialogtmp");
|
||||
logger.info("Created lareferencialogtmp");
|
||||
|
||||
System.out.println("====> Inserting into lareferencialogtmp");
|
||||
logger.info("Inserting into lareferencialogtmp");
|
||||
String insert_lareferencialogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp " +
|
||||
"SELECT DISTINCT cast(idSite as INT) as matomoid, CONCAT('opendoar____::', " +
|
||||
"actiondetail.customVariables.`2`.customVariablePageValue2) as source, idVisit as id_Visit, country, " +
|
||||
|
@ -207,7 +207,7 @@ public class LaReferenciaStats {
|
|||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp_json " +
|
||||
"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
|
||||
stmt.executeUpdate(insert_lareferencialogtmp);
|
||||
System.out.println("====> Inserted into lareferencialogtmp");
|
||||
logger.info("Inserted into lareferencialogtmp");
|
||||
|
||||
stmt.close();
|
||||
}
|
||||
|
@ -217,7 +217,7 @@ public class LaReferenciaStats {
|
|||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Cleaning download double clicks");
|
||||
logger.info("Cleaning download double clicks");
|
||||
// clean download double clicks
|
||||
String sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp WHERE EXISTS (" +
|
||||
"SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp " +
|
||||
|
@ -231,10 +231,10 @@ public class LaReferenciaStats {
|
|||
"AND lareferencialogtmp.timestamp=p1.timestamp)";
|
||||
stmt.executeUpdate(sql);
|
||||
stmt.close();
|
||||
System.out.println("====> Cleaned download double clicks");
|
||||
logger.info("Cleaned download double clicks");
|
||||
|
||||
stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
System.out.println("====> Cleaning action double clicks");
|
||||
logger.info("Cleaning action double clicks");
|
||||
// clean view double clicks
|
||||
sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp WHERE EXISTS (" +
|
||||
"SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp " +
|
||||
|
@ -248,7 +248,7 @@ public class LaReferenciaStats {
|
|||
"AND lareferencialogtmp.timestamp=p1.timestamp)";
|
||||
stmt.executeUpdate(sql);
|
||||
stmt.close();
|
||||
System.out.println("====> Cleaned action double clicks");
|
||||
logger.info("Cleaned action double clicks");
|
||||
// conn.close();
|
||||
}
|
||||
|
||||
|
@ -257,7 +257,7 @@ public class LaReferenciaStats {
|
|||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Creating la_result_views_monthly_tmp view");
|
||||
logger.info("Creating la_result_views_monthly_tmp view");
|
||||
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".la_result_views_monthly_tmp AS "
|
||||
+
|
||||
"SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
|
||||
|
@ -268,16 +268,16 @@ public class LaReferenciaStats {
|
|||
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
|
||||
"source ORDER BY source, entity_id";
|
||||
stmt.executeUpdate(sql);
|
||||
System.out.println("====> Created la_result_views_monthly_tmp view");
|
||||
logger.info("Created la_result_views_monthly_tmp view");
|
||||
|
||||
System.out.println("====> Dropping la_views_stats_tmp table");
|
||||
logger.info("Dropping la_views_stats_tmp table");
|
||||
sql = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".la_views_stats_tmp";
|
||||
stmt.executeUpdate(sql);
|
||||
System.out.println("====> Dropped la_views_stats_tmp table");
|
||||
logger.info("Dropped la_views_stats_tmp table");
|
||||
|
||||
System.out.println("====> Creating la_views_stats_tmp table");
|
||||
logger.info("Creating la_views_stats_tmp table");
|
||||
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".la_views_stats_tmp " +
|
||||
"AS SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date, " +
|
||||
"max(views) AS count, max(openaire_referrer) AS openaire " +
|
||||
|
@ -287,7 +287,7 @@ public class LaReferenciaStats {
|
|||
"GROUP BY d.id, ro.id, month " +
|
||||
"ORDER BY d.id, ro.id, month";
|
||||
stmt.executeUpdate(sql);
|
||||
System.out.println("====> Created la_views_stats_tmp table");
|
||||
logger.info("Created la_views_stats_tmp table");
|
||||
|
||||
stmt.close();
|
||||
ConnectDB.getHiveConnection().close();
|
||||
|
@ -298,7 +298,7 @@ public class LaReferenciaStats {
|
|||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Creating la_result_downloads_monthly_tmp view");
|
||||
logger.info("Creating la_result_downloads_monthly_tmp view");
|
||||
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
|
||||
+ ".la_result_downloads_monthly_tmp AS " +
|
||||
"SELECT entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
|
||||
|
@ -309,16 +309,16 @@ public class LaReferenciaStats {
|
|||
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
|
||||
"source ORDER BY source, entity_id";
|
||||
stmt.executeUpdate(sql);
|
||||
System.out.println("====> Created la_result_downloads_monthly_tmp view");
|
||||
logger.info("Created la_result_downloads_monthly_tmp view");
|
||||
|
||||
System.out.println("====> Dropping la_views_stats_tmp table");
|
||||
logger.info("Dropping la_views_stats_tmp table");
|
||||
sql = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".la_views_stats_tmp";
|
||||
stmt.executeUpdate(sql);
|
||||
System.out.println("====> Dropped la_views_stats_tmp table");
|
||||
logger.info("Dropped la_views_stats_tmp table");
|
||||
|
||||
System.out.println("====> Creating la_downloads_stats_tmp table");
|
||||
logger.info("Creating la_downloads_stats_tmp table");
|
||||
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".la_downloads_stats_tmp " +
|
||||
"AS SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date, " +
|
||||
"max(downloads) AS count, max(openaire_referrer) AS openaire " +
|
||||
|
@ -328,7 +328,7 @@ public class LaReferenciaStats {
|
|||
"GROUP BY d.id, ro.id, month " +
|
||||
"ORDER BY d.id, ro.id, month";
|
||||
stmt.executeUpdate(sql);
|
||||
System.out.println("====> Created la_downloads_stats_tmp table");
|
||||
logger.info("Created la_downloads_stats_tmp table");
|
||||
|
||||
stmt.close();
|
||||
ConnectDB.getHiveConnection().close();
|
||||
|
@ -339,12 +339,12 @@ public class LaReferenciaStats {
|
|||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Updating lareferencialog");
|
||||
logger.info("Updating lareferencialog");
|
||||
String sql = "insert into " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialog " +
|
||||
"select * from " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp";
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
System.out.println("====> Updating views_stats");
|
||||
logger.info("Updating views_stats");
|
||||
sql = "insert into " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
|
||||
"select * from " + ConnectDB.getUsageStatsDBSchema() + ".la_views_stats_tmp";
|
||||
stmt.executeUpdate(sql);
|
||||
|
@ -352,7 +352,7 @@ public class LaReferenciaStats {
|
|||
// sql = "insert into public.views_stats select * from la_views_stats_tmp;";
|
||||
// stmt.executeUpdate(sql);
|
||||
|
||||
System.out.println("====> Updating downloads_stats");
|
||||
logger.info("Updating downloads_stats");
|
||||
sql = "insert into " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
|
||||
"select * from " + ConnectDB.getUsageStatsDBSchema() + ".la_downloads_stats_tmp";
|
||||
stmt.executeUpdate(sql);
|
||||
|
@ -380,7 +380,7 @@ public class LaReferenciaStats {
|
|||
}
|
||||
// hdfs.close();
|
||||
} catch (Exception e) {
|
||||
log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logRepoPath));
|
||||
logger.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logRepoPath));
|
||||
throw new Exception("HDFS file path with exported data does not exist : " + logRepoPath, e);
|
||||
}
|
||||
|
||||
|
@ -413,7 +413,7 @@ public class LaReferenciaStats {
|
|||
|
||||
// fs.close();
|
||||
} catch (Exception e) {
|
||||
log.error(e);
|
||||
logger.error(e.getMessage());
|
||||
throw new Exception(e);
|
||||
}
|
||||
|
||||
|
|
|
@ -25,11 +25,15 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
import org.json.simple.parser.JSONParser;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author D. Pierrakos, S. Zoupanos
|
||||
*/
|
||||
public class PiwikDownloadLogs {
|
||||
|
||||
private final String piwikUrl;
|
||||
|
@ -42,7 +46,7 @@ public class PiwikDownloadLogs {
|
|||
private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
|
||||
private final String format = "&format=json";
|
||||
|
||||
private final Logger log = Logger.getLogger(this.getClass());
|
||||
private static final Logger logger = LoggerFactory.getLogger(PiwikDownloadLogs.class);
|
||||
|
||||
public PiwikDownloadLogs(String piwikUrl, String tokenAuth) {
|
||||
this.piwikUrl = piwikUrl;
|
||||
|
@ -75,7 +79,7 @@ public class PiwikDownloadLogs {
|
|||
|
||||
return response.toString();
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to get URL: " + e);
|
||||
logger.error("Failed to get URL: " + e);
|
||||
throw new Exception("Failed to get URL: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
@ -117,7 +121,7 @@ public class PiwikDownloadLogs {
|
|||
rs_date.close();
|
||||
|
||||
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
|
||||
log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
|
||||
logger.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
|
||||
|
||||
String period = "&period=day&date=" + sdf.format(date);
|
||||
String outFolder = "";
|
||||
|
|
|
@ -19,11 +19,15 @@ import org.apache.hadoop.fs.FileSystem;
|
|||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
import org.json.simple.parser.JSONParser;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author D. Pierrakos, S. Zoupanos
|
||||
*/
|
||||
public class PiwikStatsDB {
|
||||
|
||||
private String logPath;
|
||||
|
@ -32,7 +36,8 @@ public class PiwikStatsDB {
|
|||
|
||||
private Statement stmt = null;
|
||||
|
||||
private final Logger log = Logger.getLogger(this.getClass());
|
||||
private static final Logger logger = LoggerFactory.getLogger(PiwikStatsDB.class);
|
||||
|
||||
private String CounterRobotsURL;
|
||||
private ArrayList robotsList;
|
||||
|
||||
|
@ -77,7 +82,7 @@ public class PiwikStatsDB {
|
|||
stmt.executeUpdate(createDatabase);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to create database: " + e);
|
||||
logger.error("Failed to create database: " + e);
|
||||
throw new Exception("Failed to create database: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
@ -112,10 +117,9 @@ public class PiwikStatsDB {
|
|||
|
||||
stmt.close();
|
||||
ConnectDB.getHiveConnection().close();
|
||||
log.info("Usage Tables Created");
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to create tables: " + e);
|
||||
logger.error("Failed to create tables: " + e);
|
||||
throw new Exception("Failed to create tables: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
@ -153,10 +157,9 @@ public class PiwikStatsDB {
|
|||
//////////////////////////////////////////////////
|
||||
|
||||
stmt.close();
|
||||
log.info("Usage Tmp Tables Created");
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to create tmptables: " + e);
|
||||
logger.error("Failed to create tmptables: " + e);
|
||||
throw new Exception("Failed to create tmp tables: " + e.toString(), e);
|
||||
// System.exit(0);
|
||||
}
|
||||
|
@ -167,47 +170,41 @@ public class PiwikStatsDB {
|
|||
ReadCounterRobotsList counterRobots = new ReadCounterRobotsList(this.getCounterRobotsURL());
|
||||
this.robotsList = counterRobots.getRobotsPatterns();
|
||||
|
||||
System.out.println("====> Processing repository logs");
|
||||
logger.info("Processing repository logs");
|
||||
// processRepositoryLog();
|
||||
System.out.println("====> Repository logs process done");
|
||||
log.info("repository process done");
|
||||
logger.info("Repository logs process done");
|
||||
|
||||
System.out.println("====> Removing double clicks");
|
||||
logger.info("Removing double clicks");
|
||||
// removeDoubleClicks();
|
||||
System.out.println("====> Removing double clicks done");
|
||||
log.info("removing double clicks done");
|
||||
logger.info("Removing double clicks done");
|
||||
|
||||
System.out.println("====> Cleaning oai");
|
||||
logger.info("Cleaning oai");
|
||||
// cleanOAI();
|
||||
System.out.println("====> Cleaning oai done");
|
||||
log.info("cleaning oai done");
|
||||
logger.info("Cleaning oai done");
|
||||
|
||||
System.out.println("====> ViewsStats processing starts");
|
||||
logger.info("ViewsStats processing starts");
|
||||
// viewsStats();
|
||||
System.out.println("====> ViewsStats processing ends");
|
||||
logger.info("ViewsStats processing ends");
|
||||
|
||||
System.out.println("====> DownloadsStats processing starts");
|
||||
logger.info("DownloadsStats processing starts");
|
||||
// downloadsStats();
|
||||
System.out.println("====> DownloadsStats processing starts");
|
||||
logger.info("DownloadsStats processing starts");
|
||||
|
||||
System.out.println("====> Processing portal logs");
|
||||
logger.info("Processing portal logs");
|
||||
// processPortalLog();
|
||||
System.out.println("====> Portal logs process done");
|
||||
log.info("portal process done");
|
||||
logger.info("Portal logs process done");
|
||||
|
||||
System.out.println("====> Processing portal usagestats");
|
||||
logger.info("Processing portal usagestats");
|
||||
// To see why this never ends
|
||||
portalStats();
|
||||
log.info("portal usagestats done");
|
||||
System.out.println("====> Portal usagestats process done");
|
||||
logger.info("Portal usagestats process done");
|
||||
|
||||
System.out.println("====> Updating Production Tables");
|
||||
logger.info("Updating Production Tables");
|
||||
// updateProdTables();
|
||||
System.out.println("====> Updated Production Tables");
|
||||
log.info("updateProdTables done");
|
||||
logger.info("Updated Production Tables");
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to process logs: " + e);
|
||||
logger.error("Failed to process logs: " + e);
|
||||
throw new Exception("Failed to process logs: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
@ -228,18 +225,18 @@ public class PiwikStatsDB {
|
|||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Adding JSON Serde jar");
|
||||
logger.info("Adding JSON Serde jar");
|
||||
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
|
||||
System.out.println("====> Added JSON Serde jar");
|
||||
logger.info("Added JSON Serde jar");
|
||||
|
||||
System.out.println("====> Dropping piwiklogtmp_json table");
|
||||
logger.info("Dropping piwiklogtmp_json table");
|
||||
String drop_piwiklogtmp_json = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".piwiklogtmp_json";
|
||||
stmt.executeUpdate(drop_piwiklogtmp_json);
|
||||
System.out.println("====> Dropped piwiklogtmp_json table");
|
||||
logger.info("Dropped piwiklogtmp_json table");
|
||||
|
||||
System.out.println("====> Creating piwiklogtmp_json");
|
||||
logger.info("Creating piwiklogtmp_json");
|
||||
String create_piwiklogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".piwiklogtmp_json(\n" +
|
||||
|
@ -265,25 +262,25 @@ public class PiwikStatsDB {
|
|||
"LOCATION '" + ExecuteWorkflow.repoLogPath + "'\n" +
|
||||
"TBLPROPERTIES (\"transactional\"=\"false\")";
|
||||
stmt.executeUpdate(create_piwiklogtmp_json);
|
||||
System.out.println("====> Created piwiklogtmp_json");
|
||||
logger.info("Created piwiklogtmp_json");
|
||||
|
||||
System.out.println("====> Dropping piwiklogtmp table");
|
||||
logger.info("Dropping piwiklogtmp table");
|
||||
String drop_piwiklogtmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".piwiklogtmp";
|
||||
stmt.executeUpdate(drop_piwiklogtmp);
|
||||
System.out.println("====> Dropped piwiklogtmp");
|
||||
logger.info("Dropped piwiklogtmp");
|
||||
|
||||
System.out.println("====> Creating piwiklogtmp");
|
||||
logger.info("Creating piwiklogtmp");
|
||||
String create_piwiklogtmp = "CREATE TABLE " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".piwiklogtmp (source BIGINT, id_Visit STRING, country STRING, action STRING, url STRING, " +
|
||||
"entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) " +
|
||||
"clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true')";
|
||||
stmt.executeUpdate(create_piwiklogtmp);
|
||||
System.out.println("====> Created piwiklogtmp");
|
||||
logger.info("Created piwiklogtmp");
|
||||
|
||||
System.out.println("====> Inserting into piwiklogtmp");
|
||||
logger.info("Inserting into piwiklogtmp");
|
||||
String insert_piwiklogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"SELECT DISTINCT cast(idSite as BIGINT) as source, idVisit as id_Visit, country, " +
|
||||
"actiondetail.type as action, actiondetail.url as url, " +
|
||||
|
@ -293,7 +290,7 @@ public class PiwikStatsDB {
|
|||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp_json\n" +
|
||||
"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
|
||||
stmt.executeUpdate(insert_piwiklogtmp);
|
||||
System.out.println("====> Inserted into piwiklogtmp");
|
||||
logger.info("Inserted into piwiklogtmp");
|
||||
|
||||
stmt.close();
|
||||
}
|
||||
|
@ -302,7 +299,7 @@ public class PiwikStatsDB {
|
|||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Cleaning download double clicks");
|
||||
logger.info("Cleaning download double clicks");
|
||||
// clean download double clicks
|
||||
String sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"WHERE EXISTS (\n" +
|
||||
|
@ -316,10 +313,10 @@ public class PiwikStatsDB {
|
|||
"AND piwiklogtmp.source=p1.source AND piwiklogtmp.id_visit=p1.id_visit \n" +
|
||||
"AND piwiklogtmp.action=p1.action AND piwiklogtmp.entity_id=p1.entity_id AND piwiklogtmp.timestamp=p1.timestamp)";
|
||||
stmt.executeUpdate(sql);
|
||||
System.out.println("====> Cleaned download double clicks");
|
||||
logger.info("Cleaned download double clicks");
|
||||
|
||||
// clean view double clicks
|
||||
System.out.println("====> Cleaning action double clicks");
|
||||
logger.info("Cleaning action double clicks");
|
||||
sql = "DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
|
||||
"WHERE EXISTS (\n" +
|
||||
"SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp \n" +
|
||||
|
@ -332,7 +329,7 @@ public class PiwikStatsDB {
|
|||
"AND piwiklogtmp.source=p1.source AND piwiklogtmp.id_visit=p1.id_visit \n" +
|
||||
"AND piwiklogtmp.action=p1.action AND piwiklogtmp.entity_id=p1.entity_id AND piwiklogtmp.timestamp=p1.timestamp)";
|
||||
stmt.executeUpdate(sql);
|
||||
System.out.println("====> Cleaned action double clicks");
|
||||
logger.info("Cleaned action double clicks");
|
||||
stmt.close();
|
||||
}
|
||||
|
||||
|
@ -340,14 +337,14 @@ public class PiwikStatsDB {
|
|||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Dropping result_views_monthly_tmp table");
|
||||
logger.info("Dropping result_views_monthly_tmp table");
|
||||
String drop_result_views_monthly_tmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".result_views_monthly_tmp";
|
||||
stmt.executeUpdate(drop_result_views_monthly_tmp);
|
||||
System.out.println("====> Dropped result_views_monthly_tmp table");
|
||||
logger.info("Dropped result_views_monthly_tmp table");
|
||||
|
||||
System.out.println("====> Creating result_views_monthly_tmp table");
|
||||
logger.info("Creating result_views_monthly_tmp table");
|
||||
String create_result_views_monthly_tmp = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
|
||||
+ ".result_views_monthly_tmp " +
|
||||
"AS SELECT entity_id AS id, " +
|
||||
|
@ -359,16 +356,16 @@ public class PiwikStatsDB {
|
|||
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
|
||||
"source ORDER BY source, entity_id";
|
||||
stmt.executeUpdate(create_result_views_monthly_tmp);
|
||||
System.out.println("====> Created result_views_monthly_tmp table");
|
||||
logger.info("Created result_views_monthly_tmp table");
|
||||
|
||||
System.out.println("====> Dropping views_stats_tmp table");
|
||||
logger.info("Dropping views_stats_tmp table");
|
||||
String drop_views_stats_tmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".views_stats_tmp";
|
||||
stmt.executeUpdate(drop_views_stats_tmp);
|
||||
System.out.println("====> Dropped views_stats_tmp table");
|
||||
logger.info("Dropped views_stats_tmp table");
|
||||
|
||||
System.out.println("====> Creating views_stats_tmp table");
|
||||
logger.info("Creating views_stats_tmp table");
|
||||
String create_views_stats_tmp = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||
+ ".views_stats_tmp " +
|
||||
"AS SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
|
||||
|
@ -379,29 +376,29 @@ public class PiwikStatsDB {
|
|||
"GROUP BY d.id, ro.id, month " +
|
||||
"ORDER BY d.id, ro.id, month";
|
||||
stmt.executeUpdate(create_views_stats_tmp);
|
||||
System.out.println("====> Created views_stats_tmp table");
|
||||
logger.info("Created views_stats_tmp table");
|
||||
|
||||
System.out.println("====> Dropping views_stats table");
|
||||
logger.info("Dropping views_stats table");
|
||||
String drop_views_stats = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".views_stats";
|
||||
stmt.executeUpdate(drop_views_stats);
|
||||
System.out.println("====> Dropped views_stats table");
|
||||
logger.info("Dropped views_stats table");
|
||||
|
||||
System.out.println("====> Creating views_stats table");
|
||||
logger.info("Creating views_stats table");
|
||||
String create_view_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
|
||||
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
|
||||
stmt.executeUpdate(create_view_stats);
|
||||
System.out.println("====> Created views_stats table");
|
||||
logger.info("Created views_stats table");
|
||||
|
||||
System.out.println("====> Dropping pageviews_stats_tmp table");
|
||||
logger.info("Dropping pageviews_stats_tmp table");
|
||||
String drop_pageviews_stats_tmp = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".pageviews_stats_tmp";
|
||||
stmt.executeUpdate(drop_pageviews_stats_tmp);
|
||||
System.out.println("====> Dropped pageviews_stats_tmp table");
|
||||
logger.info("Dropped pageviews_stats_tmp table");
|
||||
|
||||
System.out.println("====> Creating pageviews_stats_tmp table");
|
||||
logger.info("Creating pageviews_stats_tmp table");
|
||||
String create_pageviews_stats_tmp = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||
+ ".pageviews_stats_tmp AS SELECT " +
|
||||
"'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count " +
|
||||
|
@ -411,21 +408,21 @@ public class PiwikStatsDB {
|
|||
"GROUP BY d.id, ro.id, month " +
|
||||
"ORDER BY d.id, ro.id, month";
|
||||
stmt.executeUpdate(create_pageviews_stats_tmp);
|
||||
System.out.println("====> Created pageviews_stats_tmp table");
|
||||
logger.info("Created pageviews_stats_tmp table");
|
||||
|
||||
System.out.println("====> Droping pageviews_stats table");
|
||||
logger.info("Droping pageviews_stats table");
|
||||
String drop_pageviews_stats = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".pageviews_stats";
|
||||
stmt.executeUpdate(drop_pageviews_stats);
|
||||
System.out.println("====> Dropped pageviews_stats table");
|
||||
logger.info("Dropped pageviews_stats table");
|
||||
|
||||
System.out.println("====> Creating pageviews_stats table");
|
||||
logger.info("Creating pageviews_stats table");
|
||||
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||
+ ".pageviews_stats " +
|
||||
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
|
||||
stmt.executeUpdate(create_pageviews_stats);
|
||||
System.out.println("====> Created pageviews_stats table");
|
||||
logger.info("Created pageviews_stats table");
|
||||
|
||||
stmt.close();
|
||||
ConnectDB.getHiveConnection().close();
|
||||
|
@ -435,14 +432,14 @@ public class PiwikStatsDB {
|
|||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Dropping result_downloads_monthly_tmp view");
|
||||
logger.info("Dropping result_downloads_monthly_tmp view");
|
||||
String drop_result_views_monthly_tmp = "DROP VIEW IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".result_views_monthly_tmp";
|
||||
stmt.executeUpdate(drop_result_views_monthly_tmp);
|
||||
System.out.println("====> Dropped result_downloads_monthly_tmp view");
|
||||
logger.info("Dropped result_downloads_monthly_tmp view");
|
||||
|
||||
System.out.println("====> Creating result_views_monthly_tmp view");
|
||||
logger.info("Creating result_views_monthly_tmp view");
|
||||
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp " +
|
||||
"AS SELECT entity_id AS id, COUNT(entity_id) as downloads, " +
|
||||
"SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, " +
|
||||
|
@ -452,16 +449,16 @@ public class PiwikStatsDB {
|
|||
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source " +
|
||||
"ORDER BY source, entity_id, month";
|
||||
stmt.executeUpdate(sql);
|
||||
System.out.println("====> Created result_views_monthly_tmp view");
|
||||
logger.info("Created result_views_monthly_tmp view");
|
||||
|
||||
System.out.println("====> Dropping downloads_stats_tmp table");
|
||||
logger.info("Dropping downloads_stats_tmp table");
|
||||
String drop_views_stats = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".downloads_stats_tmp";
|
||||
stmt.executeUpdate(drop_views_stats);
|
||||
System.out.println("====> Dropped downloads_stats_tmp table");
|
||||
logger.info("Dropped downloads_stats_tmp table");
|
||||
|
||||
System.out.println("====> Creating downloads_stats_tmp table");
|
||||
logger.info("Creating downloads_stats_tmp table");
|
||||
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " +
|
||||
"SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
|
||||
"max(downloads) AS count, max(openaire_referrer) AS openaire " +
|
||||
|
@ -470,26 +467,26 @@ public class PiwikStatsDB {
|
|||
"WHERE p.source=d.piwik_id and p.id=ro.oid " +
|
||||
"GROUP BY d.id, ro.id, month " +
|
||||
"ORDER BY d.id, ro.id, month";
|
||||
System.out.println("====> Created downloads_stats_tmp table");
|
||||
logger.info("Created downloads_stats_tmp table");
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
System.out.println("====> Dropping downloads_stats table");
|
||||
logger.info("Dropping downloads_stats table");
|
||||
String drop_pageviews_stats = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".downloads_stats";
|
||||
stmt.executeUpdate(drop_pageviews_stats);
|
||||
System.out.println("====> Dropped downloads_stats table");
|
||||
logger.info("Dropped downloads_stats table");
|
||||
|
||||
System.out.println("====> Creating downloads_stats table");
|
||||
logger.info("Creating downloads_stats table");
|
||||
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||
+ ".downloads_stats " +
|
||||
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
|
||||
stmt.executeUpdate(create_pageviews_stats);
|
||||
System.out.println("====> Created downloads_stats table");
|
||||
logger.info("Created downloads_stats table");
|
||||
|
||||
System.out.println("====> Dropping result_downloads_monthly_tmp view");
|
||||
logger.info("Dropping result_downloads_monthly_tmp view");
|
||||
sql = "DROP VIEW IF EXISTS result_downloads_monthly_tmp";
|
||||
System.out.println("====> Dropped result_downloads_monthly_tmp view");
|
||||
logger.info("Dropped result_downloads_monthly_tmp view");
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
stmt.close();
|
||||
|
@ -721,18 +718,18 @@ public class PiwikStatsDB {
|
|||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
ConnectDB.getHiveConnection().setAutoCommit(false);
|
||||
|
||||
System.out.println("====> Adding JSON Serde jar");
|
||||
logger.info("Adding JSON Serde jar");
|
||||
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
|
||||
System.out.println("====> Added JSON Serde jar");
|
||||
logger.info("Added JSON Serde jar");
|
||||
|
||||
System.out.println("====> Dropping process_portal_log_tmp_json table");
|
||||
logger.info("Dropping process_portal_log_tmp_json table");
|
||||
String drop_process_portal_log_tmp_json = "DROP TABLE IF EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() +
|
||||
".piwiklogtmp_json";
|
||||
stmt.executeUpdate(drop_process_portal_log_tmp_json);
|
||||
System.out.println("====> Dropped process_portal_log_tmp_json table");
|
||||
logger.info("Dropped process_portal_log_tmp_json table");
|
||||
|
||||
System.out.println("====> Creating process_portal_log_tmp_json");
|
||||
logger.info("Creating process_portal_log_tmp_json");
|
||||
String create_process_portal_log_tmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
|
||||
ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp_json(" +
|
||||
" `idSite` STRING,\n" +
|
||||
|
@ -752,25 +749,25 @@ public class PiwikStatsDB {
"LOCATION '" + ExecuteWorkflow.repoLogPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_process_portal_log_tmp_json);
System.out.println("====> Created process_portal_log_tmp_json");
logger.info("Created process_portal_log_tmp_json");

System.out.println("====> Droping process_portal_log_tmp table");
logger.info("Droping process_portal_log_tmp table");
String drop_process_portal_log_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".process_portal_log_tmp";
stmt.executeUpdate(drop_process_portal_log_tmp);
System.out.println("====> Dropped process_portal_log_tmp");
logger.info("Dropped process_portal_log_tmp");

System.out.println("====> Creating process_portal_log_tmp");
logger.info("Creating process_portal_log_tmp");
String create_process_portal_log_tmp = "CREATE TABLE " +
ConnectDB.getUsageStatsDBSchema() +
".process_portal_log_tmp (source BIGINT, id_visit STRING, country STRING, action STRING, url STRING, " +
"entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) " +
"clustered by (source, id_visit, timestamp) into 100 buckets stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(create_process_portal_log_tmp);
System.out.println("====> Created process_portal_log_tmp");
logger.info("Created process_portal_log_tmp");

System.out.println("====> Inserting into process_portal_log_tmp");
logger.info("Inserting into process_portal_log_tmp");
String insert_process_portal_log_tmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
+ ".process_portal_log_tmp " +
"SELECT DISTINCT cast(idSite as BIGINT) as source, idVisit as id_Visit, country, actiondetail.type as action, "

@ -802,7 +799,7 @@ public class PiwikStatsDB {
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp_json " +
"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
stmt.executeUpdate(insert_process_portal_log_tmp);
System.out.println("====> Inserted into process_portal_log_tmp");
logger.info("Inserted into process_portal_log_tmp");

stmt.close();
}
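The hunks above map raw Matomo JSON logs onto an external Hive table and then flatten the actiondetails array with LATERAL VIEW explode. A condensed, hypothetical sketch of that flow over JDBC; the table names portal_log_json and portal_log_flat, the trimmed column list, and the target table's existence are assumptions made for the example, not names from this module:

import java.sql.Connection;
import java.sql.Statement;

public class PortalLogSketch {

	// Assumes an open Hive JDBC connection; schema and HDFS path are placeholders.
	public static void loadPortalLog(Connection conn, String schema, String logPath) throws Exception {
		Statement stmt = conn.createStatement();

		// JSON SerDe shipped with CDH; needed to read the raw Matomo export.
		stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");

		// External table over the raw JSON files (columns trimmed for the example).
		stmt.executeUpdate("CREATE EXTERNAL TABLE IF NOT EXISTS " + schema + ".portal_log_json("
			+ " `idSite` STRING, `idVisit` STRING, `country` STRING,"
			+ " `actiondetails` ARRAY<struct<type:STRING, url:STRING>>)"
			+ " ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'"
			+ " LOCATION '" + logPath + "'"
			+ " TBLPROPERTIES (\"transactional\"=\"false\")");

		// One output row per action: explode the array and read one struct per row.
		// portal_log_flat is assumed to exist with matching columns.
		stmt.executeUpdate("INSERT INTO " + schema + ".portal_log_flat"
			+ " SELECT cast(idSite AS BIGINT), idVisit, country, actiondetail.type, actiondetail.url"
			+ " FROM " + schema + ".portal_log_json"
			+ " LATERAL VIEW explode(actiondetails) a AS actiondetail");

		stmt.close();
	}
}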
@ -837,7 +834,7 @@ public class PiwikStatsDB {
// IN (SELECT roid.oid FROM openaire_prod_stats_20200821.result_oids roid WHERE roid.oid IS NOT NULL AND
// roid.oid != '');

System.out.println("====> PortalStats - Step 1");
logger.info("PortalStats - Step 1");
String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'oaItem', `timestamp`, referrer_name, agent "
+

@ -848,7 +845,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> PortalStats - Step 2");
logger.info("PortalStats - Step 2");
stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'datasource', `timestamp`, referrer_name, agent "

@ -860,7 +857,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> PortalStats - Step 3");
logger.info("PortalStats - Step 3");
stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'organization', `timestamp`, referrer_name, agent "

@ -872,7 +869,7 @@ public class PiwikStatsDB {
// stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> PortalStats - Step 4");
logger.info("PortalStats - Step 4");
stmt = con.createStatement();
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SELECT DISTINCT source, id_visit, country, action, url, entity_id, 'project', `timestamp`, referrer_name, agent "
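The four PortalStats steps differ only in the literal entity type ('oaItem', 'datasource', 'organization', 'project') written into piwiklogtmp. A loop-based variant is sketched below as a hypothetical refactoring, not what the commit does; the FROM clause is an assumption, since the hunks truncate the rest of each SELECT:

import java.sql.Connection;
import java.sql.Statement;

public class PortalStatsSketch {

	// Hypothetical consolidation of the repeated "PortalStats - Step N" blocks.
	public static void insertPortalStats(Connection con, String schema) throws Exception {
		String[] entityTypes = { "oaItem", "datasource", "organization", "project" };
		for (String entityType : entityTypes) {
			Statement stmt = con.createStatement();
			String sql = "INSERT INTO " + schema + ".piwiklogtmp "
				+ "SELECT DISTINCT source, id_visit, country, action, url, entity_id, "
				+ "'" + entityType + "', `timestamp`, referrer_name, agent "
				+ "FROM " + schema + ".process_portal_log_tmp"; // source table assumed; the original WHERE clause is not shown above
			stmt.executeUpdate(sql);
			stmt.close();
		}
	}
}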
@ -890,7 +887,7 @@ public class PiwikStatsDB {
private void cleanOAI() throws Exception {
ConnectDB.getHiveConnection().setAutoCommit(false);

System.out.println("====> Cleaning oai - Step 1");
logger.info("Cleaning oai - Step 1");
stmt = ConnectDB.getHiveConnection().createStatement();
String sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chlc.min-saude.pt/'," +

@ -898,7 +895,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 2");
logger.info("Cleaning oai - Step 2");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hospitaldebraga.pt/'," +

@ -906,7 +903,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 3");
logger.info("Cleaning oai - Step 3");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipl.pt/'," +

@ -914,7 +911,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 4");
logger.info("Cleaning oai - Step 4");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:bibliotecadigital.ipb.pt/'," +

@ -922,7 +919,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 5");
logger.info("Cleaning oai - Step 5");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ismai.pt/'," +

@ -930,7 +927,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 6");
logger.info("Cleaning oai - Step 6");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorioaberto.uab.pt/'," +

@ -938,7 +935,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 7");
logger.info("Cleaning oai - Step 7");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.uac.pt/'," +

@ -946,7 +943,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 8");
logger.info("Cleaning oai - Step 8");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.insa.pt/'," +

@ -954,7 +951,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 9");
logger.info("Cleaning oai - Step 9");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipcb.pt/'," +

@ -962,7 +959,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 10");
logger.info("Cleaning oai - Step 10");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ispa.pt/'," +

@ -970,7 +967,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 11");
logger.info("Cleaning oai - Step 11");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chporto.pt/'," +

@ -978,7 +975,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 12");
logger.info("Cleaning oai - Step 12");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ucp.pt/'," +

@ -986,7 +983,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 13");
logger.info("Cleaning oai - Step 13");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:rihuc.huc.min-saude.pt/'," +

@ -994,7 +991,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 14");
logger.info("Cleaning oai - Step 14");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipv.pt/'," +

@ -1002,7 +999,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 15");
logger.info("Cleaning oai - Step 15");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:www.repository.utl.pt/'," +

@ -1010,7 +1007,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 16");
logger.info("Cleaning oai - Step 16");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:run.unl.pt/'," +

@ -1018,7 +1015,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 17");
logger.info("Cleaning oai - Step 17");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:sapientia.ualg.pt/'," +

@ -1026,7 +1023,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 18");
logger.info("Cleaning oai - Step 18");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipsantarem.pt/'," +

@ -1034,7 +1031,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 19");
logger.info("Cleaning oai - Step 19");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:arca.igc.gulbenkian.pt/'," +

@ -1042,7 +1039,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 20");
logger.info("Cleaning oai - Step 20");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:ubibliorum.ubi.pt/'," +

@ -1050,7 +1047,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 21");
logger.info("Cleaning oai - Step 21");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:digituma.uma.pt/'," +

@ -1058,7 +1055,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 22");
logger.info("Cleaning oai - Step 22");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ul.pt/'," +

@ -1066,7 +1063,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 23");
logger.info("Cleaning oai - Step 23");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hff.min-saude.pt/'," +

@ -1074,7 +1071,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 24");
logger.info("Cleaning oai - Step 24");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorium.sdum.uminho.pt/'," +

@ -1082,7 +1079,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 25");
logger.info("Cleaning oai - Step 25");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:recipp.ipp.pt/'," +

@ -1090,7 +1087,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 26");
logger.info("Cleaning oai - Step 26");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:bdigital.ufp.pt/'," +

@ -1098,7 +1095,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 27");
logger.info("Cleaning oai - Step 27");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.lneg.pt/'," +

@ -1106,7 +1103,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 28");
logger.info("Cleaning oai - Step 28");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:iconline.ipleiria.pt/'," +

@ -1114,7 +1111,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Step 29");
logger.info("Cleaning oai - Step 29");
stmt = ConnectDB.getHiveConnection().createStatement();
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:comum.rcaap.pt/'," +

@ -1122,7 +1119,7 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Done, closing connection");
logger.info("Cleaning oai - Done, closing connection");
ConnectDB.getHiveConnection().close();
}
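cleanOAI() applies the same regexp_replace to piwiklogtmp once per repository prefix, twenty-nine times in a row. A table-driven sketch of that normalization follows; the replacement strings are not visible in the truncated hunks above, so the second column and the trimmed prefix list are placeholders, not what the workflow actually writes:

import java.sql.Statement;

public class CleanOaiSketch {

	// Hypothetical loop over the OAI prefixes handled one-by-one in cleanOAI().
	// rule[0] is the pattern, rule[1] the replacement; replacements here are guesses.
	private static final String[][] OAI_RULES = {
		{ "^oai:repositorio.chlc.min-saude.pt/", "oai:repositorio.chlc.min-saude.pt:" },
		{ "^oai:repositorio.hospitaldebraga.pt/", "oai:repositorio.hospitaldebraga.pt:" },
		{ "^oai:comum.rcaap.pt/", "oai:comum.rcaap.pt:" }
		// ... remaining prefixes elided
	};

	public static void normalizeOaiPrefixes(String schema) throws Exception {
		for (String[] rule : OAI_RULES) {
			Statement stmt = ConnectDB.getHiveConnection().createStatement();
			stmt.executeUpdate("UPDATE " + schema + ".piwiklogtmp "
				+ "SET entity_id = regexp_replace(entity_id, '" + rule[0] + "', '" + rule[1] + "')");
			stmt.close();
		}
	}
}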
@ -1132,7 +1129,7 @@ public class PiwikStatsDB {
try {
url = URLDecoder.decode(url, "UTF-8");
} catch (Exception e) {
log.info(url);
logger.info("Error when decoding the following URL: " + url);
}
if (url.indexOf("datasourceId=") > 0 && url.substring(url.indexOf("datasourceId=") + 13).length() >= 46) {
url = "datasource|"
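The hunk above decodes the incoming URL and then branches on the "datasourceId=" marker. A small, self-contained sketch of that decode-then-inspect pattern; returning only the identifier substring is an illustrative simplification, the original builds a longer "datasource|..." key:

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;

public class UrlSketch {

	// Decode the URL, then pull out the 46-character identifier that follows "datasourceId=".
	public static String extractDatasourceId(String url) {
		try {
			url = URLDecoder.decode(url, "UTF-8");
		} catch (UnsupportedEncodingException | IllegalArgumentException e) {
			// Keep the raw URL if it cannot be decoded, mirroring the lenient catch above.
		}
		int idx = url.indexOf("datasourceId=");
		if (idx > 0 && url.substring(idx + 13).length() >= 46) {
			// 13 is the length of "datasourceId="; 46 is the identifier length checked above.
			return url.substring(idx + 13, idx + 13 + 46);
		}
		return null;
	}
}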
@ -1169,46 +1166,45 @@ public class PiwikStatsDB {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);

System.out.println("====> Inserting data to piwiklog");
logger.info("Inserting data to piwiklog");
String sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklog " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp";
stmt.executeUpdate(sql);

System.out.println("====> Inserting data to views_stats");
logger.info("Inserting data to views_stats");
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
stmt.executeUpdate(sql);

System.out.println("====> Inserting data to downloads_stats");
logger.info("Inserting data to downloads_stats");
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
stmt.executeUpdate(sql);

System.out.println("====> Inserting data to pageviews_stats");
logger.info("Inserting data to pageviews_stats");
sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " +
"SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
stmt.executeUpdate(sql);

System.out.println("====> Dropping table views_stats_tmp");
logger.info("Dropping table views_stats_tmp");
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
stmt.executeUpdate(sql);

System.out.println("====> Dropping table downloads_stats_tmp");
logger.info("Dropping table downloads_stats_tmp");
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
stmt.executeUpdate(sql);

System.out.println("====> Dropping table pageviews_stats_tmp");
logger.info("Dropping table pageviews_stats_tmp");
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
stmt.executeUpdate(sql);

System.out.println("====> Dropping table process_portal_log_tmp");
logger.info("Dropping table process_portal_log_tmp");
sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp";
stmt.executeUpdate(sql);

stmt.close();
ConnectDB.getHiveConnection().close();

log.info("updateProdTables done");
}

private ArrayList<String> listHdfsDir(String dir) throws Exception {
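updateProdTables repeats the same two statements per table: copy the *_tmp table into its production counterpart, then drop the temporary table. A compact helper expressing that pattern; the helper and method names are hypothetical, only the SQL shape comes from the hunk above:

import java.sql.Statement;

public class PromoteSketch {

	// One promotion: INSERT INTO prod SELECT * FROM tmp, then drop the tmp table.
	public static void promote(Statement stmt, String schema, String table) throws Exception {
		stmt.executeUpdate("INSERT INTO " + schema + "." + table
			+ " SELECT * FROM " + schema + "." + table + "_tmp");
		stmt.executeUpdate("DROP TABLE IF EXISTS " + schema + "." + table + "_tmp");
	}

	public static void promoteAll(Statement stmt, String schema) throws Exception {
		for (String table : new String[] { "views_stats", "downloads_stats", "pageviews_stats" }) {
			promote(stmt, schema, table);
		}
	}
}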
@ -1227,7 +1223,7 @@ public class PiwikStatsDB {

hdfs.close();
} catch (Exception e) {
log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logPath));
logger.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logPath));
throw new Exception("HDFS file path with exported data does not exist : " + logPath, e);
}
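listHdfsDir walks an HDFS path with the Hadoop FileSystem API and fails loudly when the export path is missing. A self-contained sketch of that listing under the same Hadoop API; this is an illustration, not the method body from PiwikStatsDB:

import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsListSketch {

	// Collects the absolute paths of the entries directly under dir (no recursion).
	public static ArrayList<String> listHdfsDir(String dir) throws Exception {
		Configuration conf = new Configuration();
		FileSystem hdfs = FileSystem.get(conf);
		ArrayList<String> fileNames = new ArrayList<String>();
		try {
			for (FileStatus status : hdfs.listStatus(new Path(dir))) {
				fileNames.add(status.getPath().toString());
			}
		} catch (Exception e) {
			throw new Exception("HDFS file path with exported data does not exist : " + dir, e);
		} finally {
			hdfs.close();
		}
		return fileNames;
	}
}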
@ -1260,7 +1256,7 @@ public class PiwikStatsDB {

// fs.close();
} catch (Exception e) {
log.error(e);
logger.error(e.getMessage());
throw new Exception(e);
}
@ -7,10 +7,13 @@
package eu.dnetlib.oa.graph.usagestats.export;

/**
* @author dpie
*
* @author D. Pierrakos, S. Zoupanos
*
*/

/**
* @author dpie
* @author D. Pierrakos, S. Zoupanos
*/
import java.io.BufferedReader;
import java.io.IOException;
@ -21,20 +21,21 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Created by dpie
* @author D. Pierrakos, S. Zoupanos
*/
public class SarcStats {

private Statement stmt = null;

private final Logger log = Logger.getLogger(this.getClass());
private static final Logger logger = LoggerFactory.getLogger(SarcStats.class);

public SarcStats() throws Exception {
// createTables();
@ -61,9 +62,9 @@ public class SarcStats {

stmt.close();
ConnectDB.getHiveConnection().close();
log.info("Sushi Tables Created");
logger.info("Sushi Tables Created");
} catch (Exception e) {
log.error("Failed to create tables: " + e);
logger.error("Failed to create tables: " + e);
throw new Exception("Failed to create tables: " + e.toString(), e);
}
}

@ -73,18 +74,18 @@ public class SarcStats {
Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);

System.out.println("====> Adding JSON Serde jar");
logger.info("Adding JSON Serde jar");
stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
System.out.println("====> Added JSON Serde jar");
logger.info("Added JSON Serde jar");

System.out.println("====> Dropping sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
logger.info("Dropping sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
String drop_sarc_sushilogtmp_json_array = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".sarc_sushilogtmp_json_array_" + issn.replace("-", "_");
stmt.executeUpdate(drop_sarc_sushilogtmp_json_array);
System.out.println("====> Dropped sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
logger.info("Dropped sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");

System.out.println("====> Creating sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
logger.info("Creating sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
String create_sarc_sushilogtmp_json_array = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + "(\n" +
" `ItemIdentifier` ARRAY<\n" +
@ -108,16 +109,16 @@ public class SarcStats {
"LOCATION '" + sarcsReportPathArray + "/" + issn + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_sarc_sushilogtmp_json_array);
System.out.println("====> Created sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");
logger.info("Created sarc_sushilogtmp_json_array_" + issn.replace("-", "_") + " table");

System.out.println("====> Dropping sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
logger.info("Dropping sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
String drop_sarc_sushilogtmp_json_non_array = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_");
stmt.executeUpdate(drop_sarc_sushilogtmp_json_non_array);
System.out.println("====> Dropped sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
logger.info("Dropped sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");

System.out.println("====> Creating sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
logger.info("Creating sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
String create_sarc_sushilogtmp_json_non_array = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + "(\n" +
" `ItemIdentifier` struct<\n" +

@ -139,18 +140,18 @@ public class SarcStats {
"LOCATION '" + sarcsReportPathNonArray + "/" + issn + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_sarc_sushilogtmp_json_non_array);
System.out.println("====> Created sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");
logger.info("Created sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_") + " table");

System.out.println("====> Creating sarc_sushilogtmp table");
logger.info("Creating sarc_sushilogtmp table");
String create_sarc_sushilogtmp = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".sarc_sushilogtmp(source STRING, repository STRING, " +
"rid STRING, date STRING, metric_type STRING, count INT) clustered by (source) into 100 buckets stored as orc "
+
"tblproperties('transactional'='true')";
stmt.executeUpdate(create_sarc_sushilogtmp);
System.out.println("====> Created sarc_sushilogtmp table");
logger.info("Created sarc_sushilogtmp table");

System.out.println("====> Inserting to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)");
logger.info("Inserting to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)");
String insert_sarc_sushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp " +
"SELECT 'SARC-OJS', '" + issn + "' , `ItemIdent`.`Value`, `ItemPerformance`.`Period`.`Begin`, " +
"`ItemPerformance`.`Instance`.`MetricType`, `ItemPerformance`.`Instance`.`Count` " +
@ -158,15 +159,15 @@ public class SarcStats {
+
"LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent ";
stmt.executeUpdate(insert_sarc_sushilogtmp);
System.out.println("====> Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)");
logger.info("Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)");

System.out.println("====> Inserting to sarc_sushilogtmp table (sarc_sushilogtmp_json_non_array)");
logger.info("Inserting to sarc_sushilogtmp table (sarc_sushilogtmp_json_non_array)");
insert_sarc_sushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp " +
"SELECT 'SARC-OJS', '" + issn + "' , `ItemIdentifier`.`Value`, `ItemPerformance`.`Period`.`Begin`, " +
"`ItemPerformance`.`Instance`.`MetricType`, `ItemPerformance`.`Instance`.`Count` " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_non_array_" + issn.replace("-", "_");
stmt.executeUpdate(insert_sarc_sushilogtmp);
System.out.println("====> Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_non_array)");
logger.info("Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_non_array)");

ConnectDB.getHiveConnection().close();
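The SARC reports store ItemIdentifier either as a JSON array or as a single object, so each shape gets its own external table and the array case is flattened with LATERAL VIEW posexplode. A compact sketch of the array branch, closely following the SQL above; the helper class and method names are illustrative:

import java.sql.Statement;

public class SarcArraySketch {

	// Array-shaped reports: emit one row per (report row, identifier) pair.
	public static void insertFromArrayTable(Statement stmt, String schema, String issn) throws Exception {
		String suffix = issn.replace("-", "_");
		stmt.executeUpdate("INSERT INTO " + schema + ".sarc_sushilogtmp"
			+ " SELECT 'SARC-OJS', '" + issn + "', `ItemIdent`.`Value`,"
			+ " `ItemPerformance`.`Period`.`Begin`,"
			+ " `ItemPerformance`.`Instance`.`MetricType`,"
			+ " `ItemPerformance`.`Instance`.`Count`"
			+ " FROM " + schema + ".sarc_sushilogtmp_json_array_" + suffix
			+ " LATERAL VIEW posexplode(ItemIdentifier) t AS seqi, ItemIdent");
	}
}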
@ -179,12 +180,12 @@ public class SarcStats {

Statement stmt = ConnectDB.getHiveConnection().createStatement();
ConnectDB.getHiveConnection().setAutoCommit(false);
System.out.println("====> Dropping sarc_sushilogtmp table");
logger.info("Dropping sarc_sushilogtmp table");
String drop_sarc_sushilogtmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".sarc_sushilogtmp";
stmt.executeUpdate(drop_sarc_sushilogtmp);
System.out.println("====> Dropped sarc_sushilogtmp table");
logger.info("Dropped sarc_sushilogtmp table");
ConnectDB.getHiveConnection().close();

List<String[]> issnAndUrls = new ArrayList<String[]>();

@ -252,7 +253,7 @@ public class SarcStats {
ConnectDB.getHiveConnection().setAutoCommit(false);

// Insert into downloads_stats
System.out.println("====> Inserting into downloads_stats");
logger.info("Inserting into downloads_stats");
// String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilog s, public.datasource_oids d, public.datasource_results dr, public.result_pids ro WHERE d.orid LIKE '%' || s.repository || '%' AND dr.id=d.id AND dr.result=ro.id AND s.rid=ro.pid AND ro.type='doi' AND metric_type='ft_total' AND s.source='SARC-OJS';";
String insertDStats = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
+ ".downloads_stats SELECT s.source, d.id AS repository_id, " +

@ -264,14 +265,14 @@ public class SarcStats {
"WHERE d.oid LIKE CONCAT('%', s.repository, '%') AND dr.id=d.id AND dr.result=ro.id AND " +
"s.rid=ro.pid AND ro.type='doi' AND metric_type='ft_total' AND s.source='SARC-OJS'";
stmt.executeUpdate(insertDStats);
System.out.println("====> Inserted into downloads_stats");
logger.info("Inserted into downloads_stats");

// Insert into sushilog
System.out.println("====> Inserting into sushilog");
logger.info("Inserting into sushilog");
String insertSushiLog = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
+ ".sushilog SELECT * " + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp";
stmt.executeUpdate(insertSushiLog);
System.out.println("====> Inserted into sushilog");
logger.info("Inserted into sushilog");

stmt.close();
ConnectDB.getHiveConnection().close();
@ -279,7 +280,7 @@ public class SarcStats {

public void getARReport(String sarcsReportPathArray, String sarcsReportPathNonArray,
String url, String issn) throws Exception {
log.info("Processing SARC! issn: " + issn + " with url: " + url);
logger.info("Processing SARC! issn: " + issn + " with url: " + url);
ConnectDB.getHiveConnection().setAutoCommit(false);

SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");

@ -483,7 +484,7 @@ public class SarcStats {
} catch (Exception e) {

// Logging error and silently continuing
log.error("Failed to get URL: " + e);
logger.error("Failed to get URL: " + e);
System.out.println("Failed to get URL: " + e);
// return null;
// throw new Exception("Failed to get URL: " + e.toString(), e);
@ -3,21 +3,24 @@ package eu.dnetlib.oa.graph.usagestats.export;

import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Main class for downloading and processing Usage statistics
*
* @author D. Pierrakos, S. Zoupanos
*/
public class UsageStatsExporter {

private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
private Properties properties;

public void runImpalaQuery() throws Exception {
Statement stmt = ConnectDB.getImpalaConnection().createStatement();
ConnectDB.getImpalaConnection().setAutoCommit(false);

System.out.println("====> Executing Impala query");
logger.info("Executing Impala query");
Statement statement = ConnectDB.getImpalaConnection().createStatement();

ResultSet rs = statement
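runImpalaQuery obtains a statement from the Impala connection and iterates a ResultSet. The JDBC skeleton of that flow is sketched here; the SELECT is a placeholder, since the actual statement is hidden by the truncated hunk:

import java.sql.ResultSet;
import java.sql.Statement;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ImpalaQuerySketch {

	private static final Logger logger = LoggerFactory.getLogger(ImpalaQuerySketch.class);

	// Read-only query against the Impala endpoint configured in ConnectDB.
	public static void runCountQuery(String schema) throws Exception {
		Statement statement = ConnectDB.getImpalaConnection().createStatement();
		ResultSet rs = statement.executeQuery("SELECT COUNT(*) FROM " + schema + ".downloads_stats");
		while (rs.next()) {
			logger.info("downloads_stats rows: " + rs.getLong(1));
		}
		rs.close();
		statement.close();
	}
}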
@ -40,45 +43,36 @@ public class UsageStatsExporter {

public void export() throws Exception {

logger.info("=====> Test of the logger (info)");
logger.debug("=====> Test of the logger (debug)");
logger.error("=====> Test of the logger (error)");

// connect to DB
System.out.println("====> Initialising DB properties");
ConnectDB.init(properties);

runImpalaQuery();
System.exit(0);
// runImpalaQuery();

// Create DB tables - they are also needed to download the statistics too
System.out.println("====> Creating database and tables");
logger.info("Creating database and tables");
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
//
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
// // the moment
System.out.println("====> Initializing the download logs module");
logger.info("Initializing the download logs module");
PiwikDownloadLogs piwd = new PiwikDownloadLogs(ExecuteWorkflow.matomoBaseURL, ExecuteWorkflow.matomoAuthToken);
System.out.println("====> Downloading piwik logs");
logger.info("Downloading piwik logs");
// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
System.out.println("====> Downloaded piwik logs");
logger.info("Downloaded piwik logs");

// Create DB tables, insert/update statistics
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
System.out.println("====> Processing logs");
logger.info("Processing logs");
piwikstatsdb.processLogs();
// log.info("process logs done");

System.out.println("====> Creating LaReferencia tables");
logger.info("Creating LaReferencia tables");
LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(ExecuteWorkflow.lareferenciaBaseURL,
ExecuteWorkflow.lareferenciaAuthToken);
System.out.println("====> Downloading LaReferencia logs");
logger.info("Downloading LaReferencia logs");
// lrf.GetLaReferenciaRepos(lareferenciaLogPath);
System.out.println("====> Downloaded LaReferencia logs");
logger.info("Downloaded LaReferencia logs");
LaReferenciaStats lastats = new LaReferenciaStats(ExecuteWorkflow.lareferenciaLogPath);
System.out.println("====> Processing LaReferencia logs");
logger.info("Processing LaReferencia logs");
// lastats.processLogs();
// log.info("LaReferencia logs done");
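export() points PiwikStatsDB at the public COUNTER-Robots JSON list before processing the logs. A hedged sketch of downloading and parsing that list with the json-simple parser already used elsewhere in this module; the "pattern" field name is an assumption about the published file, not something confirmed by the hunk above:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

public class CounterRobotsSketch {

	// Downloads the COUNTER robots list and returns the user-agent patterns it contains.
	public static List<String> loadRobotPatterns(String robotsUrl) throws Exception {
		StringBuilder json = new StringBuilder();
		BufferedReader in = new BufferedReader(new InputStreamReader(new URL(robotsUrl).openStream(), "UTF-8"));
		String line;
		while ((line = in.readLine()) != null) {
			json.append(line);
		}
		in.close();

		JSONArray entries = (JSONArray) new JSONParser().parse(json.toString());
		List<String> patterns = new ArrayList<String>();
		for (Object o : entries) {
			// "pattern" is assumed to be the field name used by the published list.
			patterns.add((String) ((JSONObject) o).get("pattern"));
		}
		return patterns;
	}
}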
@ -1,43 +0,0 @@
<html>
<head>
<title>Revision 58415: /dnet45/modules/dnet-openaire-usage-stats-export-wf/trunk/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export</title>
</head>
<body>
<h2>Revision 58415: /dnet45/modules/dnet-openaire-usage-stats-export-wf/trunk/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export</h2>
<ul>
<li>
<a href="../">..</a>
</li>
<li>
<a href="ConnectDB.java">ConnectDB.java</a>
</li>
<li>
<a href="ExecuteWorkflow.java">ExecuteWorkflow.java</a>
</li>
<li>
<a href="IrusStats.java">IrusStats.java</a>
</li>
<li>
<a href="PiwikDownloadLogs.java">PiwikDownloadLogs.java</a>
</li>
<li>
<a href="PiwikStatsDB.java">PiwikStatsDB.java</a>
</li>
<li>
<a href="ReadCounterRobotsList.java">ReadCounterRobotsList.java</a>
</li>
<li>
<a href="SarcStats.java">SarcStats.java</a>
</li>
<li>
<a href="UsageStatsExporter.java">UsageStatsExporter.java</a>
</li>
</ul>
<hr noshade>
<em>
Powered by
<a href="http://subversion.tigris.org/">Subversion</a>
version 1.4.4 (r25188).
</em>
</body>
</html>