2020-12-12 11:00:14 +01:00
|
|
|
|
2020-11-17 07:36:51 +01:00
|
|
|
package eu.dnetlib.oa.graph.usagestatsbuild.export;
|
|
|
|
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.sql.SQLException;
|
|
|
|
import java.sql.Statement;
|
|
|
|
|
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
|
|
import org.apache.hadoop.fs.Path;
|
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Main class for downloading and processing Usage statistics
|
|
|
|
*
|
|
|
|
* @author D. Pierrakos, S. Zoupanos
|
|
|
|
*/
|
|
|
|
public class UsageStatsExporter {
|
|
|
|
|
2020-12-12 11:00:14 +01:00
|
|
|
public UsageStatsExporter() {
|
2020-11-17 07:36:51 +01:00
|
|
|
|
2020-12-12 11:00:14 +01:00
|
|
|
}
|
2020-11-17 07:36:51 +01:00
|
|
|
|
2020-12-12 11:00:14 +01:00
|
|
|
private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
|
2020-11-17 07:36:51 +01:00
|
|
|
|
2020-12-12 11:00:14 +01:00
|
|
|
public void export() throws Exception {
|
2020-11-17 07:36:51 +01:00
|
|
|
|
2020-12-12 11:00:14 +01:00
|
|
|
logger.info("Initialising DB properties");
|
|
|
|
ConnectDB.init();
|
2020-11-17 07:36:51 +01:00
|
|
|
|
|
|
|
// runImpalaQuery();
|
2020-12-12 11:00:14 +01:00
|
|
|
PiwikStatsDB piwikstatsdb = new PiwikStatsDB();
|
2020-11-17 07:36:51 +01:00
|
|
|
|
2020-12-12 11:00:14 +01:00
|
|
|
logger.info("Re-creating database and tables");
|
|
|
|
if (ExecuteWorkflow.recreateDbAndTables) {
|
|
|
|
piwikstatsdb.recreateDBAndTables();
|
|
|
|
logger.info("DB-Tables are created ");
|
|
|
|
}
|
2020-11-17 07:36:51 +01:00
|
|
|
// else {
|
|
|
|
// piwikstatsdb.createTmpTables();
|
|
|
|
// logger.info("TmpTables are created ");
|
|
|
|
// }
|
2020-12-12 11:00:14 +01:00
|
|
|
if (ExecuteWorkflow.processPiwikLogs) {
|
|
|
|
logger.info("Processing Piwik logs");
|
|
|
|
piwikstatsdb.processLogs();
|
|
|
|
logger.info("Piwik logs Done");
|
|
|
|
logger.info("Processing Pedocs Old Stats");
|
|
|
|
piwikstatsdb.uploadOldPedocs();
|
|
|
|
logger.info("Processing Pedocs Old Stats Done");
|
|
|
|
logger.info("Processing TUDELFT Stats");
|
|
|
|
piwikstatsdb.uploadTUDELFTStats();
|
|
|
|
logger.info("Processing TUDELFT Stats Done");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
LaReferenciaStats lastats = new LaReferenciaStats();
|
|
|
|
|
|
|
|
if (ExecuteWorkflow.processLaReferenciaLogs) {
|
|
|
|
logger.info("Processing LaReferencia logs");
|
|
|
|
lastats.processLogs();
|
|
|
|
logger.info("LaReferencia logs done");
|
|
|
|
}
|
|
|
|
|
|
|
|
IrusStats irusstats = new IrusStats();
|
|
|
|
|
|
|
|
if (ExecuteWorkflow.irusProcessStats) {
|
|
|
|
logger.info("Processing IRUS");
|
|
|
|
irusstats.processIrusStats();
|
|
|
|
logger.info("Irus done");
|
|
|
|
}
|
|
|
|
|
|
|
|
SarcStats sarcStats = new SarcStats();
|
|
|
|
|
|
|
|
if (ExecuteWorkflow.sarcProcessStats) {
|
|
|
|
sarcStats.processSarc();
|
|
|
|
}
|
|
|
|
logger.info("Sarc done");
|
|
|
|
|
|
|
|
// finalize usagestats
|
|
|
|
if (ExecuteWorkflow.finalizeStats) {
|
|
|
|
piwikstatsdb.finalizeStats();
|
|
|
|
logger.info("Finalized stats");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make the tables available to Impala
|
|
|
|
if (ExecuteWorkflow.finalTablesVisibleToImpala) {
|
|
|
|
logger.info("Making tables visible to Impala");
|
|
|
|
invalidateMetadata();
|
|
|
|
}
|
|
|
|
|
|
|
|
logger.info("End");
|
|
|
|
}
|
|
|
|
|
|
|
|
private void invalidateMetadata() throws SQLException {
|
|
|
|
Statement stmt = null;
|
|
|
|
|
|
|
|
stmt = ConnectDB.getImpalaConnection().createStatement();
|
|
|
|
|
|
|
|
String sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".views_stats";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".downloads_stats";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".views_stats";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".usage_stats";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".pageviews_stats";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
|
|
|
|
stmt.close();
|
|
|
|
ConnectDB.getHiveConnection().close();
|
|
|
|
}
|
2020-11-17 07:36:51 +01:00
|
|
|
}
|