112 lines
5.8 KiB
Java
112 lines
5.8 KiB
Java
|
|
package eu.dnetlib.oa.graph.datasetsusagestats.export;
|
|
|
|
import java.io.IOException;
|
|
import java.sql.SQLException;
|
|
import java.sql.Statement;
|
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
import org.apache.hadoop.fs.Path;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
/**
|
|
* Main class for downloading and processing Usage statistics
|
|
*
|
|
* @author D. Pierrakos, S. Zoupanos
|
|
*/
|
|
public class UsageStatsExporter {
|
|
|
|
private Statement stmt = null;
|
|
|
|
public UsageStatsExporter() {
|
|
|
|
}
|
|
|
|
private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
|
|
|
|
private void reCreateLogDirs() throws IllegalArgumentException, IOException {
|
|
FileSystem dfs = FileSystem.get(new Configuration());
|
|
|
|
logger.info("Deleting Log directory: " + ExecuteWorkflow.dataciteReportPath);
|
|
dfs.delete(new Path(ExecuteWorkflow.dataciteReportPath), true);
|
|
|
|
logger.info("Creating Log directory: " + ExecuteWorkflow.dataciteReportPath);
|
|
dfs.mkdirs(new Path(ExecuteWorkflow.dataciteReportPath));
|
|
|
|
}
|
|
|
|
public void export() throws Exception {
|
|
|
|
logger.info("Initialising DB properties");
|
|
ConnectDB.init();
|
|
ConnectDB.getHiveConnection();
|
|
|
|
if (ExecuteWorkflow.recreateDbAndTables) {
|
|
DatasetsStatsDB datasetsDB = new DatasetsStatsDB("", "");
|
|
datasetsDB.recreateDBAndTables();
|
|
}
|
|
logger.info("Initializing the download logs module");
|
|
DownloadReportsListFromDatacite drfd = new DownloadReportsListFromDatacite(ExecuteWorkflow.dataciteBaseURL,
|
|
ExecuteWorkflow.dataciteReportPath);
|
|
|
|
if (ExecuteWorkflow.datasetsEmptyDirs) {
|
|
logger.info("Downloading Reports List From Datacite");
|
|
drfd.downloadReportsList();
|
|
logger.info("Reports List has been downloaded");
|
|
}
|
|
|
|
ReadReportsListFromDatacite readReportsListFromDatacite = new ReadReportsListFromDatacite(
|
|
ExecuteWorkflow.dataciteReportPath);
|
|
logger.info("Store Reports To DB");
|
|
readReportsListFromDatacite.readReports();
|
|
logger.info("Reports Stored To DB");
|
|
}
|
|
|
|
// runImpalaQuery();
|
|
/*
|
|
* PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
|
|
* logger.info("Re-creating database and tables"); logger.info("Initializing the download logs module");
|
|
* PiwikDownloadLogs piwd = new PiwikDownloadLogs(ExecuteWorkflow.matomoBaseURL, ExecuteWorkflow.matomoAuthToken);
|
|
* if (ExecuteWorkflow.piwikEmptyDirs) { logger.info("Recreating Piwik log directories");
|
|
* piwikstatsdb.reCreateLogDirs(); } // Downloading piwik logs (also managing directory creation) if
|
|
* (ExecuteWorkflow.downloadPiwikLogs) { logger.info("Downloading piwik logs"); piwd .GetOpenAIRELogs(
|
|
* ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath, ExecuteWorkflow.portalMatomoID); }
|
|
* logger.info("Downloaded piwik logs"); // Create DB tables, insert/update statistics String cRobotsUrl =
|
|
* "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
|
* piwikstatsdb.setCounterRobotsURL(cRobotsUrl); if (ExecuteWorkflow.processPiwikLogs) {
|
|
* logger.info("Processing logs"); piwikstatsdb.processLogs(); } logger.info("Creating LaReferencia tables");
|
|
* LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(ExecuteWorkflow.lareferenciaBaseURL,
|
|
* ExecuteWorkflow.lareferenciaAuthToken); if (ExecuteWorkflow.laReferenciaEmptyDirs) {
|
|
* logger.info("Recreating LaReferencia log directories"); lrf.reCreateLogDirs(); } if
|
|
* (ExecuteWorkflow.downloadLaReferenciaLogs) { logger.info("Downloading LaReferencia logs");
|
|
* lrf.GetLaReferenciaRepos(ExecuteWorkflow.lareferenciaLogPath); logger.info("Downloaded LaReferencia logs"); }
|
|
* LaReferenciaStats lastats = new LaReferenciaStats(ExecuteWorkflow.lareferenciaLogPath); if
|
|
* (ExecuteWorkflow.processLaReferenciaLogs) { logger.info("Processing LaReferencia logs"); lastats.processLogs();
|
|
* logger.info("LaReferencia logs done"); } IrusStats irusstats = new IrusStats(ExecuteWorkflow.irusUKBaseURL); if
|
|
* (ExecuteWorkflow.irusCreateTablesEmptyDirs) { logger.info("Creating Irus Stats tables");
|
|
* irusstats.createTables(); logger.info("Created Irus Stats tables"); logger.info("Re-create log dirs");
|
|
* irusstats.reCreateLogDirs(); logger.info("Re-created log dirs"); } if (ExecuteWorkflow.irusDownloadReports) {
|
|
* irusstats.getIrusRRReport(ExecuteWorkflow.irusUKReportPath); } if (ExecuteWorkflow.irusProcessStats) {
|
|
* irusstats.processIrusStats(); logger.info("Irus done"); } SarcStats sarcStats = new SarcStats(); if
|
|
* (ExecuteWorkflow.sarcCreateTablesEmptyDirs) { sarcStats.reCreateLogDirs(); } if
|
|
* (ExecuteWorkflow.sarcDownloadReports) { sarcStats.getAndProcessSarc(ExecuteWorkflow.sarcsReportPathArray,
|
|
* ExecuteWorkflow.sarcsReportPathNonArray); } if (ExecuteWorkflow.sarcProcessStats) {
|
|
* sarcStats.processSarc(ExecuteWorkflow.sarcsReportPathArray, ExecuteWorkflow.sarcsReportPathNonArray);
|
|
* sarcStats.finalizeSarcStats(); } logger.info("Sarc done"); // finalize usagestats if
|
|
* (ExecuteWorkflow.finalizeStats) { piwikstatsdb.finalizeStats(); logger.info("Finalized stats"); } // Make the
|
|
* tables available to Impala if (ExecuteWorkflow.finalTablesVisibleToImpala) {
|
|
* logger.info("Making tables visible to Impala"); invalidateMetadata(); } logger.info("End");
|
|
*/
|
|
}
|
|
/*
 * Disabled code (one statement per line for readability):
 *
 * private void invalidateMetadata() throws SQLException {
 *     Statement stmt = null;
 *     stmt = ConnectDB.getImpalaConnection().createStatement();
 *     String sql = "INVALIDATE METADATA " + ConnectDB.getDataSetUsageStatsDBSchema() + ".downloads_stats";
 *     stmt.executeUpdate(sql);
 *     sql = "INVALIDATE METADATA " + ConnectDB.getDataSetUsageStatsDBSchema() + ".views_stats";
 *     stmt.executeUpdate(sql);
 *     sql = "INVALIDATE METADATA " + ConnectDB.getDataSetUsageStatsDBSchema() + ".usage_stats";
 *     stmt.executeUpdate(sql);
 *     sql = "INVALIDATE METADATA " + ConnectDB.getDataSetUsageStatsDBSchema() + ".pageviews_stats";
 *     stmt.executeUpdate(sql);
 *     stmt.close();
 *     ConnectDB.getHiveConnection().close();
 * }
 */
|