package eu.dnetlib.oa.graph.usagestats.export;

import java.io.IOException;
import java.sql.ResultSet;
import java.sql.Statement;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Main class for downloading and processing Usage statistics
 *
 * @author D. Pierrakos, S. Zoupanos
 */
public class UsageStatsExporter {

	private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);

	public void runImpalaQuery() throws Exception {
		ConnectDB.getImpalaConnection().setAutoCommit(false);

		logger.info("Executing Impala query");

		// Earlier variant of the query, kept for reference:
		// "CREATE TABLE usagestats_20200913.spyros_tmp5 AS\n" +
		// "SELECT s.source, d.id AS repository_id, ro.id as result_id, s.count, '0' \n" +
		// "FROM usagestats_20200913.sarc_sushilogtmp2 s, \n" +
		// "openaire_prod_stats_shadow_20200821.datasource_oids d, \n" +
		// "openaire_prod_stats_shadow_20200821.datasource_results dr, \n" +
		// "openaire_prod_stats_shadow_20200821.result_pids ro \n" +
		// "WHERE d.oid LIKE CONCAT('%', s.repository, '%') AND dr.id=d.id AND dr.result=ro.id \n" +
		// "AND s.rid=ro.pid AND ro.type='doi' AND metric_type='ft_total' AND s.source='SARC-OJS' "
		String query = "CREATE TABLE usagestats_20200913.spyros_tmp6 AS\n" +
			"SELECT * \n" +
			"FROM usagestats_20200913.sarc_sushilogtmp2";

		// try-with-resources closes both the Statement and the ResultSet; the original
		// code created two statements and closed only the unused one
		try (Statement statement = ConnectDB.getImpalaConnection().createStatement();
			ResultSet rs = statement.executeQuery(query)) {
			// CREATE TABLE ... AS SELECT produces no rows to consume
		}
	}

	private void reCreateLogDirs() throws IllegalArgumentException, IOException {
		FileSystem dfs = FileSystem.get(new Configuration());

		logger.info("Deleting log directories");

		logger.info("Deleting repoLog directory: " + ExecuteWorkflow.repoLogPath);
		dfs.delete(new Path(ExecuteWorkflow.repoLogPath), true);

		logger.info("Deleting portalLog directory: " + ExecuteWorkflow.portalLogPath);
		dfs.delete(new Path(ExecuteWorkflow.portalLogPath), true);

		logger.info("Deleting irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
		dfs.delete(new Path(ExecuteWorkflow.irusUKReportPath), true);

		logger.info("Deleting sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
		dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathArray), true);

		logger.info("Deleting sarcsReport (NonArray) directory: " + ExecuteWorkflow.sarcsReportPathNonArray);
		dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathNonArray), true);

		logger.info("Deleting lareferenciaLog directory: " + ExecuteWorkflow.lareferenciaLogPath);
		dfs.delete(new Path(ExecuteWorkflow.lareferenciaLogPath), true);

		logger.info("Creating log directories");

		logger.info("Creating repoLog directory: " + ExecuteWorkflow.repoLogPath);
		dfs.mkdirs(new Path(ExecuteWorkflow.repoLogPath));

		logger.info("Creating portalLog directory: " + ExecuteWorkflow.portalLogPath);
		dfs.mkdirs(new Path(ExecuteWorkflow.portalLogPath));

		logger.info("Creating irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
		dfs.mkdirs(new Path(ExecuteWorkflow.irusUKReportPath));

		logger.info("Creating sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
		dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathArray));

		logger.info("Creating sarcsReport (NonArray) directory: " + ExecuteWorkflow.sarcsReportPathNonArray);
		dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathNonArray));

		logger.info("Creating lareferenciaLog directory: " + ExecuteWorkflow.lareferenciaLogPath);
		dfs.mkdirs(new Path(ExecuteWorkflow.lareferenciaLogPath));
	}
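	// A tidier variant (a sketch only, not wired into the workflow above): the repeated
	// delete/mkdir pairs in reCreateLogDirs() could be collapsed into a single helper.
	// The name "recreateDir" is hypothetical, introduced here for illustration.
	private void recreateDir(FileSystem dfs, String label, String path)
		throws IllegalArgumentException, IOException {
		logger.info("Deleting " + label + " directory: " + path);
		dfs.delete(new Path(path), true); // recursive delete, as above
		logger.info("Creating " + label + " directory: " + path);
		dfs.mkdirs(new Path(path));
	}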
	public void export() throws Exception {

		logger.info("Initialising DB properties");
		ConnectDB.init();

//		runImpalaQuery();

		// Create DB tables - they are also needed to download the statistics
		logger.info("Creating database and tables");
		PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);

//		reCreateLogDirs();
		// A live System.exit(0) was left here and made everything below unreachable;
		// it is kept commented out like the other debug leftovers
//		System.exit(0);

		// Download the statistics - The following 2 lines are not needed after the download - commenting them out for
		// the moment
		logger.info("Initializing the download logs module");
		PiwikDownloadLogs piwd = new PiwikDownloadLogs(ExecuteWorkflow.matomoBaseURL, ExecuteWorkflow.matomoAuthToken);

		logger.info("Downloading piwik logs");
		if (ExecuteWorkflow.downloadLogs) {
			piwd
				.GetOpenAIRELogs(
					ExecuteWorkflow.repoLogPath,
					ExecuteWorkflow.portalLogPath, ExecuteWorkflow.portalMatomoID);
		}
		logger.info("Downloaded piwik logs");

		// Create DB tables, insert/update statistics
//		String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
		String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
		piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
		logger.info("Processing logs");
		piwikstatsdb.processLogs();
//		log.info("process logs done");

		logger.info("Creating LaReferencia tables");
		LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(ExecuteWorkflow.lareferenciaBaseURL,
			ExecuteWorkflow.lareferenciaAuthToken);
		logger.info("Downloading LaReferencia logs");
//		lrf.GetLaReferenciaRepos(lareferenciaLogPath);
		logger.info("Downloaded LaReferencia logs");

		LaReferenciaStats lastats = new LaReferenciaStats(ExecuteWorkflow.lareferenciaLogPath);
		logger.info("Processing LaReferencia logs");
//		lastats.processLogs();
//		log.info("LaReferencia logs done");

//		IrusStats irusstats = new IrusStats(irusUKBaseURL);
//		irusstats.getIrusRRReport(irusUKReportPath);
//		irusstats.processIrusStats();
//		log.info("irus done");

//		SarcStats sarcStats = new SarcStats();
//		sarcStats.getAndProcessSarc(sarcsReportPathArray, sarcsReportPathNonArray);
//		sarcStats.finalizeSarcStats();
//		log.info("sarc done");

		// finalize usagestats
		piwikstatsdb.finalizeStats();
//		log.info("finalized stats");
	}
}
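// Usage sketch (an assumption inferred from the ExecuteWorkflow references above, not
// confirmed by this file): the exporter is expected to be driven by ExecuteWorkflow
// after it has populated its static configuration fields, roughly as:
//
//     UsageStatsExporter usagestatsExport = new UsageStatsExporter();
//     usagestatsExport.export();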