From e2748fea9545640df88c910cd22c8e93a8578da0 Mon Sep 17 00:00:00 2001
From: Spyros Zoupanos
Date: Sat, 3 Oct 2020 19:47:57 +0300
Subject: [PATCH] Download directory automatic deletion & creation

---
 .../usagestats/export/ExecuteWorkflow.java    | 23 ++-----
 .../graph/usagestats/export/PiwikStatsDB.java | 15 +++--
 .../usagestats/export/UsageStatsExporter.java | 61 ++++++++++++++++++-
 .../export/usagestats_parameters.json         |  6 ++
 .../graph/usagestats/oozie_app/workflow.xml   |  1 +
 5 files changed, 82 insertions(+), 24 deletions(-)

diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java
index e0dc4e00e..7bddd4d95 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java
@@ -31,6 +31,7 @@ public class ExecuteWorkflow {
 	static String dbImpalaUrl;
 	static String usageStatsDBSchema;
 	static String statsDBSchema;
+	static boolean downloadLogs;
 
 	public static void main(String args[]) throws Exception {
 
@@ -56,28 +57,16 @@ public class ExecuteWorkflow {
 		lareferenciaBaseURL = parser.get("lareferenciaBaseURL");
 		lareferenciaAuthToken = parser.get("lareferenciaAuthToken");
 
+		if (parser.get("downloadLogs").toLowerCase().equals("true"))
+			downloadLogs = true;
+		else
+			downloadLogs = false;
+
 		dbHiveUrl = parser.get("dbHiveUrl");
 		dbImpalaUrl = parser.get("dbImpalaUrl");
 		usageStatsDBSchema = parser.get("usageStatsDBSchema");
 		statsDBSchema = parser.get("statsDBSchema");
 
-		System.out.println("====> Printing parsed variables");
-		System.out.println(ExecuteWorkflow.matomoAuthToken);
-		System.out.println(ExecuteWorkflow.matomoBaseURL);
-		System.out.println(ExecuteWorkflow.repoLogPath);
-		System.out.println(ExecuteWorkflow.portalLogPath);
-		System.out.println(ExecuteWorkflow.irusUKBaseURL);
-		System.out.println(ExecuteWorkflow.irusUKReportPath);
-		System.out.println(ExecuteWorkflow.sarcsReportPathArray);
-		System.out.println(ExecuteWorkflow.sarcsReportPathNonArray);
-		System.out.println(ExecuteWorkflow.lareferenciaLogPath);
-		System.out.println(ExecuteWorkflow.lareferenciaBaseURL);
-		System.out.println(ExecuteWorkflow.lareferenciaAuthToken);
-		System.out.println(ExecuteWorkflow.dbHiveUrl);
-		System.out.println(ExecuteWorkflow.dbImpalaUrl);
-		System.out.println(ExecuteWorkflow.usageStatsDBSchema);
-		System.out.println(ExecuteWorkflow.statsDBSchema);
-
 		UsageStatsExporter usagestatsExport = new UsageStatsExporter();
 		usagestatsExport.export();
 	}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java
index 4cdf11310..4abb1d1d9 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java
@@ -79,11 +79,18 @@ public class PiwikStatsDB {
 		try {
 			stmt = ConnectDB.getHiveConnection().createStatement();
 
-			logger.info("Dropping usagestats DB");
-			String dropDatabase = "DROP DATABASE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + "CASCADE;";
+			logger.info("Dropping usagestats DB: " + ConnectDB.getUsageStatsDBSchema());
+			String dropDatabase = "DROP DATABASE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + " CASCADE";
 			stmt.executeUpdate(dropDatabase);
-
-			logger.info("Creating usagestats DB");
+		} catch (Exception e) {
+			logger.error("Failed to drop database: " + e);
+			throw new Exception("Failed to drop database: " + e.toString(), e);
+		}
+
+		try {
+			stmt = ConnectDB.getHiveConnection().createStatement();
+
+			logger.info("Creating usagestats DB: " + ConnectDB.getUsageStatsDBSchema());
 			String createDatabase = "CREATE DATABASE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema();
 			stmt.executeUpdate(createDatabase);
 
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
index 50529a06f..d005a31ab 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
@@ -1,9 +1,15 @@
 
 package eu.dnetlib.oa.graph.usagestats.export;
 
+import java.io.IOException;
 import java.sql.ResultSet;
 import java.sql.Statement;
 
+import javax.sound.midi.SysexMessage;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -41,26 +47,75 @@ public class UsageStatsExporter {
 		stmt.close();
 	}
 
+	private void reCreateLogDirs() throws IllegalArgumentException, IOException {
+		FileSystem dfs = FileSystem.get(new Configuration());
+
+		logger.info("Deleting log directories");
+
+		logger.info("Deleting repoLog directory: " + ExecuteWorkflow.repoLogPath);
+		dfs.delete(new Path(ExecuteWorkflow.repoLogPath), true);
+
+		logger.info("Deleting portalLog directory: " + ExecuteWorkflow.portalLogPath);
+		dfs.delete(new Path(ExecuteWorkflow.portalLogPath), true);
+
+		logger.info("Deleting irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
+		dfs.delete(new Path(ExecuteWorkflow.irusUKReportPath), true);
+
+		logger.info("Deleting sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
+		dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathArray), true);
+
+		logger.info("Deleting sarcsReport (NonArray) directory: " + ExecuteWorkflow.sarcsReportPathNonArray);
+		dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathNonArray), true);
+
+		logger.info("Deleting lareferenciaLog directory: " + ExecuteWorkflow.lareferenciaLogPath);
+		dfs.delete(new Path(ExecuteWorkflow.lareferenciaLogPath), true);
+
+		logger.info("Creating log directories");
+
+		logger.info("Creating repoLog directory: " + ExecuteWorkflow.repoLogPath);
+		dfs.mkdirs(new Path(ExecuteWorkflow.repoLogPath));
+
+		logger.info("Creating portalLog directory: " + ExecuteWorkflow.portalLogPath);
+		dfs.mkdirs(new Path(ExecuteWorkflow.portalLogPath));
+
+		logger.info("Creating irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
+		dfs.mkdirs(new Path(ExecuteWorkflow.irusUKReportPath));
+
+		logger.info("Creating sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
+		dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathArray));
+
+		logger.info("Creating sarcsReport (NonArray) directory: " + ExecuteWorkflow.sarcsReportPathNonArray);
+		dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathNonArray));
+
+		logger.info("Creating lareferenciaLog directory: " + ExecuteWorkflow.lareferenciaLogPath);
+		dfs.mkdirs(new Path(ExecuteWorkflow.lareferenciaLogPath));
+	}
+
 	public void export() throws Exception {
 
 		logger.info("Initialising DB properties");
 		ConnectDB.init();
 
-//		System.exit(0);
-
 		// runImpalaQuery();
 
 		// Create DB tables - they are also needed to download the statistics too
 		logger.info("Creating database and tables");
 		PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
 //
+		reCreateLogDirs();
+
+		System.exit(0);
 		// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
 		// // the moment
 		logger.info("Initializing the download logs module");
 		PiwikDownloadLogs piwd = new PiwikDownloadLogs(ExecuteWorkflow.matomoBaseURL, ExecuteWorkflow.matomoAuthToken);
 
 		logger.info("Downloading piwik logs");
-//		piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
+		if (ExecuteWorkflow.downloadLogs)
+			piwd
+				.GetOpenAIRELogs(
+					ExecuteWorkflow.repoLogPath,
+					ExecuteWorkflow.portalLogPath, ExecuteWorkflow.portalMatomoID);
 		logger.info("Downloaded piwik logs");
 
 		// Create DB tables, insert/update statistics
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json
index a8737750e..7f2592ea5 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json
@@ -94,5 +94,11 @@
 		"paramLongName": "statsDBSchema",
 		"paramDescription": "activate tranform-only mode. Only apply transformation step",
 		"paramRequired": true
+	},
+	{
+		"paramName": "dl",
+		"paramLongName": "downloadLogs",
+		"paramDescription": "download logs?",
+		"paramRequired": true
 	}
 ]
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml
index 8cd425850..27fcabaec 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml
@@ -58,6 +58,7 @@
             <arg>--dbImpalaUrl</arg><arg>${impalaJdbcUrl}</arg>
             <arg>--usageStatsDBSchema</arg><arg>${usageStatsDBSchema}</arg>
            <arg>--statsDBSchema</arg><arg>${statsDBSchema}</arg>
+            <arg>--downloadLogs</arg><arg>${downloadLogs}</arg>
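
Note: the four-line if/else that sets downloadLogs in ExecuteWorkflow.main is equivalent to a single Boolean.parseBoolean call, which lower-cases internally and returns false for a null argument, whereas the patch's .toLowerCase() would throw a NullPointerException if the parameter were missing. A minimal sketch, assuming parser.get behaves as in the patch:

	// equivalent to the if/else above; tolerates a missing (null) parameter
	downloadLogs = Boolean.parseBoolean(parser.get("downloadLogs"));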
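
Note: reCreateLogDirs() repeats the same delete-then-mkdirs pair for six HDFS paths. A minimal sketch of that pattern factored into a loop, using only the Hadoop FileSystem calls already imported by the patch; the helper class and its varargs signature are illustrative, not part of the commit:

	import java.io.IOException;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.fs.FileSystem;
	import org.apache.hadoop.fs.Path;

	public class LogDirHelper {

		// Illustrative helper, not part of the commit: drops and recreates each directory.
		public static void reCreateLogDirs(String... paths) throws IOException {
			FileSystem dfs = FileSystem.get(new Configuration());
			for (String path : paths) {
				// recursive delete (second argument true) removes the directory and its contents
				dfs.delete(new Path(path), true);
				// recreate it empty so the download steps start from a clean target
				dfs.mkdirs(new Path(path));
			}
		}
	}

Called as reCreateLogDirs(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath, ...), this preserves the delete/create behaviour; the per-path log messages could be re-added inside the loop.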