diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java index 7bddd4d95..834539d2d 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java @@ -6,7 +6,12 @@ package eu.dnetlib.oa.graph.usagestats.export; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; + import org.apache.commons.io.IOUtils; +import org.apache.log4j.BasicConfigurator; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -32,9 +37,14 @@ public class ExecuteWorkflow { static String usageStatsDBSchema; static String statsDBSchema; static boolean downloadLogs; + static Calendar startingLogPeriod; + static Calendar endingLogPeriod; public static void main(String args[]) throws Exception { + // Sending the logs to the console + BasicConfigurator.configure(); + final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( @@ -57,17 +67,33 @@ public class ExecuteWorkflow { lareferenciaBaseURL = parser.get("lareferenciaBaseURL"); lareferenciaAuthToken = parser.get("lareferenciaAuthToken"); - if (parser.get("downloadLogs").toLowerCase().equals("true")) - downloadLogs = true; - else - downloadLogs = false; - dbHiveUrl = parser.get("dbHiveUrl"); dbImpalaUrl = parser.get("dbImpalaUrl"); usageStatsDBSchema = parser.get("usageStatsDBSchema"); statsDBSchema = parser.get("statsDBSchema"); + if (parser.get("downloadLogs").toLowerCase().equals("true")) + downloadLogs = true; + else + downloadLogs = false; + + String startingLogPeriodStr = parser.get("startingLogPeriod"); + Date startingLogPeriodDate = new SimpleDateFormat("MM/yyyy").parse(startingLogPeriodStr); + startingLogPeriod = 
toCalendar(startingLogPeriodDate); + + String endingLogPeriodStr = parser.get("endingLogPeriod"); + Date endingLogPeriodDate = new SimpleDateFormat("MM/yyyy").parse(endingLogPeriodStr); + endingLogPeriod = toCalendar(endingLogPeriodDate); + UsageStatsExporter usagestatsExport = new UsageStatsExporter(); usagestatsExport.export(); } + + private static Calendar toCalendar(Date date) { + + Calendar calendar = Calendar.getInstance(); + calendar.setTime(date); + return calendar; + + } } diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java index a1e18e8fc..7e94ea3e3 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java @@ -87,6 +87,8 @@ public class PiwikDownloadLogs { public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception { Statement statement = ConnectDB.getHiveConnection().createStatement(); +// SimpleDateFormat sdf = new SimpleDateFormat("MM/dd/yyyy"); + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); ResultSet rs = statement .executeQuery( @@ -94,17 +96,21 @@ public class PiwikDownloadLogs { + ".datasource where piwik_id is not null and piwik_id <> 0 order by piwik_id"); while (rs.next()) { int siteId = rs.getInt(1); - SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM"); +// SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM"); - Calendar start = Calendar.getInstance(); - start.set(Calendar.YEAR, 2016); - start.set(Calendar.MONTH, Calendar.MARCH); 
// start.setTime(simpleDateFormat.parse("2016-01")); + Calendar start = (Calendar) ExecuteWorkflow.startingLogPeriod.clone(); + logger.info("GetOpenAIRELogs starting period: " + sdf.format(start.getTime())); - Calendar end = Calendar.getInstance(); +// Calendar end = Calendar.getInstance(); +// end.add(Calendar.DAY_OF_MONTH, -1); + Calendar end = (Calendar) ExecuteWorkflow.endingLogPeriod.clone(); end.add(Calendar.DAY_OF_MONTH, -1); + logger.info("GetOpenAIRELogs ending period: " + sdf.format(end.getTime())); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION .prepareStatement( "SELECT max(timestamp) FROM " + ConnectDB.getUsageStatsDBSchema() diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java index d005a31ab..473bcc3fd 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java @@ -20,6 +20,10 @@ import org.slf4j.LoggerFactory; */ public class UsageStatsExporter { + public UsageStatsExporter() { + + } + private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class); public void runImpalaQuery() throws Exception { @@ -50,8 +54,6 @@ public class UsageStatsExporter { private void reCreateLogDirs() throws IllegalArgumentException, IOException { FileSystem dfs = FileSystem.get(new Configuration()); - logger.info("Deleting log directories"); - logger.info("Deleting repoLog directory: " + ExecuteWorkflow.repoLogPath); dfs.delete(new Path(ExecuteWorkflow.repoLogPath), true); @@ -70,8 +72,6 @@ public class UsageStatsExporter { logger.info("Deleting lareferenciaLog directory: " + ExecuteWorkflow.lareferenciaLogPath); dfs.delete(new 
Path(ExecuteWorkflow.lareferenciaLogPath), true); - logger.info("Creating log directories"); - logger.info("Creating repoLog directory: " + ExecuteWorkflow.repoLogPath); dfs.mkdirs(new Path(ExecuteWorkflow.repoLogPath)); @@ -101,10 +101,9 @@ public class UsageStatsExporter { // Create DB tables - they are also needed to download the statistics too logger.info("Creating database and tables"); PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath); -// - reCreateLogDirs(); - System.exit(0); + logger.info("Recreating log directories"); + reCreateLogDirs(); // // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for // // the moment @@ -118,6 +117,8 @@ public class UsageStatsExporter { ExecuteWorkflow.portalLogPath, ExecuteWorkflow.portalMatomoID); logger.info("Downloaded piwik logs"); + System.exit(0); // TODO(review): temporary early exit for incremental testing - remove to run the full export + // Create DB tables, insert/update statistics // String cRobotsUrl = properties.getProperty("COUNTER_robots_Url"); String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json"; diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json index 7f2592ea5..b5c28ca1f 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json +++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json @@ -100,5 +100,17 @@ "paramLongName": "downloadLogs", "paramDescription": "download logs?", "paramRequired": true + }, + { + "paramName": "slp", + "paramLongName": "startingLogPeriod", + "paramDescription": "Starting log period", + "paramRequired": true + }, + { + "paramName": "elp", + 
"paramLongName": "endingLogPeriod", + "paramDescription": "Ending log period", + "paramRequired": true } ] diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml index 27fcabaec..8d281fd62 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml @@ -59,6 +59,8 @@ --usageStatsDBSchema${usageStatsDBSchema} --statsDBSchema${statsDBSchema} --downloadLogs${downloadLogs} + --startingLogPeriod${startingLogPeriod} + --endingLogPeriod${endingLogPeriod}