From 6b5b6796b73e6973bf4f5a42a8822946dae2264e Mon Sep 17 00:00:00 2001 From: Spyros Zoupanos Date: Tue, 20 Oct 2020 20:30:26 +0300 Subject: [PATCH] Small corrections for problems that Dimitris found and flag for the number of threads --- .../graph/usagestats/export/ExecuteWorkflow.java | 4 ++++ .../graph/usagestats/export/PiwikDownloadLogs.java | 14 +------------- .../oa/graph/usagestats/export/SarcStats.java | 3 ++- .../usagestats/export/usagestats_parameters.json | 8 +++++++- .../dhp/oa/graph/usagestats/oozie_app/workflow.xml | 1 + 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java index 981de5de1..50b951cbc 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java @@ -64,6 +64,8 @@ public class ExecuteWorkflow { static boolean finalizeStats; static boolean finalTablesVisibleToImpala; + static int numberOfDownloadThreads; + public static void main(String args[]) throws Exception { // Sending the logs to the console @@ -179,6 +181,8 @@ public class ExecuteWorkflow { else finalTablesVisibleToImpala = false; + numberOfDownloadThreads = Integer.parseInt(parser.get("numberOfDownloadThreads")); + UsageStatsExporter usagestatsExport = new UsageStatsExporter(); usagestatsExport.export(); } diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java index 9e423abd3..6009c16e7 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikDownloadLogs.java @@ -3,10 +3,8 @@ package eu.dnetlib.oa.graph.usagestats.export; import java.io.*; import java.net.Authenticator; -import java.net.PasswordAuthentication; import java.net.URL; import java.net.URLConnection; -import java.security.cert.X509Certificate; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.Statement; @@ -18,13 +16,6 @@ import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import javax.net.ssl.HostnameVerifier; -import javax.net.ssl.HttpsURLConnection; -import javax.net.ssl.SSLContext; -import javax.net.ssl.SSLSession; -import javax.net.ssl.TrustManager; -import javax.net.ssl.X509TrustManager; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -131,7 +122,6 @@ public class PiwikDownloadLogs { String period = "&period=day&date=" + sdf.format(date); String outFolder = ""; - // portal siteId = 109; if (siteId == Integer.parseInt(portalMatomoID)) { outFolder = portalLogPath; } else { @@ -224,8 +214,7 @@ public class PiwikDownloadLogs { end.add(Calendar.DAY_OF_MONTH, -1); logger.info("Ending period for log download: " + sdf.format(end.getTime())); -// FileSystem fs = FileSystem.get(new Configuration()); - ExecutorService executor = Executors.newFixedThreadPool(10);// creating a pool of 5 threadsσ + ExecutorService executor = Executors.newFixedThreadPool(ExecuteWorkflow.numberOfDownloadThreads); for (int siteId : piwikIdToVisit) { logger.info("Now working on piwikId: " + siteId); @@ -254,6 +243,5 @@ public class PiwikDownloadLogs { while (!executor.isTerminated()) { } System.out.println("Finished all threads"); -// fs.close(); } } diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java index 8c203b237..295e98280 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java @@ -172,7 +172,8 @@ public class SarcStats { " `ItemIdent`.`Value`, `ItemPerformance`.`Period`.`Begin`, " + "`ItemPerformance`.`Instance`.`MetricType`, `ItemPerformance`.`Instance`.`Count` " + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_array " + - "LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent "; + "LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent " + + "WHERE `ItemIdent`.`Type`='DOI'"; stmt.executeUpdate(insert_sarc_sushilogtmp); logger.info("Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)"); diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json index 3d282a7a7..988c23b48 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json +++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json @@ -221,5 +221,11 @@ "paramLongName": "finalTablesVisibleToImpala", "paramDescription": "Make the usage_stats, views_stats and downloads_stats tables visible to Impala", "paramRequired": true - } + }, + { + "paramName": "nodt", + "paramLongName": "numberOfDownloadThreads", + "paramDescription": "Number of download threads", + "paramRequired": true + } ] diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml index 2ad3e6669..8d62a85a9 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml @@ -79,6 +79,7 @@ --sarcNumberOfIssnToDownload${sarcNumberOfIssnToDownload} --finalizeStats${finalizeStats} --finalTablesVisibleToImpala${finalTablesVisibleToImpala} + --numberOfDownloadThreads${numberOfDownloadThreads}