Small corrections for problems that Dimitris found, and a new flag for the number of download threads

This commit is contained in:
Spyros Zoupanos 2020-10-20 20:30:26 +03:00
parent 1ca74ce830
commit 6b5b6796b7
5 changed files with 15 additions and 15 deletions

View File

@ -64,6 +64,8 @@ public class ExecuteWorkflow {
static boolean finalizeStats;
static boolean finalTablesVisibleToImpala;
static int numberOfDownloadThreads;
public static void main(String args[]) throws Exception {
// Sending the logs to the console
@ -179,6 +181,8 @@ public class ExecuteWorkflow {
else
finalTablesVisibleToImpala = false;
numberOfDownloadThreads = Integer.parseInt(parser.get("numberOfDownloadThreads"));
UsageStatsExporter usagestatsExport = new UsageStatsExporter();
usagestatsExport.export();
}

View File

@ -3,10 +3,8 @@ package eu.dnetlib.oa.graph.usagestats.export;
import java.io.*;
import java.net.Authenticator;
import java.net.PasswordAuthentication;
import java.net.URL;
import java.net.URLConnection;
import java.security.cert.X509Certificate;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
@ -18,13 +16,6 @@ import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
@ -131,7 +122,6 @@ public class PiwikDownloadLogs {
String period = "&period=day&date=" + sdf.format(date);
String outFolder = "";
// portal siteId = 109;
if (siteId == Integer.parseInt(portalMatomoID)) {
outFolder = portalLogPath;
} else {
@ -224,8 +214,7 @@ public class PiwikDownloadLogs {
end.add(Calendar.DAY_OF_MONTH, -1);
logger.info("Ending period for log download: " + sdf.format(end.getTime()));
// FileSystem fs = FileSystem.get(new Configuration());
ExecutorService executor = Executors.newFixedThreadPool(10);// creating a fixed pool of 10 threads
ExecutorService executor = Executors.newFixedThreadPool(ExecuteWorkflow.numberOfDownloadThreads);
for (int siteId : piwikIdToVisit) {
logger.info("Now working on piwikId: " + siteId);
@ -254,6 +243,5 @@ public class PiwikDownloadLogs {
while (!executor.isTerminated()) {
}
System.out.println("Finished all threads");
// fs.close();
}
}

View File

@ -172,7 +172,8 @@ public class SarcStats {
" `ItemIdent`.`Value`, `ItemPerformance`.`Period`.`Begin`, " +
"`ItemPerformance`.`Instance`.`MetricType`, `ItemPerformance`.`Instance`.`Count` " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_array " +
"LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent ";
"LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent " +
"WHERE `ItemIdent`.`Type`='DOI'";
stmt.executeUpdate(insert_sarc_sushilogtmp);
logger.info("Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)");

View File

@ -221,5 +221,11 @@
"paramLongName": "finalTablesVisibleToImpala",
"paramDescription": "Make the usage_stats, views_stats and downloads_stats tables visible to Impala",
"paramRequired": true
},
{
"paramName": "nodt",
"paramLongName": "numberOfDownloadThreads",
"paramDescription": "Number of download threads",
"paramRequired": true
}
]

View File

@ -79,6 +79,7 @@
<arg>--sarcNumberOfIssnToDownload</arg><arg>${sarcNumberOfIssnToDownload}</arg>
<arg>--finalizeStats</arg><arg>${finalizeStats}</arg>
<arg>--finalTablesVisibleToImpala</arg><arg>${finalTablesVisibleToImpala}</arg>
<arg>--numberOfDownloadThreads</arg><arg>${numberOfDownloadThreads}</arg>
<capture-output/>
</java>
<ok to="End" />