forked from D-Net/dnet-hadoop
Small corrections for problems that Dimitris found, plus a new flag for the number of download threads
This commit is contained in:
parent
1ca74ce830
commit
6b5b6796b7
|
@ -64,6 +64,8 @@ public class ExecuteWorkflow {
|
||||||
static boolean finalizeStats;
|
static boolean finalizeStats;
|
||||||
static boolean finalTablesVisibleToImpala;
|
static boolean finalTablesVisibleToImpala;
|
||||||
|
|
||||||
|
static int numberOfDownloadThreads;
|
||||||
|
|
||||||
public static void main(String args[]) throws Exception {
|
public static void main(String args[]) throws Exception {
|
||||||
|
|
||||||
// Sending the logs to the console
|
// Sending the logs to the console
|
||||||
|
@ -179,6 +181,8 @@ public class ExecuteWorkflow {
|
||||||
else
|
else
|
||||||
finalTablesVisibleToImpala = false;
|
finalTablesVisibleToImpala = false;
|
||||||
|
|
||||||
|
numberOfDownloadThreads = Integer.parseInt(parser.get("numberOfDownloadThreads"));
|
||||||
|
|
||||||
UsageStatsExporter usagestatsExport = new UsageStatsExporter();
|
UsageStatsExporter usagestatsExport = new UsageStatsExporter();
|
||||||
usagestatsExport.export();
|
usagestatsExport.export();
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,10 +3,8 @@ package eu.dnetlib.oa.graph.usagestats.export;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.net.Authenticator;
|
import java.net.Authenticator;
|
||||||
import java.net.PasswordAuthentication;
|
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLConnection;
|
import java.net.URLConnection;
|
||||||
import java.security.cert.X509Certificate;
|
|
||||||
import java.sql.PreparedStatement;
|
import java.sql.PreparedStatement;
|
||||||
import java.sql.ResultSet;
|
import java.sql.ResultSet;
|
||||||
import java.sql.Statement;
|
import java.sql.Statement;
|
||||||
|
@ -18,13 +16,6 @@ import java.util.List;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
|
|
||||||
import javax.net.ssl.HostnameVerifier;
|
|
||||||
import javax.net.ssl.HttpsURLConnection;
|
|
||||||
import javax.net.ssl.SSLContext;
|
|
||||||
import javax.net.ssl.SSLSession;
|
|
||||||
import javax.net.ssl.TrustManager;
|
|
||||||
import javax.net.ssl.X509TrustManager;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
@ -131,7 +122,6 @@ public class PiwikDownloadLogs {
|
||||||
|
|
||||||
String period = "&period=day&date=" + sdf.format(date);
|
String period = "&period=day&date=" + sdf.format(date);
|
||||||
String outFolder = "";
|
String outFolder = "";
|
||||||
// portal siteId = 109;
|
|
||||||
if (siteId == Integer.parseInt(portalMatomoID)) {
|
if (siteId == Integer.parseInt(portalMatomoID)) {
|
||||||
outFolder = portalLogPath;
|
outFolder = portalLogPath;
|
||||||
} else {
|
} else {
|
||||||
|
@ -224,8 +214,7 @@ public class PiwikDownloadLogs {
|
||||||
end.add(Calendar.DAY_OF_MONTH, -1);
|
end.add(Calendar.DAY_OF_MONTH, -1);
|
||||||
logger.info("Ending period for log download: " + sdf.format(end.getTime()));
|
logger.info("Ending period for log download: " + sdf.format(end.getTime()));
|
||||||
|
|
||||||
// FileSystem fs = FileSystem.get(new Configuration());
|
ExecutorService executor = Executors.newFixedThreadPool(ExecuteWorkflow.numberOfDownloadThreads);
|
||||||
ExecutorService executor = Executors.newFixedThreadPool(10);// creating a pool of 5 threadsσ
|
|
||||||
for (int siteId : piwikIdToVisit) {
|
for (int siteId : piwikIdToVisit) {
|
||||||
|
|
||||||
logger.info("Now working on piwikId: " + siteId);
|
logger.info("Now working on piwikId: " + siteId);
|
||||||
|
@ -254,6 +243,5 @@ public class PiwikDownloadLogs {
|
||||||
while (!executor.isTerminated()) {
|
while (!executor.isTerminated()) {
|
||||||
}
|
}
|
||||||
System.out.println("Finished all threads");
|
System.out.println("Finished all threads");
|
||||||
// fs.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -172,7 +172,8 @@ public class SarcStats {
|
||||||
" `ItemIdent`.`Value`, `ItemPerformance`.`Period`.`Begin`, " +
|
" `ItemIdent`.`Value`, `ItemPerformance`.`Period`.`Begin`, " +
|
||||||
"`ItemPerformance`.`Instance`.`MetricType`, `ItemPerformance`.`Instance`.`Count` " +
|
"`ItemPerformance`.`Instance`.`MetricType`, `ItemPerformance`.`Instance`.`Count` " +
|
||||||
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_array " +
|
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_array " +
|
||||||
"LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent ";
|
"LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent " +
|
||||||
|
"WHERE `ItemIdent`.`Type`='DOI'";
|
||||||
stmt.executeUpdate(insert_sarc_sushilogtmp);
|
stmt.executeUpdate(insert_sarc_sushilogtmp);
|
||||||
logger.info("Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)");
|
logger.info("Inserted to sarc_sushilogtmp table (sarc_sushilogtmp_json_array)");
|
||||||
|
|
||||||
|
|
|
@ -221,5 +221,11 @@
|
||||||
"paramLongName": "finalTablesVisibleToImpala",
|
"paramLongName": "finalTablesVisibleToImpala",
|
||||||
"paramDescription": "Make the usage_stats, views_stats and downloads_stats tables visible to Impala",
|
"paramDescription": "Make the usage_stats, views_stats and downloads_stats tables visible to Impala",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
"paramName": "nodt",
|
||||||
|
"paramLongName": "numberOfDownloadThreads",
|
||||||
|
"paramDescription": "Number of download threads",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -79,6 +79,7 @@
|
||||||
<arg>--sarcNumberOfIssnToDownload</arg><arg>${sarcNumberOfIssnToDownload}</arg>
|
<arg>--sarcNumberOfIssnToDownload</arg><arg>${sarcNumberOfIssnToDownload}</arg>
|
||||||
<arg>--finalizeStats</arg><arg>${finalizeStats}</arg>
|
<arg>--finalizeStats</arg><arg>${finalizeStats}</arg>
|
||||||
<arg>--finalTablesVisibleToImpala</arg><arg>${finalTablesVisibleToImpala}</arg>
|
<arg>--finalTablesVisibleToImpala</arg><arg>${finalTablesVisibleToImpala}</arg>
|
||||||
|
<arg>--numberOfDownloadThreads</arg><arg>${numberOfDownloadThreads}</arg>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</java>
|
</java>
|
||||||
<ok to="End" />
|
<ok to="End" />
|
||||||
|
|
Loading…
Reference in New Issue