forked from D-Net/dnet-hadoop
Changes for proper log downloading (limits on starting and ending period) + loggers to STDOUT
This commit is contained in:
parent
e2748fea95
commit
7b7075cfdd
|
@ -6,7 +6,12 @@
|
||||||
|
|
||||||
package eu.dnetlib.oa.graph.usagestats.export;
|
package eu.dnetlib.oa.graph.usagestats.export;
|
||||||
|
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.Calendar;
|
||||||
|
import java.util.Date;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.log4j.BasicConfigurator;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
|
||||||
|
@ -32,9 +37,14 @@ public class ExecuteWorkflow {
|
||||||
static String usageStatsDBSchema;
|
static String usageStatsDBSchema;
|
||||||
static String statsDBSchema;
|
static String statsDBSchema;
|
||||||
static boolean downloadLogs;
|
static boolean downloadLogs;
|
||||||
|
static Calendar startingLogPeriod;
|
||||||
|
static Calendar endingLogPeriod;
|
||||||
|
|
||||||
public static void main(String args[]) throws Exception {
|
public static void main(String args[]) throws Exception {
|
||||||
|
|
||||||
|
// Sending the logs to the console
|
||||||
|
BasicConfigurator.configure();
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
|
@ -57,17 +67,33 @@ public class ExecuteWorkflow {
|
||||||
lareferenciaBaseURL = parser.get("lareferenciaBaseURL");
|
lareferenciaBaseURL = parser.get("lareferenciaBaseURL");
|
||||||
lareferenciaAuthToken = parser.get("lareferenciaAuthToken");
|
lareferenciaAuthToken = parser.get("lareferenciaAuthToken");
|
||||||
|
|
||||||
if (parser.get("downloadLogs").toLowerCase().equals("true"))
|
|
||||||
downloadLogs = true;
|
|
||||||
else
|
|
||||||
downloadLogs = false;
|
|
||||||
|
|
||||||
dbHiveUrl = parser.get("dbHiveUrl");
|
dbHiveUrl = parser.get("dbHiveUrl");
|
||||||
dbImpalaUrl = parser.get("dbImpalaUrl");
|
dbImpalaUrl = parser.get("dbImpalaUrl");
|
||||||
usageStatsDBSchema = parser.get("usageStatsDBSchema");
|
usageStatsDBSchema = parser.get("usageStatsDBSchema");
|
||||||
statsDBSchema = parser.get("statsDBSchema");
|
statsDBSchema = parser.get("statsDBSchema");
|
||||||
|
|
||||||
|
if (parser.get("downloadLogs").toLowerCase().equals("true"))
|
||||||
|
downloadLogs = true;
|
||||||
|
else
|
||||||
|
downloadLogs = false;
|
||||||
|
|
||||||
|
String startingLogPeriodStr = parser.get("startingLogPeriod");
|
||||||
|
Date startingLogPeriodDate = new SimpleDateFormat("MM/yyyy").parse(startingLogPeriodStr);
|
||||||
|
startingLogPeriod = startingLogPeriodStr(startingLogPeriodDate);
|
||||||
|
|
||||||
|
String endingLogPeriodStr = parser.get("endingLogPeriod");
|
||||||
|
Date endingLogPeriodDate = new SimpleDateFormat("MM/yyyy").parse(endingLogPeriodStr);
|
||||||
|
endingLogPeriod = startingLogPeriodStr(endingLogPeriodDate);
|
||||||
|
|
||||||
UsageStatsExporter usagestatsExport = new UsageStatsExporter();
|
UsageStatsExporter usagestatsExport = new UsageStatsExporter();
|
||||||
usagestatsExport.export();
|
usagestatsExport.export();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Calendar startingLogPeriodStr(Date date) {
|
||||||
|
|
||||||
|
Calendar calendar = Calendar.getInstance();
|
||||||
|
calendar.setTime(date);
|
||||||
|
return calendar;
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -87,6 +87,8 @@ public class PiwikDownloadLogs {
|
||||||
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
|
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
|
||||||
|
|
||||||
Statement statement = ConnectDB.getHiveConnection().createStatement();
|
Statement statement = ConnectDB.getHiveConnection().createStatement();
|
||||||
|
// SimpleDateFormat sdf = new SimpleDateFormat("MM/dd/yyyy");
|
||||||
|
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
||||||
|
|
||||||
ResultSet rs = statement
|
ResultSet rs = statement
|
||||||
.executeQuery(
|
.executeQuery(
|
||||||
|
@ -94,17 +96,21 @@ public class PiwikDownloadLogs {
|
||||||
+ ".datasource where piwik_id is not null and piwik_id <> 0 order by piwik_id");
|
+ ".datasource where piwik_id is not null and piwik_id <> 0 order by piwik_id");
|
||||||
while (rs.next()) {
|
while (rs.next()) {
|
||||||
int siteId = rs.getInt(1);
|
int siteId = rs.getInt(1);
|
||||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
// SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||||
|
|
||||||
Calendar start = Calendar.getInstance();
|
// Calendar start = Calendar.getInstance();
|
||||||
start.set(Calendar.YEAR, 2016);
|
// start.set(Calendar.YEAR, 2016);
|
||||||
start.set(Calendar.MONTH, Calendar.MARCH);
|
// start.set(Calendar.MONTH, Calendar.MARCH);
|
||||||
// start.setTime(simpleDateFormat.parse("2016-01"));
|
// start.setTime(simpleDateFormat.parse("2016-01"));
|
||||||
|
Calendar start = ExecuteWorkflow.startingLogPeriod;
|
||||||
|
logger.info("GetOpenAIRELogs starting period: " + sdf.format(start.getTime()));
|
||||||
|
|
||||||
Calendar end = Calendar.getInstance();
|
// Calendar end = Calendar.getInstance();
|
||||||
|
// end.add(Calendar.DAY_OF_MONTH, -1);
|
||||||
|
Calendar end = ExecuteWorkflow.endingLogPeriod;
|
||||||
end.add(Calendar.DAY_OF_MONTH, -1);
|
end.add(Calendar.DAY_OF_MONTH, -1);
|
||||||
|
logger.info("GetOpenAIRELogs ending period: " + sdf.format(end.getTime()));
|
||||||
|
|
||||||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
|
||||||
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
||||||
.prepareStatement(
|
.prepareStatement(
|
||||||
"SELECT max(timestamp) FROM " + ConnectDB.getUsageStatsDBSchema()
|
"SELECT max(timestamp) FROM " + ConnectDB.getUsageStatsDBSchema()
|
||||||
|
|
|
@ -20,6 +20,10 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
public class UsageStatsExporter {
|
public class UsageStatsExporter {
|
||||||
|
|
||||||
|
/**
 * Creates an exporter. The constructor performs no initialisation of its own.
 */
public UsageStatsExporter() {
	// Intentionally empty.
}
|
||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
|
private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
|
||||||
|
|
||||||
public void runImpalaQuery() throws Exception {
|
public void runImpalaQuery() throws Exception {
|
||||||
|
@ -50,8 +54,6 @@ public class UsageStatsExporter {
|
||||||
private void reCreateLogDirs() throws IllegalArgumentException, IOException {
|
private void reCreateLogDirs() throws IllegalArgumentException, IOException {
|
||||||
FileSystem dfs = FileSystem.get(new Configuration());
|
FileSystem dfs = FileSystem.get(new Configuration());
|
||||||
|
|
||||||
logger.info("Deleting log directories");
|
|
||||||
|
|
||||||
logger.info("Deleting repoLog directory: " + ExecuteWorkflow.repoLogPath);
|
logger.info("Deleting repoLog directory: " + ExecuteWorkflow.repoLogPath);
|
||||||
dfs.delete(new Path(ExecuteWorkflow.repoLogPath), true);
|
dfs.delete(new Path(ExecuteWorkflow.repoLogPath), true);
|
||||||
|
|
||||||
|
@ -70,8 +72,6 @@ public class UsageStatsExporter {
|
||||||
logger.info("Deleting lareferenciaLog directory: " + ExecuteWorkflow.lareferenciaLogPath);
|
logger.info("Deleting lareferenciaLog directory: " + ExecuteWorkflow.lareferenciaLogPath);
|
||||||
dfs.delete(new Path(ExecuteWorkflow.lareferenciaLogPath), true);
|
dfs.delete(new Path(ExecuteWorkflow.lareferenciaLogPath), true);
|
||||||
|
|
||||||
logger.info("Creating log directories");
|
|
||||||
|
|
||||||
logger.info("Creating repoLog directory: " + ExecuteWorkflow.repoLogPath);
|
logger.info("Creating repoLog directory: " + ExecuteWorkflow.repoLogPath);
|
||||||
dfs.mkdirs(new Path(ExecuteWorkflow.repoLogPath));
|
dfs.mkdirs(new Path(ExecuteWorkflow.repoLogPath));
|
||||||
|
|
||||||
|
@ -101,10 +101,9 @@ public class UsageStatsExporter {
|
||||||
// Create DB tables - they are also needed to download the statistics too
|
// Create DB tables - they are also needed to download the statistics too
|
||||||
logger.info("Creating database and tables");
|
logger.info("Creating database and tables");
|
||||||
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
|
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
|
||||||
//
|
|
||||||
reCreateLogDirs();
|
|
||||||
|
|
||||||
System.exit(0);
|
logger.info("Recreating log directories");
|
||||||
|
reCreateLogDirs();
|
||||||
|
|
||||||
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
|
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
|
||||||
// // the moment
|
// // the moment
|
||||||
|
@ -118,6 +117,8 @@ public class UsageStatsExporter {
|
||||||
ExecuteWorkflow.portalLogPath, ExecuteWorkflow.portalMatomoID);
|
ExecuteWorkflow.portalLogPath, ExecuteWorkflow.portalMatomoID);
|
||||||
logger.info("Downloaded piwik logs");
|
logger.info("Downloaded piwik logs");
|
||||||
|
|
||||||
|
System.exit(0);
|
||||||
|
|
||||||
// Create DB tables, insert/update statistics
|
// Create DB tables, insert/update statistics
|
||||||
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
||||||
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
||||||
|
|
|
@ -100,5 +100,17 @@
|
||||||
"paramLongName": "downloadLogs",
|
"paramLongName": "downloadLogs",
|
||||||
"paramDescription": "download logs?",
|
"paramDescription": "download logs?",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "slp",
|
||||||
|
"paramLongName": "startingLogPeriod",
|
||||||
|
"paramDescription": "Starting log period",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "elp",
|
||||||
|
"paramLongName": "endingLogPeriod",
|
||||||
|
"paramDescription": "Ending log period",
|
||||||
|
"paramRequired": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -59,6 +59,8 @@
|
||||||
<arg>--usageStatsDBSchema</arg><arg>${usageStatsDBSchema}</arg>
|
<arg>--usageStatsDBSchema</arg><arg>${usageStatsDBSchema}</arg>
|
||||||
<arg>--statsDBSchema</arg><arg>${statsDBSchema}</arg>
|
<arg>--statsDBSchema</arg><arg>${statsDBSchema}</arg>
|
||||||
<arg>--downloadLogs</arg><arg>${downloadLogs}</arg>
|
<arg>--downloadLogs</arg><arg>${downloadLogs}</arg>
|
||||||
|
<arg>--startingLogPeriod</arg><arg>${startingLogPeriod}</arg>
|
||||||
|
<arg>--endingLogPeriod</arg><arg>${endingLogPeriod}</arg>
|
||||||
<capture-output/>
|
<capture-output/>
|
||||||
</java>
|
</java>
|
||||||
<ok to="End" />
|
<ok to="End" />
|
||||||
|
|
Loading…
Reference in New Issue