Changes for proper log downloading (limits on starting and ending period) + loggers to STDOUT

This commit is contained in:
Spyros Zoupanos 2020-10-04 00:24:55 +03:00
parent e2748fea95
commit 7b7075cfdd
5 changed files with 65 additions and 18 deletions

View File

@ -6,7 +6,12 @@
package eu.dnetlib.oa.graph.usagestats.export;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.BasicConfigurator;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
@ -32,9 +37,14 @@ public class ExecuteWorkflow {
static String usageStatsDBSchema;
static String statsDBSchema;
static boolean downloadLogs;
static Calendar startingLogPeriod;
static Calendar endingLogPeriod;
public static void main(String args[]) throws Exception {
// Sending the logs to the console
BasicConfigurator.configure();
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
@ -57,17 +67,33 @@ public class ExecuteWorkflow {
lareferenciaBaseURL = parser.get("lareferenciaBaseURL");
lareferenciaAuthToken = parser.get("lareferenciaAuthToken");
if (parser.get("downloadLogs").toLowerCase().equals("true"))
downloadLogs = true;
else
downloadLogs = false;
dbHiveUrl = parser.get("dbHiveUrl");
dbImpalaUrl = parser.get("dbImpalaUrl");
usageStatsDBSchema = parser.get("usageStatsDBSchema");
statsDBSchema = parser.get("statsDBSchema");
if (parser.get("downloadLogs").toLowerCase().equals("true"))
downloadLogs = true;
else
downloadLogs = false;
String startingLogPeriodStr = parser.get("startingLogPeriod");
Date startingLogPeriodDate = new SimpleDateFormat("MM/yyyy").parse(startingLogPeriodStr);
startingLogPeriod = startingLogPeriodStr(startingLogPeriodDate);
String endingLogPeriodStr = parser.get("endingLogPeriod");
Date endingLogPeriodDate = new SimpleDateFormat("MM/yyyy").parse(endingLogPeriodStr);
endingLogPeriod = startingLogPeriodStr(endingLogPeriodDate);
UsageStatsExporter usagestatsExport = new UsageStatsExporter();
usagestatsExport.export();
}
/**
 * Converts a {@link Date} into a {@link Calendar} that points at the same instant.
 * Note that, despite its name, this helper is applied to both the starting and the
 * ending log period parsed in {@code main}.
 *
 * @param date the instant to load into the calendar
 * @return a fresh calendar from {@link Calendar#getInstance()} whose time equals {@code date}
 */
private static Calendar startingLogPeriodStr(Date date) {
	final Calendar periodCalendar = Calendar.getInstance();
	// Same effect as Calendar.setTime(date): the calendar takes over the date's epoch millis.
	periodCalendar.setTimeInMillis(date.getTime());
	return periodCalendar;
}
}

View File

@ -87,6 +87,8 @@ public class PiwikDownloadLogs {
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
Statement statement = ConnectDB.getHiveConnection().createStatement();
// SimpleDateFormat sdf = new SimpleDateFormat("MM/dd/yyyy");
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
ResultSet rs = statement
.executeQuery(
@ -94,17 +96,21 @@ public class PiwikDownloadLogs {
+ ".datasource where piwik_id is not null and piwik_id <> 0 order by piwik_id");
while (rs.next()) {
int siteId = rs.getInt(1);
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
// SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
Calendar start = Calendar.getInstance();
start.set(Calendar.YEAR, 2016);
start.set(Calendar.MONTH, Calendar.MARCH);
// Calendar start = Calendar.getInstance();
// start.set(Calendar.YEAR, 2016);
// start.set(Calendar.MONTH, Calendar.MARCH);
// start.setTime(simpleDateFormat.parse("2016-01"));
Calendar start = ExecuteWorkflow.startingLogPeriod;
logger.info("GetOpenAIRELogs starting period: " + sdf.format(start.getTime()));
Calendar end = Calendar.getInstance();
// Calendar end = Calendar.getInstance();
// end.add(Calendar.DAY_OF_MONTH, -1);
Calendar end = ExecuteWorkflow.endingLogPeriod;
end.add(Calendar.DAY_OF_MONTH, -1);
logger.info("GetOpenAIRELogs ending period: " + sdf.format(end.getTime()));
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
.prepareStatement(
"SELECT max(timestamp) FROM " + ConnectDB.getUsageStatsDBSchema()

View File

@ -20,6 +20,10 @@ import org.slf4j.LoggerFactory;
*/
public class UsageStatsExporter {
/**
 * Creates a {@code UsageStatsExporter}. The constructor body is empty, so no
 * initialization is performed here.
 */
public UsageStatsExporter() {
}
private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
public void runImpalaQuery() throws Exception {
@ -50,8 +54,6 @@ public class UsageStatsExporter {
private void reCreateLogDirs() throws IllegalArgumentException, IOException {
FileSystem dfs = FileSystem.get(new Configuration());
logger.info("Deleting log directories");
logger.info("Deleting repoLog directory: " + ExecuteWorkflow.repoLogPath);
dfs.delete(new Path(ExecuteWorkflow.repoLogPath), true);
@ -70,8 +72,6 @@ public class UsageStatsExporter {
logger.info("Deleting lareferenciaLog directory: " + ExecuteWorkflow.lareferenciaLogPath);
dfs.delete(new Path(ExecuteWorkflow.lareferenciaLogPath), true);
logger.info("Creating log directories");
logger.info("Creating repoLog directory: " + ExecuteWorkflow.repoLogPath);
dfs.mkdirs(new Path(ExecuteWorkflow.repoLogPath));
@ -101,10 +101,9 @@ public class UsageStatsExporter {
// Create DB tables - they are also needed to download the statistics too
logger.info("Creating database and tables");
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
//
reCreateLogDirs();
System.exit(0);
logger.info("Recreating log directories");
reCreateLogDirs();
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
// // the moment
@ -118,6 +117,8 @@ public class UsageStatsExporter {
ExecuteWorkflow.portalLogPath, ExecuteWorkflow.portalMatomoID);
logger.info("Downloaded piwik logs");
System.exit(0);
// Create DB tables, insert/update statistics
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";

View File

@ -100,5 +100,17 @@
"paramLongName": "downloadLogs",
"paramDescription": "download logs?",
"paramRequired": true
},
{
"paramName": "slp",
"paramLongName": "startingLogPeriod",
"paramDescription": "Starting log period",
"paramRequired": true
},
{
"paramName": "elp",
"paramLongName": "endingLogPeriod",
"paramDescription": "Ending log period",
"paramRequired": true
}
]

View File

@ -59,6 +59,8 @@
<arg>--usageStatsDBSchema</arg><arg>${usageStatsDBSchema}</arg>
<arg>--statsDBSchema</arg><arg>${statsDBSchema}</arg>
<arg>--downloadLogs</arg><arg>${downloadLogs}</arg>
<arg>--startingLogPeriod</arg><arg>${startingLogPeriod}</arg>
<arg>--endingLogPeriod</arg><arg>${endingLogPeriod}</arg>
<capture-output/>
</java>
<ok to="End" />