forked from D-Net/dnet-hadoop
Restrict log downloading to a configurable starting and ending period, and send logger output to STDOUT
This commit is contained in:
parent
e2748fea95
commit
7b7075cfdd
|
@ -6,7 +6,12 @@
|
|||
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.log4j.BasicConfigurator;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
|
@ -32,9 +37,14 @@ public class ExecuteWorkflow {
|
|||
static String usageStatsDBSchema;
|
||||
static String statsDBSchema;
|
||||
static boolean downloadLogs;
|
||||
static Calendar startingLogPeriod;
|
||||
static Calendar endingLogPeriod;
|
||||
|
||||
public static void main(String args[]) throws Exception {
|
||||
|
||||
// Sending the logs to the console
|
||||
BasicConfigurator.configure();
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
|
@ -57,17 +67,33 @@ public class ExecuteWorkflow {
|
|||
lareferenciaBaseURL = parser.get("lareferenciaBaseURL");
|
||||
lareferenciaAuthToken = parser.get("lareferenciaAuthToken");
|
||||
|
||||
if (parser.get("downloadLogs").toLowerCase().equals("true"))
|
||||
downloadLogs = true;
|
||||
else
|
||||
downloadLogs = false;
|
||||
|
||||
dbHiveUrl = parser.get("dbHiveUrl");
|
||||
dbImpalaUrl = parser.get("dbImpalaUrl");
|
||||
usageStatsDBSchema = parser.get("usageStatsDBSchema");
|
||||
statsDBSchema = parser.get("statsDBSchema");
|
||||
|
||||
if (parser.get("downloadLogs").toLowerCase().equals("true"))
|
||||
downloadLogs = true;
|
||||
else
|
||||
downloadLogs = false;
|
||||
|
||||
String startingLogPeriodStr = parser.get("startingLogPeriod");
|
||||
Date startingLogPeriodDate = new SimpleDateFormat("MM/yyyy").parse(startingLogPeriodStr);
|
||||
startingLogPeriod = startingLogPeriodStr(startingLogPeriodDate);
|
||||
|
||||
String endingLogPeriodStr = parser.get("endingLogPeriod");
|
||||
Date endingLogPeriodDate = new SimpleDateFormat("MM/yyyy").parse(endingLogPeriodStr);
|
||||
endingLogPeriod = startingLogPeriodStr(endingLogPeriodDate);
|
||||
|
||||
UsageStatsExporter usagestatsExport = new UsageStatsExporter();
|
||||
usagestatsExport.export();
|
||||
}
|
||||
|
||||
/**
 * Wraps the given {@link Date} in a newly created {@link Calendar}.
 *
 * Despite the "Str" suffix in its name, this helper returns a Calendar, not a String; main() calls it for both the
 * starting and the ending log period.
 *
 * @param date the point in time to load into the calendar
 * @return a new Calendar obtained from Calendar.getInstance() with its time set to {@code date}
 */
private static Calendar startingLogPeriodStr(Date date) {
|
||||
|
||||
Calendar calendar = Calendar.getInstance();
|
||||
calendar.setTime(date);
|
||||
return calendar;
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -87,6 +87,8 @@ public class PiwikDownloadLogs {
|
|||
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
|
||||
|
||||
Statement statement = ConnectDB.getHiveConnection().createStatement();
|
||||
// SimpleDateFormat sdf = new SimpleDateFormat("MM/dd/yyyy");
|
||||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
||||
|
||||
ResultSet rs = statement
|
||||
.executeQuery(
|
||||
|
@ -94,17 +96,21 @@ public class PiwikDownloadLogs {
|
|||
+ ".datasource where piwik_id is not null and piwik_id <> 0 order by piwik_id");
|
||||
while (rs.next()) {
|
||||
int siteId = rs.getInt(1);
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
// SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
|
||||
Calendar start = Calendar.getInstance();
|
||||
start.set(Calendar.YEAR, 2016);
|
||||
start.set(Calendar.MONTH, Calendar.MARCH);
|
||||
// Calendar start = Calendar.getInstance();
|
||||
// start.set(Calendar.YEAR, 2016);
|
||||
// start.set(Calendar.MONTH, Calendar.MARCH);
|
||||
// start.setTime(simpleDateFormat.parse("2016-01"));
|
||||
Calendar start = ExecuteWorkflow.startingLogPeriod;
|
||||
logger.info("GetOpenAIRELogs starting period: " + sdf.format(start.getTime()));
|
||||
|
||||
Calendar end = Calendar.getInstance();
|
||||
// Calendar end = Calendar.getInstance();
|
||||
// end.add(Calendar.DAY_OF_MONTH, -1);
|
||||
Calendar end = ExecuteWorkflow.endingLogPeriod;
|
||||
end.add(Calendar.DAY_OF_MONTH, -1);
|
||||
logger.info("GetOpenAIRELogs ending period: " + sdf.format(end.getTime()));
|
||||
|
||||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
||||
PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
|
||||
.prepareStatement(
|
||||
"SELECT max(timestamp) FROM " + ConnectDB.getUsageStatsDBSchema()
|
||||
|
|
|
@ -20,6 +20,10 @@ import org.slf4j.LoggerFactory;
|
|||
*/
|
||||
public class UsageStatsExporter {
|
||||
|
||||
/** Creates an exporter; this constructor has an empty body and performs no initialisation of its own. */
public UsageStatsExporter() {
|
||||
|
||||
}
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
|
||||
|
||||
public void runImpalaQuery() throws Exception {
|
||||
|
@ -50,8 +54,6 @@ public class UsageStatsExporter {
|
|||
private void reCreateLogDirs() throws IllegalArgumentException, IOException {
|
||||
FileSystem dfs = FileSystem.get(new Configuration());
|
||||
|
||||
logger.info("Deleting log directories");
|
||||
|
||||
logger.info("Deleting repoLog directory: " + ExecuteWorkflow.repoLogPath);
|
||||
dfs.delete(new Path(ExecuteWorkflow.repoLogPath), true);
|
||||
|
||||
|
@ -70,8 +72,6 @@ public class UsageStatsExporter {
|
|||
logger.info("Deleting lareferenciaLog directory: " + ExecuteWorkflow.lareferenciaLogPath);
|
||||
dfs.delete(new Path(ExecuteWorkflow.lareferenciaLogPath), true);
|
||||
|
||||
logger.info("Creating log directories");
|
||||
|
||||
logger.info("Creating repoLog directory: " + ExecuteWorkflow.repoLogPath);
|
||||
dfs.mkdirs(new Path(ExecuteWorkflow.repoLogPath));
|
||||
|
||||
|
@ -101,10 +101,9 @@ public class UsageStatsExporter {
|
|||
// Create DB tables - they are also needed to download the statistics too
|
||||
logger.info("Creating database and tables");
|
||||
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
|
||||
//
|
||||
reCreateLogDirs();
|
||||
|
||||
System.exit(0);
|
||||
logger.info("Recreating log directories");
|
||||
reCreateLogDirs();
|
||||
|
||||
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
|
||||
// // the moment
|
||||
|
@ -118,6 +117,8 @@ public class UsageStatsExporter {
|
|||
ExecuteWorkflow.portalLogPath, ExecuteWorkflow.portalMatomoID);
|
||||
logger.info("Downloaded piwik logs");
|
||||
|
||||
System.exit(0);
|
||||
|
||||
// Create DB tables, insert/update statistics
|
||||
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
||||
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
||||
|
|
|
@ -100,5 +100,17 @@
|
|||
"paramLongName": "downloadLogs",
|
||||
"paramDescription": "download logs?",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "slp",
|
||||
"paramLongName": "startingLogPeriod",
|
||||
"paramDescription": "Starting log period (format: MM/yyyy)",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "elp",
|
||||
"paramLongName": "endingLogPeriod",
|
||||
"paramDescription": "Ending log period (format: MM/yyyy)",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
|
|
@ -59,6 +59,8 @@
|
|||
<arg>--usageStatsDBSchema</arg><arg>${usageStatsDBSchema}</arg>
|
||||
<arg>--statsDBSchema</arg><arg>${statsDBSchema}</arg>
|
||||
<arg>--downloadLogs</arg><arg>${downloadLogs}</arg>
|
||||
<arg>--startingLogPeriod</arg><arg>${startingLogPeriod}</arg>
|
||||
<arg>--endingLogPeriod</arg><arg>${endingLogPeriod}</arg>
|
||||
<capture-output/>
|
||||
</java>
|
||||
<ok to="End" />
|
||||
|
|
Loading…
Reference in New Issue