Adding flags and time limits to Irus stats

This commit is contained in:
Spyros Zoupanos 2020-10-06 23:44:25 +03:00
parent 9a1512004f
commit 8da64d8f54
6 changed files with 126 additions and 42 deletions

View File

@ -48,6 +48,11 @@ public class ExecuteWorkflow {
static boolean downloadLaReferenciaLogs;
static boolean processLaReferenciaLogs;
static boolean irusCreateTablesEmptyDirs;
static boolean irusDownloadReports;
static boolean irusProcessStats;
static int irusNumberOfOpendoarsToDownload;
public static void main(String args[]) throws Exception {
// Sending the logs to the console
@ -116,6 +121,20 @@ public class ExecuteWorkflow {
else
processLaReferenciaLogs = false;
if (parser.get("irusCreateTablesEmptyDirs").toLowerCase().equals("true"))
irusCreateTablesEmptyDirs = true;
else
irusCreateTablesEmptyDirs = false;
if (parser.get("irusDownloadReports").toLowerCase().equals("true"))
irusDownloadReports = true;
else
irusDownloadReports = false;
if (parser.get("irusProcessStats").toLowerCase().equals("true"))
irusProcessStats = true;
else
irusProcessStats = false;
irusNumberOfOpendoarsToDownload = Integer.parseInt(parser.get("irusNumberOfOpendoarsToDownload"));
UsageStatsExporter usagestatsExport = new UsageStatsExporter();
usagestatsExport.export();
}

View File

@ -8,8 +8,10 @@ import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
@ -32,16 +34,22 @@ public class IrusStats {
public IrusStats(String irusUKURL) throws Exception {
this.irusUKURL = irusUKURL;
logger.info("Creating Irus Stats tables");
createTables();
logger.info("Created Irus Stats tables");
// The following may not be needed - It will be created when JSON tables are created
// createTmpTables();
}
private void createTables() throws Exception {
try {
/**
 * Recreates the IRUS-UK report directory on HDFS: removes whatever exists at
 * {@code ExecuteWorkflow.irusUKReportPath} (recursively), then creates it empty
 * so a fresh download run starts from a clean directory.
 *
 * @throws Exception if the HDFS filesystem cannot be reached or modified
 */
public void reCreateLogDirs() throws Exception {
	FileSystem hdfs = FileSystem.get(new Configuration());
	// Build the Path once; delete-then-mkdirs guarantees an empty directory.
	Path reportDir = new Path(ExecuteWorkflow.irusUKReportPath);

	logger.info("Deleting irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
	hdfs.delete(reportDir, true);

	logger.info("Creating irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
	hdfs.mkdirs(reportDir);
}
public void createTables() throws Exception {
try {
logger.info("Creating sushilog");
Statement stmt = ConnectDB.getHiveConnection().createStatement();
String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
@ -198,17 +206,26 @@ public class IrusStats {
}
public void getIrusRRReport(String irusUKReportPath) throws Exception {
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate="
+ simpleDateFormat.format(new Date())
+ "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
SimpleDateFormat sdf = new SimpleDateFormat("YYYY-MM");
// Setting the starting period
Calendar start = (Calendar) ExecuteWorkflow.startingLogPeriod.clone();
logger.info("Starting period for log download: " + sdf.format(start.getTime()));
// Setting the ending period (last day of the month)
Calendar end = (Calendar) ExecuteWorkflow.endingLogPeriod.clone();
end.add(Calendar.MONTH, +1);
end.add(Calendar.DAY_OF_MONTH, -1);
logger.info("Ending period for log download: " + sdf.format(end.getTime()));
String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=" +
sdf.format(start.getTime()) + "&EndDate=" + sdf.format(end.getTime()) +
"&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
logger.info("(processIrusRRReport) Getting report: " + reportUrl);
String text = getJson(reportUrl, "", "");
// log.info("Report: " + text);
List<String> opendoarsToVisit = new ArrayList<String>();
JSONParser parser = new JSONParser();
JSONObject jsonObject = (JSONObject) parser.parse(text);
jsonObject = (JSONObject) jsonObject.get("ReportResponse");
@ -224,6 +241,7 @@ public class IrusStats {
JSONObject opendoar = (JSONObject) identifier;
if (opendoar.get("Type").toString().equals("OpenDOAR")) {
i++;
opendoarsToVisit.add(opendoar.get("Value").toString());
getIrusIRReport(opendoar.get("Value").toString(), irusUKReportPath);
break;
}
@ -231,7 +249,22 @@ public class IrusStats {
// break;
}
logger.info("(processIrusRRReport) Finished with report: " + reportUrl);
logger.info("Found the following opendoars for download: " + opendoarsToVisit);
if (ExecuteWorkflow.irusNumberOfOpendoarsToDownload > 0 &&
ExecuteWorkflow.irusNumberOfOpendoarsToDownload <= opendoarsToVisit.size()) {
logger.info("Trimming siteIds list to the size of: " + ExecuteWorkflow.irusNumberOfOpendoarsToDownload);
opendoarsToVisit = opendoarsToVisit.subList(0, ExecuteWorkflow.irusNumberOfOpendoarsToDownload);
}
logger.info("Downloading the followins opendoars: " + opendoarsToVisit);
for (String opendoar : opendoarsToVisit) {
logger.info("Now working on piwikId: " + opendoar);
this.getIrusIRReport(opendoar, irusUKReportPath);
}
logger.info("Finished with report: " + reportUrl);
}
private void getIrusIRReport(String opendoar, String irusUKReportPath) throws Exception {
@ -242,13 +275,15 @@ public class IrusStats {
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
Calendar start = Calendar.getInstance();
start.set(Calendar.YEAR, 2016);
start.set(Calendar.MONTH, Calendar.JANUARY);
// start.setTime(simpleDateFormat.parse("2016-01"));
// Setting the starting period
Calendar start = (Calendar) ExecuteWorkflow.startingLogPeriod.clone();
logger.info("Starting period for log download: " + simpleDateFormat.format(start.getTime()));
Calendar end = Calendar.getInstance();
// Setting the ending period (last day of the month)
Calendar end = (Calendar) ExecuteWorkflow.endingLogPeriod.clone();
end.add(Calendar.MONTH, +1);
end.add(Calendar.DAY_OF_MONTH, -1);
logger.info("Ending period for log download: " + simpleDateFormat.format(end.getTime()));
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
PreparedStatement st = ConnectDB
@ -264,10 +299,6 @@ public class IrusStats {
}
}
rs_date.close();
PreparedStatement preparedStatement = ConnectDB
.getHiveConnection()
.prepareStatement(
"INSERT INTO sushilogtmp (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
int batch_size = 0;
while (start.before(end)) {
@ -310,7 +341,6 @@ public class IrusStats {
fin.close();
}
preparedStatement.executeBatch();
ConnectDB.getHiveConnection().close();
logger.info("(processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar);

View File

@ -137,7 +137,7 @@ public class LaReferenciaDownloadLogs {
String baseApiUrl = getPiwikLogUrl() + ApimethodGetAllSites + format + "&token_auth=" + this.tokenAuth;
String content = "";
List<Integer> siteIdToVisit = new ArrayList<Integer>();
List<Integer> siteIdsToVisit = new ArrayList<Integer>();
// Getting all the siteIds in a list for logging reasons & limiting the list
// to the max number of siteIds
@ -146,19 +146,19 @@ public class LaReferenciaDownloadLogs {
JSONArray jsonArray = (JSONArray) parser.parse(content);
for (Object aJsonArray : jsonArray) {
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
siteIdToVisit.add(Integer.parseInt(jsonObjectRow.get("idsite").toString()));
siteIdsToVisit.add(Integer.parseInt(jsonObjectRow.get("idsite").toString()));
}
logger.info("Found the following siteIds for download: " + siteIdToVisit);
logger.info("Found the following siteIds for download: " + siteIdsToVisit);
if (ExecuteWorkflow.numberOfPiwikIdsToDownload > 0 &&
ExecuteWorkflow.numberOfPiwikIdsToDownload <= siteIdToVisit.size()) {
ExecuteWorkflow.numberOfPiwikIdsToDownload <= siteIdsToVisit.size()) {
logger.info("Trimming siteIds list to the size of: " + ExecuteWorkflow.numberOfPiwikIdsToDownload);
siteIdToVisit = siteIdToVisit.subList(0, ExecuteWorkflow.numberOfPiwikIdsToDownload);
siteIdsToVisit = siteIdsToVisit.subList(0, ExecuteWorkflow.numberOfPiwikIdsToDownload);
}
logger.info("Downloading from repos with the followins siteIds: " + siteIdToVisit);
logger.info("Downloading from repos with the followins siteIds: " + siteIdsToVisit);
for (int siteId : siteIdToVisit) {
for (int siteId : siteIdsToVisit) {
logger.info("Now working on piwikId: " + siteId);
this.GetLaReFerenciaLogs(repoLogsPath, siteId);
}

View File

@ -60,9 +60,6 @@ public class UsageStatsExporter {
logger.info("Deleting portalLog directory: " + ExecuteWorkflow.portalLogPath);
dfs.delete(new Path(ExecuteWorkflow.portalLogPath), true);
logger.info("Deleting irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
dfs.delete(new Path(ExecuteWorkflow.irusUKReportPath), true);
logger.info("Deleting sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathArray), true);
@ -78,9 +75,6 @@ public class UsageStatsExporter {
logger.info("Creating portalLog directory: " + ExecuteWorkflow.portalLogPath);
dfs.mkdirs(new Path(ExecuteWorkflow.portalLogPath));
logger.info("Creating irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
dfs.mkdirs(new Path(ExecuteWorkflow.irusUKReportPath));
logger.info("Creating sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathArray));
@ -146,14 +140,28 @@ public class UsageStatsExporter {
lastats.processLogs();
logger.info("LaReferencia logs done");
}
IrusStats irusstats = new IrusStats(ExecuteWorkflow.irusUKBaseURL);
if (ExecuteWorkflow.irusCreateTablesEmptyDirs) {
logger.info("Creating Irus Stats tables");
irusstats.createTables();
logger.info("Created Irus Stats tables");
logger.info("Re-create log dirs");
irusstats.reCreateLogDirs();
logger.info("Re-created log dirs");
}
if (ExecuteWorkflow.irusDownloadReports) {
irusstats.getIrusRRReport(ExecuteWorkflow.irusUKReportPath);
}
if (ExecuteWorkflow.irusProcessStats) {
irusstats.processIrusStats();
logger.info("Irus done");
}
System.exit(0);
// IrusStats irusstats = new IrusStats(irusUKBaseURL);
// irusstats.getIrusRRReport(irusUKReportPath);
// irusstats.processIrusStats();
// log.info("irus done");
// SarcStats sarcStats = new SarcStats();
// sarcStats.getAndProcessSarc(sarcsReportPathArray, sarcsReportPathNonArray);
// sarcStats.finalizeSarcStats();

View File

@ -148,6 +148,29 @@
"paramLongName": "downloadLaReferenciaLogs",
"paramDescription": "download La Referencia logs?",
"paramRequired": true
},
{
"paramName": "icted",
"paramLongName": "irusCreateTablesEmptyDirs",
"paramDescription": "Irus section: Create tables and empty JSON directories?",
"paramRequired": true
},
{
"paramName": "idr",
"paramLongName": "irusDownloadReports",
"paramDescription": "Irus section: Download reports?",
"paramRequired": true
},
{
"paramName": "ipr",
"paramLongName": "irusProcessStats",
"paramDescription": "Irus section: Process stats?",
"paramRequired": true
},
{
"paramName": "inod",
"paramLongName": "irusNumberOfOpendoarsToDownload",
"paramDescription": "Limit the number of the downloaded Opendoars (Irus) to the first irusNumberOfOpendoarsToDownload",
"paramRequired": true
}
]

View File

@ -67,6 +67,10 @@
<arg>--numberOfSiteIdsToDownload</arg><arg>${numberOfSiteIdsToDownload}</arg>
<arg>--downloadLaReferenciaLogs</arg><arg>${downloadLaReferenciaLogs}</arg>
<arg>--processLaReferenciaLogs</arg><arg>${processLaReferenciaLogs}</arg>
<arg>--irusCreateTablesEmptyDirs</arg><arg>${irusCreateTablesEmptyDirs}</arg>
<arg>--irusDownloadReports</arg><arg>${irusDownloadReports}</arg>
<arg>--irusProcessStats</arg><arg>${irusProcessStats}</arg>
<arg>--irusNumberOfOpendoarsToDownload</arg><arg>${irusNumberOfOpendoarsToDownload}</arg>
<capture-output/>
</java>
<ok to="End" />