Adding flags and time limits to Irus stats

This commit is contained in:
Spyros Zoupanos 2020-10-06 23:44:25 +03:00
parent 9a1512004f
commit 8da64d8f54
6 changed files with 126 additions and 42 deletions

View File

@ -48,6 +48,11 @@ public class ExecuteWorkflow {
static boolean downloadLaReferenciaLogs; static boolean downloadLaReferenciaLogs;
static boolean processLaReferenciaLogs; static boolean processLaReferenciaLogs;
static boolean irusCreateTablesEmptyDirs;
static boolean irusDownloadReports;
static boolean irusProcessStats;
static int irusNumberOfOpendoarsToDownload;
public static void main(String args[]) throws Exception { public static void main(String args[]) throws Exception {
// Sending the logs to the console // Sending the logs to the console
@ -116,6 +121,20 @@ public class ExecuteWorkflow {
else else
processLaReferenciaLogs = false; processLaReferenciaLogs = false;
if (parser.get("irusCreateTablesEmptyDirs").toLowerCase().equals("true"))
irusCreateTablesEmptyDirs = true;
else
irusCreateTablesEmptyDirs = false;
if (parser.get("irusDownloadReports").toLowerCase().equals("true"))
irusDownloadReports = true;
else
irusDownloadReports = false;
if (parser.get("irusProcessStats").toLowerCase().equals("true"))
irusProcessStats = true;
else
irusProcessStats = false;
irusNumberOfOpendoarsToDownload = Integer.parseInt(parser.get("irusNumberOfOpendoarsToDownload"));
UsageStatsExporter usagestatsExport = new UsageStatsExporter(); UsageStatsExporter usagestatsExport = new UsageStatsExporter();
usagestatsExport.export(); usagestatsExport.export();
} }

View File

@ -8,8 +8,10 @@ import java.sql.PreparedStatement;
import java.sql.ResultSet; import java.sql.ResultSet;
import java.sql.Statement; import java.sql.Statement;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar; import java.util.Calendar;
import java.util.Date; import java.util.Date;
import java.util.List;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
@ -32,16 +34,22 @@ public class IrusStats {
public IrusStats(String irusUKURL) throws Exception { public IrusStats(String irusUKURL) throws Exception {
this.irusUKURL = irusUKURL; this.irusUKURL = irusUKURL;
logger.info("Creating Irus Stats tables");
createTables();
logger.info("Created Irus Stats tables");
// The following may not be needed - It will be created when JSON tables are created // The following may not be needed - It will be created when JSON tables are created
// createTmpTables(); // createTmpTables();
} }
private void createTables() throws Exception { public void reCreateLogDirs() throws Exception {
try { FileSystem dfs = FileSystem.get(new Configuration());
logger.info("Deleting irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
dfs.delete(new Path(ExecuteWorkflow.irusUKReportPath), true);
logger.info("Creating irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
dfs.mkdirs(new Path(ExecuteWorkflow.irusUKReportPath));
}
public void createTables() throws Exception {
try {
logger.info("Creating sushilog"); logger.info("Creating sushilog");
Statement stmt = ConnectDB.getHiveConnection().createStatement(); Statement stmt = ConnectDB.getHiveConnection().createStatement();
String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
@ -198,17 +206,26 @@ public class IrusStats {
} }
public void getIrusRRReport(String irusUKReportPath) throws Exception { public void getIrusRRReport(String irusUKReportPath) throws Exception {
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM"); SimpleDateFormat sdf = new SimpleDateFormat("YYYY-MM");
String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate=" // Setting the starting period
+ simpleDateFormat.format(new Date()) Calendar start = (Calendar) ExecuteWorkflow.startingLogPeriod.clone();
+ "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback="; logger.info("Starting period for log download: " + sdf.format(start.getTime()));
// Setting the ending period (last day of the month)
Calendar end = (Calendar) ExecuteWorkflow.endingLogPeriod.clone();
end.add(Calendar.MONTH, +1);
end.add(Calendar.DAY_OF_MONTH, -1);
logger.info("Ending period for log download: " + sdf.format(end.getTime()));
String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=" +
sdf.format(start.getTime()) + "&EndDate=" + sdf.format(end.getTime()) +
"&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
logger.info("(processIrusRRReport) Getting report: " + reportUrl); logger.info("(processIrusRRReport) Getting report: " + reportUrl);
String text = getJson(reportUrl, "", ""); String text = getJson(reportUrl, "", "");
// log.info("Report: " + text); List<String> opendoarsToVisit = new ArrayList<String>();
JSONParser parser = new JSONParser(); JSONParser parser = new JSONParser();
JSONObject jsonObject = (JSONObject) parser.parse(text); JSONObject jsonObject = (JSONObject) parser.parse(text);
jsonObject = (JSONObject) jsonObject.get("ReportResponse"); jsonObject = (JSONObject) jsonObject.get("ReportResponse");
@ -224,6 +241,7 @@ public class IrusStats {
JSONObject opendoar = (JSONObject) identifier; JSONObject opendoar = (JSONObject) identifier;
if (opendoar.get("Type").toString().equals("OpenDOAR")) { if (opendoar.get("Type").toString().equals("OpenDOAR")) {
i++; i++;
opendoarsToVisit.add(opendoar.get("Value").toString());
getIrusIRReport(opendoar.get("Value").toString(), irusUKReportPath); getIrusIRReport(opendoar.get("Value").toString(), irusUKReportPath);
break; break;
} }
@ -231,7 +249,22 @@ public class IrusStats {
// break; // break;
} }
logger.info("(processIrusRRReport) Finished with report: " + reportUrl); logger.info("Found the following opendoars for download: " + opendoarsToVisit);
if (ExecuteWorkflow.irusNumberOfOpendoarsToDownload > 0 &&
ExecuteWorkflow.irusNumberOfOpendoarsToDownload <= opendoarsToVisit.size()) {
logger.info("Trimming opendoars list to the size of: " + ExecuteWorkflow.irusNumberOfOpendoarsToDownload);
opendoarsToVisit = opendoarsToVisit.subList(0, ExecuteWorkflow.irusNumberOfOpendoarsToDownload);
}
logger.info("Downloading the following opendoars: " + opendoarsToVisit);
for (String opendoar : opendoarsToVisit) {
logger.info("Now working on opendoar: " + opendoar);
this.getIrusIRReport(opendoar, irusUKReportPath);
}
logger.info("Finished with report: " + reportUrl);
} }
private void getIrusIRReport(String opendoar, String irusUKReportPath) throws Exception { private void getIrusIRReport(String opendoar, String irusUKReportPath) throws Exception {
@ -242,13 +275,15 @@ public class IrusStats {
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM"); SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
Calendar start = Calendar.getInstance(); // Setting the starting period
start.set(Calendar.YEAR, 2016); Calendar start = (Calendar) ExecuteWorkflow.startingLogPeriod.clone();
start.set(Calendar.MONTH, Calendar.JANUARY); logger.info("Starting period for log download: " + simpleDateFormat.format(start.getTime()));
// start.setTime(simpleDateFormat.parse("2016-01"));
Calendar end = Calendar.getInstance(); // Setting the ending period (last day of the month)
Calendar end = (Calendar) ExecuteWorkflow.endingLogPeriod.clone();
end.add(Calendar.MONTH, +1);
end.add(Calendar.DAY_OF_MONTH, -1); end.add(Calendar.DAY_OF_MONTH, -1);
logger.info("Ending period for log download: " + simpleDateFormat.format(end.getTime()));
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
PreparedStatement st = ConnectDB PreparedStatement st = ConnectDB
@ -264,10 +299,6 @@ public class IrusStats {
} }
} }
rs_date.close(); rs_date.close();
PreparedStatement preparedStatement = ConnectDB
.getHiveConnection()
.prepareStatement(
"INSERT INTO sushilogtmp (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
int batch_size = 0; int batch_size = 0;
while (start.before(end)) { while (start.before(end)) {
@ -310,7 +341,6 @@ public class IrusStats {
fin.close(); fin.close();
} }
preparedStatement.executeBatch();
ConnectDB.getHiveConnection().close(); ConnectDB.getHiveConnection().close();
logger.info("(processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar); logger.info("(processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar);

View File

@ -137,7 +137,7 @@ public class LaReferenciaDownloadLogs {
String baseApiUrl = getPiwikLogUrl() + ApimethodGetAllSites + format + "&token_auth=" + this.tokenAuth; String baseApiUrl = getPiwikLogUrl() + ApimethodGetAllSites + format + "&token_auth=" + this.tokenAuth;
String content = ""; String content = "";
List<Integer> siteIdToVisit = new ArrayList<Integer>(); List<Integer> siteIdsToVisit = new ArrayList<Integer>();
// Getting all the siteIds in a list for logging reasons & limiting the list // Getting all the siteIds in a list for logging reasons & limiting the list
// to the max number of siteIds // to the max number of siteIds
@ -146,19 +146,19 @@ public class LaReferenciaDownloadLogs {
JSONArray jsonArray = (JSONArray) parser.parse(content); JSONArray jsonArray = (JSONArray) parser.parse(content);
for (Object aJsonArray : jsonArray) { for (Object aJsonArray : jsonArray) {
JSONObject jsonObjectRow = (JSONObject) aJsonArray; JSONObject jsonObjectRow = (JSONObject) aJsonArray;
siteIdToVisit.add(Integer.parseInt(jsonObjectRow.get("idsite").toString())); siteIdsToVisit.add(Integer.parseInt(jsonObjectRow.get("idsite").toString()));
} }
logger.info("Found the following siteIds for download: " + siteIdToVisit); logger.info("Found the following siteIds for download: " + siteIdsToVisit);
if (ExecuteWorkflow.numberOfPiwikIdsToDownload > 0 && if (ExecuteWorkflow.numberOfPiwikIdsToDownload > 0 &&
ExecuteWorkflow.numberOfPiwikIdsToDownload <= siteIdToVisit.size()) { ExecuteWorkflow.numberOfPiwikIdsToDownload <= siteIdsToVisit.size()) {
logger.info("Trimming siteIds list to the size of: " + ExecuteWorkflow.numberOfPiwikIdsToDownload); logger.info("Trimming siteIds list to the size of: " + ExecuteWorkflow.numberOfPiwikIdsToDownload);
siteIdToVisit = siteIdToVisit.subList(0, ExecuteWorkflow.numberOfPiwikIdsToDownload); siteIdsToVisit = siteIdsToVisit.subList(0, ExecuteWorkflow.numberOfPiwikIdsToDownload);
} }
logger.info("Downloading from repos with the following siteIds: " + siteIdToVisit); logger.info("Downloading from repos with the following siteIds: " + siteIdsToVisit);
for (int siteId : siteIdToVisit) { for (int siteId : siteIdsToVisit) {
logger.info("Now working on piwikId: " + siteId); logger.info("Now working on piwikId: " + siteId);
this.GetLaReFerenciaLogs(repoLogsPath, siteId); this.GetLaReFerenciaLogs(repoLogsPath, siteId);
} }

View File

@ -60,9 +60,6 @@ public class UsageStatsExporter {
logger.info("Deleting portalLog directory: " + ExecuteWorkflow.portalLogPath); logger.info("Deleting portalLog directory: " + ExecuteWorkflow.portalLogPath);
dfs.delete(new Path(ExecuteWorkflow.portalLogPath), true); dfs.delete(new Path(ExecuteWorkflow.portalLogPath), true);
logger.info("Deleting irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
dfs.delete(new Path(ExecuteWorkflow.irusUKReportPath), true);
logger.info("Deleting sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray); logger.info("Deleting sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathArray), true); dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathArray), true);
@ -78,9 +75,6 @@ public class UsageStatsExporter {
logger.info("Creating portalLog directory: " + ExecuteWorkflow.portalLogPath); logger.info("Creating portalLog directory: " + ExecuteWorkflow.portalLogPath);
dfs.mkdirs(new Path(ExecuteWorkflow.portalLogPath)); dfs.mkdirs(new Path(ExecuteWorkflow.portalLogPath));
logger.info("Creating irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
dfs.mkdirs(new Path(ExecuteWorkflow.irusUKReportPath));
logger.info("Creating sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray); logger.info("Creating sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathArray)); dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathArray));
@ -146,14 +140,28 @@ public class UsageStatsExporter {
lastats.processLogs(); lastats.processLogs();
logger.info("LaReferencia logs done"); logger.info("LaReferencia logs done");
} }
IrusStats irusstats = new IrusStats(ExecuteWorkflow.irusUKBaseURL);
if (ExecuteWorkflow.irusCreateTablesEmptyDirs) {
logger.info("Creating Irus Stats tables");
irusstats.createTables();
logger.info("Created Irus Stats tables");
logger.info("Re-create log dirs");
irusstats.reCreateLogDirs();
logger.info("Re-created log dirs");
}
if (ExecuteWorkflow.irusDownloadReports) {
irusstats.getIrusRRReport(ExecuteWorkflow.irusUKReportPath);
}
if (ExecuteWorkflow.irusProcessStats) {
irusstats.processIrusStats();
logger.info("Irus done");
}
System.exit(0); System.exit(0);
// IrusStats irusstats = new IrusStats(irusUKBaseURL);
// irusstats.getIrusRRReport(irusUKReportPath);
// irusstats.processIrusStats();
// log.info("irus done");
// SarcStats sarcStats = new SarcStats(); // SarcStats sarcStats = new SarcStats();
// sarcStats.getAndProcessSarc(sarcsReportPathArray, sarcsReportPathNonArray); // sarcStats.getAndProcessSarc(sarcsReportPathArray, sarcsReportPathNonArray);
// sarcStats.finalizeSarcStats(); // sarcStats.finalizeSarcStats();

View File

@ -148,6 +148,29 @@
"paramLongName": "downloadLaReferenciaLogs", "paramLongName": "downloadLaReferenciaLogs",
"paramDescription": "download La Referencia logs?", "paramDescription": "download La Referencia logs?",
"paramRequired": true "paramRequired": true
},
{
"paramName": "icted",
"paramLongName": "irusCreateTablesEmptyDirs",
"paramDescription": "Irus section: Create tables and empty JSON directories?",
"paramRequired": true
},
{
"paramName": "idr",
"paramLongName": "irusDownloadReports",
"paramDescription": "Irus section: Download reports?",
"paramRequired": true
},
{
"paramName": "ipr",
"paramLongName": "irusProcessStats",
"paramDescription": "Irus section: Process stats?",
"paramRequired": true
},
{
"paramName": "inod",
"paramLongName": "irusNumberOfOpendoarsToDownload",
"paramDescription": "Limit the number of the downloaded Opendoars (Irus) to the first irusNumberOfOpendoarsToDownload",
"paramRequired": true
} }
] ]

View File

@ -67,6 +67,10 @@
<arg>--numberOfSiteIdsToDownload</arg><arg>${numberOfSiteIdsToDownload}</arg> <arg>--numberOfSiteIdsToDownload</arg><arg>${numberOfSiteIdsToDownload}</arg>
<arg>--downloadLaReferenciaLogs</arg><arg>${downloadLaReferenciaLogs}</arg> <arg>--downloadLaReferenciaLogs</arg><arg>${downloadLaReferenciaLogs}</arg>
<arg>--processLaReferenciaLogs</arg><arg>${processLaReferenciaLogs}</arg> <arg>--processLaReferenciaLogs</arg><arg>${processLaReferenciaLogs}</arg>
<arg>--irusCreateTablesEmptyDirs</arg><arg>${irusCreateTablesEmptyDirs}</arg>
<arg>--irusDownloadReports</arg><arg>${irusDownloadReports}</arg>
<arg>--irusProcessStats</arg><arg>${irusProcessStats}</arg>
<arg>--irusNumberOfOpendoarsToDownload</arg><arg>${irusNumberOfOpendoarsToDownload}</arg>
<capture-output/> <capture-output/>
</java> </java>
<ok to="End" /> <ok to="End" />