forked from D-Net/dnet-hadoop
Adding flags and time limits to Irus stats
This commit is contained in:
parent
9a1512004f
commit
8da64d8f54
|
@ -48,6 +48,11 @@ public class ExecuteWorkflow {
|
|||
static boolean downloadLaReferenciaLogs;
|
||||
static boolean processLaReferenciaLogs;
|
||||
|
||||
static boolean irusCreateTablesEmptyDirs;
|
||||
static boolean irusDownloadReports;
|
||||
static boolean irusProcessStats;
|
||||
static int irusNumberOfOpendoarsToDownload;
|
||||
|
||||
public static void main(String args[]) throws Exception {
|
||||
|
||||
// Sending the logs to the console
|
||||
|
@ -116,6 +121,20 @@ public class ExecuteWorkflow {
|
|||
else
|
||||
processLaReferenciaLogs = false;
|
||||
|
||||
// Irus switches: a flag is enabled only when its argument, lower-cased, is exactly "true".
irusCreateTablesEmptyDirs = parser.get("irusCreateTablesEmptyDirs").toLowerCase().equals("true");
irusDownloadReports = parser.get("irusDownloadReports").toLowerCase().equals("true");
irusProcessStats = parser.get("irusProcessStats").toLowerCase().equals("true");

// Numeric argument; a non-numeric value makes Integer.parseInt throw NumberFormatException.
irusNumberOfOpendoarsToDownload = Integer.parseInt(parser.get("irusNumberOfOpendoarsToDownload"));
|
||||
|
||||
UsageStatsExporter usagestatsExport = new UsageStatsExporter();
|
||||
usagestatsExport.export();
|
||||
}
|
||||
|
|
|
@ -8,8 +8,10 @@ import java.sql.PreparedStatement;
|
|||
import java.sql.ResultSet;
|
||||
import java.sql.Statement;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
|
@ -32,16 +34,22 @@ public class IrusStats {
|
|||
|
||||
public IrusStats(String irusUKURL) throws Exception {
|
||||
this.irusUKURL = irusUKURL;
|
||||
logger.info("Creating Irus Stats tables");
|
||||
createTables();
|
||||
logger.info("Created Irus Stats tables");
|
||||
// The following may not be needed - It will be created when JSON tables are created
|
||||
// createTmpTables();
|
||||
}
|
||||
|
||||
private void createTables() throws Exception {
|
||||
try {
|
||||
public void reCreateLogDirs() throws Exception {
|
||||
FileSystem dfs = FileSystem.get(new Configuration());
|
||||
|
||||
logger.info("Deleting irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
|
||||
dfs.delete(new Path(ExecuteWorkflow.irusUKReportPath), true);
|
||||
|
||||
logger.info("Creating irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
|
||||
dfs.mkdirs(new Path(ExecuteWorkflow.irusUKReportPath));
|
||||
}
|
||||
|
||||
public void createTables() throws Exception {
|
||||
try {
|
||||
logger.info("Creating sushilog");
|
||||
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
||||
String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
|
||||
|
@ -198,17 +206,26 @@ public class IrusStats {
|
|||
}
|
||||
|
||||
public void getIrusRRReport(String irusUKReportPath) throws Exception {
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate="
|
||||
+ simpleDateFormat.format(new Date())
|
||||
+ "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
|
||||
// "yyyy" is the calendar year. The previous "YYYY" is the week year, which can differ from the
// calendar year for dates close to a year boundary (e.g. the last days of December) and would
// then put the wrong year into the BeginDate/EndDate request parameters.
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM");

// Setting the starting period (copy, so the shared ExecuteWorkflow calendar is not modified)
Calendar start = (Calendar) ExecuteWorkflow.startingLogPeriod.clone();
logger.info("Starting period for log download: " + sdf.format(start.getTime()));

// Setting the ending period: one month forward, one day back.
// NOTE(review): this lands on the last day of the month only if endingLogPeriod is set to the
// first day of a month - confirm against the code that fills ExecuteWorkflow.endingLogPeriod.
Calendar end = (Calendar) ExecuteWorkflow.endingLogPeriod.clone();
end.add(Calendar.MONTH, +1);
end.add(Calendar.DAY_OF_MONTH, -1);
logger.info("Ending period for log download: " + sdf.format(end.getTime()));

// RR1 report request covering the configured period
String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=" +
	sdf.format(start.getTime()) + "&EndDate=" + sdf.format(end.getTime()) +
	"&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
|
||||
|
||||
logger.info("(processIrusRRReport) Getting report: " + reportUrl);
|
||||
|
||||
String text = getJson(reportUrl, "", "");
|
||||
|
||||
// log.info("Report: " + text);
|
||||
|
||||
List<String> opendoarsToVisit = new ArrayList<String>();
|
||||
JSONParser parser = new JSONParser();
|
||||
JSONObject jsonObject = (JSONObject) parser.parse(text);
|
||||
jsonObject = (JSONObject) jsonObject.get("ReportResponse");
|
||||
|
@ -224,6 +241,7 @@ public class IrusStats {
|
|||
JSONObject opendoar = (JSONObject) identifier;
if (opendoar.get("Type").toString().equals("OpenDOAR")) {
	i++;
	// Only collect the identifier here. The reports are downloaded further down by the loop over
	// opendoarsToVisit, after the list has been trimmed to irusNumberOfOpendoarsToDownload.
	// Calling getIrusIRReport at this point as well fetched every repository twice and ignored
	// the configured limit.
	opendoarsToVisit.add(opendoar.get("Value").toString());
	break;
}
|
||||
|
@ -231,7 +249,22 @@ public class IrusStats {
|
|||
// break;
|
||||
}
|
||||
|
||||
logger.info("(processIrusRRReport) Finished with report: " + reportUrl);
|
||||
logger.info("Found the following opendoars for download: " + opendoarsToVisit);

// Optional cap: keep only the first irusNumberOfOpendoarsToDownload entries.
// A value <= 0, or one larger than the list, leaves the list untouched.
if (ExecuteWorkflow.irusNumberOfOpendoarsToDownload > 0 &&
	ExecuteWorkflow.irusNumberOfOpendoarsToDownload <= opendoarsToVisit.size()) {
	logger.info("Trimming opendoars list to the size of: " + ExecuteWorkflow.irusNumberOfOpendoarsToDownload);
	opendoarsToVisit = opendoarsToVisit.subList(0, ExecuteWorkflow.irusNumberOfOpendoarsToDownload);
}

logger.info("Downloading the following opendoars: " + opendoarsToVisit);

// One report download per remaining opendoar identifier
for (String opendoar : opendoarsToVisit) {
	logger.info("Now working on opendoar: " + opendoar);
	this.getIrusIRReport(opendoar, irusUKReportPath);
}

logger.info("Finished with report: " + reportUrl);
|
||||
}
|
||||
|
||||
private void getIrusIRReport(String opendoar, String irusUKReportPath) throws Exception {
|
||||
|
@ -242,13 +275,15 @@ public class IrusStats {
|
|||
|
||||
// "yyyy" (calendar year) instead of "YYYY" (week year): the week year can differ from the calendar
// year for dates close to a year boundary, which would print the wrong year for such periods.
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");
|
||||
|
||||
Calendar start = Calendar.getInstance();
|
||||
start.set(Calendar.YEAR, 2016);
|
||||
start.set(Calendar.MONTH, Calendar.JANUARY);
|
||||
// start.setTime(simpleDateFormat.parse("2016-01"));
|
||||
// Setting the starting period
|
||||
Calendar start = (Calendar) ExecuteWorkflow.startingLogPeriod.clone();
|
||||
logger.info("Starting period for log download: " + simpleDateFormat.format(start.getTime()));
|
||||
|
||||
Calendar end = Calendar.getInstance();
|
||||
// Setting the ending period (last day of the month)
|
||||
Calendar end = (Calendar) ExecuteWorkflow.endingLogPeriod.clone();
|
||||
end.add(Calendar.MONTH, +1);
|
||||
end.add(Calendar.DAY_OF_MONTH, -1);
|
||||
logger.info("Ending period for log download: " + simpleDateFormat.format(end.getTime()));
|
||||
|
||||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
|
||||
PreparedStatement st = ConnectDB
|
||||
|
@ -264,10 +299,6 @@ public class IrusStats {
|
|||
}
|
||||
}
|
||||
rs_date.close();
|
||||
PreparedStatement preparedStatement = ConnectDB
|
||||
.getHiveConnection()
|
||||
.prepareStatement(
|
||||
"INSERT INTO sushilogtmp (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
|
||||
int batch_size = 0;
|
||||
|
||||
while (start.before(end)) {
|
||||
|
@ -310,7 +341,6 @@ public class IrusStats {
|
|||
fin.close();
|
||||
}
|
||||
|
||||
preparedStatement.executeBatch();
|
||||
ConnectDB.getHiveConnection().close();
|
||||
|
||||
logger.info("(processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar);
|
||||
|
|
|
@ -137,7 +137,7 @@ public class LaReferenciaDownloadLogs {
|
|||
String baseApiUrl = getPiwikLogUrl() + ApimethodGetAllSites + format + "&token_auth=" + this.tokenAuth;
|
||||
String content = "";
|
||||
|
||||
List<Integer> siteIdToVisit = new ArrayList<Integer>();
|
||||
List<Integer> siteIdsToVisit = new ArrayList<Integer>();
|
||||
|
||||
// Getting all the siteIds in a list for logging reasons & limiting the list
|
||||
// to the max number of siteIds
|
||||
|
@ -146,19 +146,19 @@ public class LaReferenciaDownloadLogs {
|
|||
JSONArray jsonArray = (JSONArray) parser.parse(content);
|
||||
for (Object aJsonArray : jsonArray) {
|
||||
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
|
||||
siteIdToVisit.add(Integer.parseInt(jsonObjectRow.get("idsite").toString()));
|
||||
siteIdsToVisit.add(Integer.parseInt(jsonObjectRow.get("idsite").toString()));
|
||||
}
|
||||
logger.info("Found the following siteIds for download: " + siteIdToVisit);
|
||||
logger.info("Found the following siteIds for download: " + siteIdsToVisit);
|
||||
|
||||
if (ExecuteWorkflow.numberOfPiwikIdsToDownload > 0 &&
|
||||
ExecuteWorkflow.numberOfPiwikIdsToDownload <= siteIdToVisit.size()) {
|
||||
ExecuteWorkflow.numberOfPiwikIdsToDownload <= siteIdsToVisit.size()) {
|
||||
logger.info("Trimming siteIds list to the size of: " + ExecuteWorkflow.numberOfPiwikIdsToDownload);
|
||||
siteIdToVisit = siteIdToVisit.subList(0, ExecuteWorkflow.numberOfPiwikIdsToDownload);
|
||||
siteIdsToVisit = siteIdsToVisit.subList(0, ExecuteWorkflow.numberOfPiwikIdsToDownload);
|
||||
}
|
||||
|
||||
logger.info("Downloading from repos with the followins siteIds: " + siteIdToVisit);
|
||||
logger.info("Downloading from repos with the followins siteIds: " + siteIdsToVisit);
|
||||
|
||||
for (int siteId : siteIdToVisit) {
|
||||
for (int siteId : siteIdsToVisit) {
|
||||
logger.info("Now working on piwikId: " + siteId);
|
||||
this.GetLaReFerenciaLogs(repoLogsPath, siteId);
|
||||
}
|
||||
|
|
|
@ -60,9 +60,6 @@ public class UsageStatsExporter {
|
|||
logger.info("Deleting portalLog directory: " + ExecuteWorkflow.portalLogPath);
|
||||
dfs.delete(new Path(ExecuteWorkflow.portalLogPath), true);
|
||||
|
||||
logger.info("Deleting irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
|
||||
dfs.delete(new Path(ExecuteWorkflow.irusUKReportPath), true);
|
||||
|
||||
logger.info("Deleting sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
|
||||
dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathArray), true);
|
||||
|
||||
|
@ -78,9 +75,6 @@ public class UsageStatsExporter {
|
|||
logger.info("Creating portalLog directory: " + ExecuteWorkflow.portalLogPath);
|
||||
dfs.mkdirs(new Path(ExecuteWorkflow.portalLogPath));
|
||||
|
||||
logger.info("Creating irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
|
||||
dfs.mkdirs(new Path(ExecuteWorkflow.irusUKReportPath));
|
||||
|
||||
logger.info("Creating sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
|
||||
dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathArray));
|
||||
|
||||
|
@ -146,14 +140,28 @@ public class UsageStatsExporter {
|
|||
lastats.processLogs();
|
||||
logger.info("LaReferencia logs done");
|
||||
}
|
||||
|
||||
IrusStats irusstats = new IrusStats(ExecuteWorkflow.irusUKBaseURL);
|
||||
if (ExecuteWorkflow.irusCreateTablesEmptyDirs) {
|
||||
logger.info("Creating Irus Stats tables");
|
||||
irusstats.createTables();
|
||||
logger.info("Created Irus Stats tables");
|
||||
|
||||
logger.info("Re-create log dirs");
|
||||
irusstats.reCreateLogDirs();
|
||||
logger.info("Re-created log dirs");
|
||||
}
|
||||
|
||||
if (ExecuteWorkflow.irusDownloadReports) {
|
||||
irusstats.getIrusRRReport(ExecuteWorkflow.irusUKReportPath);
|
||||
}
|
||||
if (ExecuteWorkflow.irusProcessStats) {
|
||||
irusstats.processIrusStats();
|
||||
logger.info("Irus done");
|
||||
}
|
||||
|
||||
System.exit(0);
|
||||
|
||||
// IrusStats irusstats = new IrusStats(irusUKBaseURL);
|
||||
// irusstats.getIrusRRReport(irusUKReportPath);
|
||||
|
||||
// irusstats.processIrusStats();
|
||||
// log.info("irus done");
|
||||
|
||||
// SarcStats sarcStats = new SarcStats();
|
||||
// sarcStats.getAndProcessSarc(sarcsReportPathArray, sarcsReportPathNonArray);
|
||||
// sarcStats.finalizeSarcStats();
|
||||
|
|
|
@ -148,6 +148,29 @@
|
|||
"paramLongName": "downloadLaReferenciaLogs",
|
||||
"paramDescription": "download La Referencia logs?",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "icted",
|
||||
"paramLongName": "irusCreateTablesEmptyDirs",
|
||||
"paramDescription": "Irus section: Create tables and empty JSON directories?",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "idr",
|
||||
"paramLongName": "irusDownloadReports",
|
||||
"paramDescription": "Irus section: Download reports?",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "ipr",
|
||||
"paramLongName": "irusProcessStats",
|
||||
"paramDescription": "Irus section: Process stats?",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "inod",
|
||||
"paramLongName": "irusNumberOfOpendoarsToDownload",
|
||||
"paramDescription": "Limit the number of the downloaded Opendoars (Irus) to the first irusNumberOfOpendoarsToDownload",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
||||
|
|
|
@ -67,6 +67,10 @@
|
|||
<arg>--numberOfSiteIdsToDownload</arg><arg>${numberOfSiteIdsToDownload}</arg>
|
||||
<arg>--downloadLaReferenciaLogs</arg><arg>${downloadLaReferenciaLogs}</arg>
|
||||
<arg>--processLaReferenciaLogs</arg><arg>${processLaReferenciaLogs}</arg>
|
||||
<arg>--irusCreateTablesEmptyDirs</arg><arg>${irusCreateTablesEmptyDirs}</arg>
|
||||
<arg>--irusDownloadReports</arg><arg>${irusDownloadReports}</arg>
|
||||
<arg>--irusProcessStats</arg><arg>${irusProcessStats}</arg>
|
||||
<arg>--irusNumberOfOpendoarsToDownload</arg><arg>${irusNumberOfOpendoarsToDownload}</arg>
|
||||
<capture-output/>
|
||||
</java>
|
||||
<ok to="End" />
|
||||
|
|
Loading…
Reference in New Issue