forked from D-Net/dnet-hadoop

Adding flags and time limits to Irus stats

parent 9a1512004f, commit 8da64d8f54
@@ -48,6 +48,11 @@ public class ExecuteWorkflow {
 	static boolean downloadLaReferenciaLogs;
 	static boolean processLaReferenciaLogs;
 
+	static boolean irusCreateTablesEmptyDirs;
+	static boolean irusDownloadReports;
+	static boolean irusProcessStats;
+	static int irusNumberOfOpendoarsToDownload;
+
 	public static void main(String args[]) throws Exception {
 
 		// Sending the logs to the console
@@ -116,6 +121,20 @@ public class ExecuteWorkflow {
 		else
 			processLaReferenciaLogs = false;
 
+		if (parser.get("irusCreateTablesEmptyDirs").toLowerCase().equals("true"))
+			irusCreateTablesEmptyDirs = true;
+		else
+			irusCreateTablesEmptyDirs = false;
+		if (parser.get("irusDownloadReports").toLowerCase().equals("true"))
+			irusDownloadReports = true;
+		else
+			irusDownloadReports = false;
+		if (parser.get("irusProcessStats").toLowerCase().equals("true"))
+			irusProcessStats = true;
+		else
+			irusProcessStats = false;
+		irusNumberOfOpendoarsToDownload = Integer.parseInt(parser.get("irusNumberOfOpendoarsToDownload"));
+
 		UsageStatsExporter usagestatsExport = new UsageStatsExporter();
 		usagestatsExport.export();
 	}
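Note on the parsing pattern above: the commit follows the file's existing `toLowerCase().equals("true")` convention for boolean flags. Purely as an illustrative alternative (not part of this commit), `Boolean.parseBoolean` gives the same result in one step, since it is already case-insensitive and returns false for anything other than "true":

    // illustrative alternative only - not what the commit applies
    irusCreateTablesEmptyDirs = Boolean.parseBoolean(parser.get("irusCreateTablesEmptyDirs"));
    irusDownloadReports = Boolean.parseBoolean(parser.get("irusDownloadReports"));
    irusProcessStats = Boolean.parseBoolean(parser.get("irusProcessStats"));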
@@ -8,8 +8,10 @@ import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.Statement;
 import java.text.SimpleDateFormat;
+import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
+import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -32,16 +34,22 @@ public class IrusStats {
 
 	public IrusStats(String irusUKURL) throws Exception {
 		this.irusUKURL = irusUKURL;
-		logger.info("Creating Irus Stats tables");
-		createTables();
-		logger.info("Created Irus Stats tables");
 		// The following may not be needed - It will be created when JSON tables are created
 		// createTmpTables();
 	}
 
-	private void createTables() throws Exception {
-		try {
+	public void reCreateLogDirs() throws Exception {
+		FileSystem dfs = FileSystem.get(new Configuration());
+
+		logger.info("Deleting irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
+		dfs.delete(new Path(ExecuteWorkflow.irusUKReportPath), true);
+
+		logger.info("Creating irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
+		dfs.mkdirs(new Path(ExecuteWorkflow.irusUKReportPath));
+	}
+
+	public void createTables() throws Exception {
+		try {
 			logger.info("Creating sushilog");
 			Statement stmt = ConnectDB.getHiveConnection().createStatement();
 			String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
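Because `createTables()` is no longer invoked from the constructor, and both it and the new `reCreateLogDirs()` are now public, callers must trigger table creation and directory cleanup explicitly. A minimal usage sketch of the new surface, mirroring what UsageStatsExporter does later in this commit (`irusUkBaseUrl` is a placeholder here):

    IrusStats irusstats = new IrusStats(irusUkBaseUrl);
    irusstats.createTables();     // create the sushilog tables if they do not exist
    irusstats.reCreateLogDirs();  // delete and re-create ExecuteWorkflow.irusUKReportPath on HDFS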
@@ -198,17 +206,26 @@ public class IrusStats {
 	}
 
 	public void getIrusRRReport(String irusUKReportPath) throws Exception {
-		SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
-		String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate="
-			+ simpleDateFormat.format(new Date())
-			+ "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
+		SimpleDateFormat sdf = new SimpleDateFormat("YYYY-MM");
+		// Setting the starting period
+		Calendar start = (Calendar) ExecuteWorkflow.startingLogPeriod.clone();
+		logger.info("Starting period for log download: " + sdf.format(start.getTime()));
+
+		// Setting the ending period (last day of the month)
+		Calendar end = (Calendar) ExecuteWorkflow.endingLogPeriod.clone();
+		end.add(Calendar.MONTH, +1);
+		end.add(Calendar.DAY_OF_MONTH, -1);
+		logger.info("Ending period for log download: " + sdf.format(end.getTime()));
+
+		String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=" +
+			sdf.format(start.getTime()) + "&EndDate=" + sdf.format(end.getTime()) +
+			"&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
+
 		logger.info("(processIrusRRReport) Getting report: " + reportUrl);
 
 		String text = getJson(reportUrl, "", "");
 
-		// log.info("Report: " + text);
+		List<String> opendoarsToVisit = new ArrayList<String>();
 
 		JSONParser parser = new JSONParser();
 		JSONObject jsonObject = (JSONObject) parser.parse(text);
 		jsonObject = (JSONObject) jsonObject.get("ReportResponse");
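The hard-coded BeginDate=2016-01 / EndDate=now pair is replaced by a configurable window taken from `ExecuteWorkflow.startingLogPeriod` and `endingLogPeriod`, with the end pushed to the last day of its month (add one month, subtract one day). One caveat worth flagging, though the commit keeps the file's existing pattern: in `SimpleDateFormat`, upper-case `YYYY` is the week-based year, so `yyyy-MM` is what a calendar-month format normally wants. A self-contained sketch of the same computation, with two stand-in Calendars in place of the ExecuteWorkflow fields:

    import java.text.SimpleDateFormat;
    import java.util.Calendar;

    public class DateRangeSketch {
        public static void main(String[] args) {
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM"); // lower-case yyyy: calendar year

            // Stand-ins for ExecuteWorkflow.startingLogPeriod / endingLogPeriod
            Calendar start = Calendar.getInstance();
            start.set(2016, Calendar.JANUARY, 1);
            Calendar end = Calendar.getInstance();
            end.set(2020, Calendar.MARCH, 1);

            // Same trick as the commit: move the end to the last day of its month
            end.add(Calendar.MONTH, +1);
            end.add(Calendar.DAY_OF_MONTH, -1);

            // The fragment that ends up inside reportUrl
            System.out.println("BeginDate=" + sdf.format(start.getTime())
                + "&EndDate=" + sdf.format(end.getTime())); // BeginDate=2016-01&EndDate=2020-03
        }
    }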
@@ -224,6 +241,7 @@ public class IrusStats {
 				JSONObject opendoar = (JSONObject) identifier;
 				if (opendoar.get("Type").toString().equals("OpenDOAR")) {
 					i++;
+					opendoarsToVisit.add(opendoar.get("Value").toString());
 					getIrusIRReport(opendoar.get("Value").toString(), irusUKReportPath);
 					break;
 				}
@@ -231,7 +249,22 @@ public class IrusStats {
 			// break;
 		}
 
-		logger.info("(processIrusRRReport) Finished with report: " + reportUrl);
+		logger.info("Found the following opendoars for download: " + opendoarsToVisit);
+
+		if (ExecuteWorkflow.irusNumberOfOpendoarsToDownload > 0 &&
+			ExecuteWorkflow.irusNumberOfOpendoarsToDownload <= opendoarsToVisit.size()) {
+			logger.info("Trimming siteIds list to the size of: " + ExecuteWorkflow.irusNumberOfOpendoarsToDownload);
+			opendoarsToVisit = opendoarsToVisit.subList(0, ExecuteWorkflow.irusNumberOfOpendoarsToDownload);
+		}
+
+		logger.info("Downloading the followins opendoars: " + opendoarsToVisit);
+
+		for (String opendoar : opendoarsToVisit) {
+			logger.info("Now working on piwikId: " + opendoar);
+			this.getIrusIRReport(opendoar, irusUKReportPath);
+		}
+
+		logger.info("Finished with report: " + reportUrl);
 	}
 
 	private void getIrusIRReport(String opendoar, String irusUKReportPath) throws Exception {
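The new download loop is throttled by `irusNumberOfOpendoarsToDownload`: the list is only trimmed when the limit is positive and no larger than the discovered list, so a value of 0 (or anything above the list size) means "download everything". A small illustrative sketch of that guard in isolation (the helper and sample values are hypothetical, not part of the commit):

    import java.util.Arrays;
    import java.util.List;

    public class TrimSketch {
        // Mirrors the guard used in getIrusRRReport: trim only when 0 < limit <= size
        static List<String> trim(List<String> opendoars, int limit) {
            if (limit > 0 && limit <= opendoars.size()) {
                return opendoars.subList(0, limit);
            }
            return opendoars;
        }

        public static void main(String[] args) {
            List<String> found = Arrays.asList("opendoar:123", "opendoar:456", "opendoar:789");
            System.out.println(trim(found, 2)); // [opendoar:123, opendoar:456]
            System.out.println(trim(found, 0)); // all three - limit disabled
            System.out.println(trim(found, 9)); // all three - limit larger than the list
        }
    }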
@@ -242,13 +275,15 @@ public class IrusStats {
 
 		SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
 
-		Calendar start = Calendar.getInstance();
-		start.set(Calendar.YEAR, 2016);
-		start.set(Calendar.MONTH, Calendar.JANUARY);
-		// start.setTime(simpleDateFormat.parse("2016-01"));
+		// Setting the starting period
+		Calendar start = (Calendar) ExecuteWorkflow.startingLogPeriod.clone();
+		logger.info("Starting period for log download: " + simpleDateFormat.format(start.getTime()));
 
-		Calendar end = Calendar.getInstance();
+		// Setting the ending period (last day of the month)
+		Calendar end = (Calendar) ExecuteWorkflow.endingLogPeriod.clone();
+		end.add(Calendar.MONTH, +1);
 		end.add(Calendar.DAY_OF_MONTH, -1);
+		logger.info("Ending period for log download: " + simpleDateFormat.format(end.getTime()));
 
 		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
 		PreparedStatement st = ConnectDB
@@ -264,10 +299,6 @@ public class IrusStats {
 			}
 		}
 		rs_date.close();
-		PreparedStatement preparedStatement = ConnectDB
-			.getHiveConnection()
-			.prepareStatement(
-				"INSERT INTO sushilogtmp (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
 		int batch_size = 0;
 
 		while (start.before(end)) {
@@ -310,7 +341,6 @@ public class IrusStats {
 			fin.close();
 		}
 
-		preparedStatement.executeBatch();
 		ConnectDB.getHiveConnection().close();
 
 		logger.info("(processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar);
@@ -137,7 +137,7 @@ public class LaReferenciaDownloadLogs {
 		String baseApiUrl = getPiwikLogUrl() + ApimethodGetAllSites + format + "&token_auth=" + this.tokenAuth;
 		String content = "";
 
-		List<Integer> siteIdToVisit = new ArrayList<Integer>();
+		List<Integer> siteIdsToVisit = new ArrayList<Integer>();
 
 		// Getting all the siteIds in a list for logging reasons & limiting the list
 		// to the max number of siteIds
@@ -146,19 +146,19 @@ public class LaReferenciaDownloadLogs {
 		JSONArray jsonArray = (JSONArray) parser.parse(content);
 		for (Object aJsonArray : jsonArray) {
 			JSONObject jsonObjectRow = (JSONObject) aJsonArray;
-			siteIdToVisit.add(Integer.parseInt(jsonObjectRow.get("idsite").toString()));
+			siteIdsToVisit.add(Integer.parseInt(jsonObjectRow.get("idsite").toString()));
 		}
-		logger.info("Found the following siteIds for download: " + siteIdToVisit);
+		logger.info("Found the following siteIds for download: " + siteIdsToVisit);
 
 		if (ExecuteWorkflow.numberOfPiwikIdsToDownload > 0 &&
-			ExecuteWorkflow.numberOfPiwikIdsToDownload <= siteIdToVisit.size()) {
+			ExecuteWorkflow.numberOfPiwikIdsToDownload <= siteIdsToVisit.size()) {
 			logger.info("Trimming siteIds list to the size of: " + ExecuteWorkflow.numberOfPiwikIdsToDownload);
-			siteIdToVisit = siteIdToVisit.subList(0, ExecuteWorkflow.numberOfPiwikIdsToDownload);
+			siteIdsToVisit = siteIdsToVisit.subList(0, ExecuteWorkflow.numberOfPiwikIdsToDownload);
 		}
 
-		logger.info("Downloading from repos with the followins siteIds: " + siteIdToVisit);
+		logger.info("Downloading from repos with the followins siteIds: " + siteIdsToVisit);
 
-		for (int siteId : siteIdToVisit) {
+		for (int siteId : siteIdsToVisit) {
 			logger.info("Now working on piwikId: " + siteId);
 			this.GetLaReFerenciaLogs(repoLogsPath, siteId);
 		}
@@ -60,9 +60,6 @@ public class UsageStatsExporter {
 		logger.info("Deleting portalLog directory: " + ExecuteWorkflow.portalLogPath);
 		dfs.delete(new Path(ExecuteWorkflow.portalLogPath), true);
 
-		logger.info("Deleting irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
-		dfs.delete(new Path(ExecuteWorkflow.irusUKReportPath), true);
-
 		logger.info("Deleting sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
 		dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathArray), true);
 
@@ -78,9 +75,6 @@ public class UsageStatsExporter {
 		logger.info("Creating portalLog directory: " + ExecuteWorkflow.portalLogPath);
 		dfs.mkdirs(new Path(ExecuteWorkflow.portalLogPath));
 
-		logger.info("Creating irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
-		dfs.mkdirs(new Path(ExecuteWorkflow.irusUKReportPath));
-
 		logger.info("Creating sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
 		dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathArray));
 
@@ -146,14 +140,28 @@ public class UsageStatsExporter {
 			lastats.processLogs();
 			logger.info("LaReferencia logs done");
 		}
 
+		IrusStats irusstats = new IrusStats(ExecuteWorkflow.irusUKBaseURL);
+		if (ExecuteWorkflow.irusCreateTablesEmptyDirs) {
+			logger.info("Creating Irus Stats tables");
+			irusstats.createTables();
+			logger.info("Created Irus Stats tables");
+
+			logger.info("Re-create log dirs");
+			irusstats.reCreateLogDirs();
+			logger.info("Re-created log dirs");
+		}
+
+		if (ExecuteWorkflow.irusDownloadReports) {
+			irusstats.getIrusRRReport(ExecuteWorkflow.irusUKReportPath);
+		}
+		if (ExecuteWorkflow.irusProcessStats) {
+			irusstats.processIrusStats();
+			logger.info("Irus done");
+		}
+
 		System.exit(0);
 
-		// IrusStats irusstats = new IrusStats(irusUKBaseURL);
-		// irusstats.getIrusRRReport(irusUKReportPath);
-
-		// irusstats.processIrusStats();
-		// log.info("irus done");
-
 		// SarcStats sarcStats = new SarcStats();
 		// sarcStats.getAndProcessSarc(sarcsReportPathArray, sarcsReportPathNonArray);
 		// sarcStats.finalizeSarcStats();
@@ -148,6 +148,29 @@
 		"paramLongName": "downloadLaReferenciaLogs",
 		"paramDescription": "download La Referencia logs?",
 		"paramRequired": true
+	},
+	{
+		"paramName": "icted",
+		"paramLongName": "irusCreateTablesEmptyDirs",
+		"paramDescription": "Irus section: Create tables and empty JSON directories?",
+		"paramRequired": true
+	},
+	{
+		"paramName": "idr",
+		"paramLongName": "irusDownloadReports",
+		"paramDescription": "Irus section: Download reports?",
+		"paramRequired": true
+	},
+	{
+		"paramName": "ipr",
+		"paramLongName": "irusProcessStats",
+		"paramDescription": "Irus section: Process stats?",
+		"paramRequired": true
+	},
+	{
+		"paramName": "inod",
+		"paramLongName": "irusNumberOfOpendoarsToDownload",
+		"paramDescription": "Limit the number of the downloaded Opendoars (Irus) to the first irusNumberOfOpendoarsToDownload",
+		"paramRequired": true
 	}
 ]
@@ -67,6 +67,10 @@
 			<arg>--numberOfSiteIdsToDownload</arg><arg>${numberOfSiteIdsToDownload}</arg>
 			<arg>--downloadLaReferenciaLogs</arg><arg>${downloadLaReferenciaLogs}</arg>
 			<arg>--processLaReferenciaLogs</arg><arg>${processLaReferenciaLogs}</arg>
+			<arg>--irusCreateTablesEmptyDirs</arg><arg>${irusCreateTablesEmptyDirs}</arg>
+			<arg>--irusDownloadReports</arg><arg>${irusDownloadReports}</arg>
+			<arg>--irusProcessStats</arg><arg>${irusProcessStats}</arg>
+			<arg>--irusNumberOfOpendoarsToDownload</arg><arg>${irusNumberOfOpendoarsToDownload}</arg>
 			<capture-output/>
 		</java>
 		<ok to="End" />
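These new `<arg>` pairs hand the workflow properties straight to `ExecuteWorkflow`, which looks them up by their long parameter names (see the parsing hunk at the top of this diff). A rough, self-contained sketch of that mapping, using a plain `Map` in place of the project's argument parser (whose full API is not shown in this diff; property values are sample ones):

    import java.util.HashMap;
    import java.util.Map;

    public class ArgMappingSketch {
        public static void main(String[] args) {
            // What the Oozie action effectively hands over:
            // --irusCreateTablesEmptyDirs true --irusDownloadReports true
            // --irusProcessStats false --irusNumberOfOpendoarsToDownload 5
            Map<String, String> parsed = new HashMap<>();
            parsed.put("irusCreateTablesEmptyDirs", "true");
            parsed.put("irusDownloadReports", "true");
            parsed.put("irusProcessStats", "false");
            parsed.put("irusNumberOfOpendoarsToDownload", "5");

            // Same conversions ExecuteWorkflow performs on the parsed values
            boolean irusCreateTablesEmptyDirs = parsed.get("irusCreateTablesEmptyDirs").toLowerCase().equals("true");
            boolean irusDownloadReports = parsed.get("irusDownloadReports").toLowerCase().equals("true");
            boolean irusProcessStats = parsed.get("irusProcessStats").toLowerCase().equals("true");
            int irusNumberOfOpendoarsToDownload = Integer.parseInt(parsed.get("irusNumberOfOpendoarsToDownload"));

            System.out.println(irusCreateTablesEmptyDirs + " " + irusDownloadReports + " "
                + irusProcessStats + " " + irusNumberOfOpendoarsToDownload); // true true false 5
        }
    }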