forked from D-Net/dnet-hadoop
Corrections for Irus stats after discussing with Dimitris
This commit is contained in:
parent
715bbd487d
commit
8b08f35dfe
ExecuteWorkflow.java
@@ -57,7 +57,7 @@ public class ExecuteWorkflow {
	static boolean sarcDownloadReports;
	static boolean sarcProcessStats;
	static int sarcNumberOfOpendoarsToDownload;


	public static void main(String args[]) throws Exception {

		// Sending the logs to the console
@@ -140,7 +140,6 @@ public class ExecuteWorkflow {
			irusProcessStats = false;
		irusNumberOfOpendoarsToDownload = Integer.parseInt(parser.get("irusNumberOfOpendoarsToDownload"));


		if (parser.get("sarcCreateTablesEmptyDirs").toLowerCase().equals("true"))
			sarcCreateTablesEmptyDirs = true;
		else
@@ -154,7 +153,7 @@ public class ExecuteWorkflow {
		else
			sarcProcessStats = false;
		sarcNumberOfOpendoarsToDownload = Integer.parseInt(parser.get("sarcNumberOfOpendoarsToDownload"));


		UsageStatsExporter usagestatsExport = new UsageStatsExporter();
		usagestatsExport.export();
	}
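The ExecuteWorkflow hunks above only touch how the SARC/IRUS workflow flags are read from the job arguments. As a minimal sketch of that flag-parsing pattern (assumption: a plain Map stands in for the workflow argument parser; the real class calls parser.get(...) on the submitted workflow parameters):

// Hypothetical, self-contained illustration of the parser.get(...) flag parsing
// used in ExecuteWorkflow.main; the Map is a stand-in for the argument parser.
import java.util.Map;

class FlagParsingSketch {
	public static void main(String[] args) {
		Map<String, String> parser = Map.of(
			"sarcCreateTablesEmptyDirs", "true",
			"sarcProcessStats", "false",
			"sarcNumberOfOpendoarsToDownload", "100");

		// Equivalent of: if (parser.get(...).toLowerCase().equals("true")) flag = true; else flag = false;
		boolean sarcCreateTablesEmptyDirs = Boolean.parseBoolean(parser.get("sarcCreateTablesEmptyDirs"));
		boolean sarcProcessStats = Boolean.parseBoolean(parser.get("sarcProcessStats"));
		int sarcNumberOfOpendoarsToDownload = Integer.parseInt(parser.get("sarcNumberOfOpendoarsToDownload"));

		System.out.println(sarcCreateTablesEmptyDirs + " " + sarcProcessStats + " " + sarcNumberOfOpendoarsToDownload);
	}
}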
IrusStats.java
@@ -169,7 +169,8 @@ public class IrusStats {

		logger.info("Inserting to irus_sushilogtmp table");
		String insertSushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".irus_sushilogtmp " +
-			"SELECT 'IRUS-UK', 'opendoar____::', `ItemIdent`.`Value`, `ItemPerf`.`Period`.`Begin`, " +
+			"SELECT 'IRUS-UK', CONCAT('opendoar____::', split(split(INPUT__FILE__NAME,'IrusIRReport_')[1],'_')[0]), " +
+			"`ItemIdent`.`Value`, `ItemPerf`.`Period`.`Begin`, " +
			"`ItemPerf`.`Instance`.`MetricType`, `ItemPerf`.`Instance`.`Count` " +
			"FROM " + ConnectDB.getUsageStatsDBSchema() + ".irus_sushilogtmp_json " +
			"LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent " +
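The removed and added SELECT lines above are the substance of the correction: instead of the hardcoded 'opendoar____::' prefix with no repository id, the repository id is now recovered from the Hive virtual column INPUT__FILE__NAME by splitting the report file name. A minimal Java sketch of the same string manipulation (assumption: report files are named IrusIRReport_<opendoar>_<date>.json, as in getIrusRRReport further below; the example path is illustrative only):

// Hypothetical helper mirroring the Hive expression
// CONCAT('opendoar____::', split(split(INPUT__FILE__NAME,'IrusIRReport_')[1],'_')[0])
class OpendoarIdSketch {
	static String repositoryIdFromFileName(String inputFileName) {
		// part of the file path after "IrusIRReport_"
		String tail = inputFileName.split("IrusIRReport_")[1];
		// the opendoar id is everything up to the next underscore
		return "opendoar____::" + tail.split("_")[0];
	}

	public static void main(String[] args) {
		String file = "hdfs:///tmp/irusReports/IrusIRReport_123_2020-01.json";
		System.out.println(repositoryIdFromFileName(file)); // prints: opendoar____::123
	}
}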
@@ -201,13 +202,26 @@ public class IrusStats {
		stmt.executeUpdate(insertDStats);
		logger.info("Inserted into downloads_stats");

		logger.info("Creating sushilog table");
		String createSushilog = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
			+ ".sushilog " +
			"(`source` string, " +
			"`repository_id` string, " +
			"`rid` string, " +
			"`date` string, " +
			"`metric_type` string, " +
			"`count` int)";
		stmt.executeUpdate(createSushilog);
		logger.info("Created sushilog table");

		logger.info("Inserting to sushilog table");
		String insertToShushilog = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sushilog SELECT * FROM " +
			ConnectDB.getUsageStatsDBSchema()
			+ ".irus_sushilogtmp";
		stmt.executeUpdate(insertToShushilog);
		logger.info("Inserted to sushilog table");

		ConnectDB.getHiveConnection().close();

	}

	public void getIrusRRReport(String irusUKReportPath) throws Exception {
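The block above creates the permanent sushilog table (if missing) and copies the temporary irus_sushilogtmp rows into it over the Hive JDBC connection. A minimal sketch of that create-then-copy step in isolation (assumptions: a plain java.sql.Connection to Hive; "usagestats" is a placeholder for whatever ConnectDB.getUsageStatsDBSchema() returns):

// Hypothetical, standalone version of the create/copy step shown above.
import java.sql.Connection;
import java.sql.Statement;

class SushilogCopySketch {
	static void copyIrusTmpToSushilog(Connection hiveConnection) throws Exception {
		String schema = "usagestats"; // placeholder schema name
		try (Statement stmt = hiveConnection.createStatement()) {
			stmt.executeUpdate("CREATE TABLE IF NOT EXISTS " + schema + ".sushilog " +
				"(`source` string, `repository_id` string, `rid` string, " +
				"`date` string, `metric_type` string, `count` int)");
			stmt.executeUpdate("INSERT INTO " + schema + ".sushilog " +
				"SELECT * FROM " + schema + ".irus_sushilogtmp");
		} // Statement closed here; the caller owns the Connection
	}
}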
@@ -313,7 +327,7 @@ public class IrusStats {
				+ "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
			start.add(Calendar.MONTH, 1);

			System.out.println("Downloading file: " + reportUrl);
			logger.info("Downloading file: " + reportUrl);
			String text = getJson(reportUrl, "", "");
			if (text == null) {
				continue;
@@ -322,7 +336,7 @@ public class IrusStats {
			FileSystem fs = FileSystem.get(new Configuration());
			String filePath = irusUKReportPath + "/" + "IrusIRReport_" +
				opendoar + "_" + simpleDateFormat.format(start.getTime()) + ".json";
			System.out.println("Storing to file: " + filePath);
			logger.info("Storing to file: " + filePath);
			FSDataOutputStream fin = fs.create(new Path(filePath), true);

			JSONParser parser = new JSONParser();
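The hunk above opens the HDFS output stream for the monthly report (fs.create(new Path(filePath), true)) but the write itself falls outside the shown context. A minimal sketch of the store-to-HDFS pattern it implies (assumptions: the report JSON text has already been fetched by getJson; the write/close calls are standard FSDataOutputStream usage, not necessarily the exact lines in getIrusRRReport). Note that the IrusIRReport_ prefix and the opendoar segment in filePath are exactly what the corrected SELECT above splits on, so the stored file name is what carries the repository id into Hive.

// Hypothetical completion of the fs.create(...) call shown above:
// write the downloaded report text to the IrusIRReport_<opendoar>_<date>.json path.
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class StoreReportSketch {
	static void storeReport(String filePath, String reportJson) throws Exception {
		FileSystem fs = FileSystem.get(new Configuration());
		try (FSDataOutputStream fin = fs.create(new Path(filePath), true)) { // true = overwrite
			fin.write(reportJson.getBytes(StandardCharsets.UTF_8));
		}
	}
}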
SarcStats.java
@@ -71,20 +71,20 @@ public class SarcStats {

	public void reCreateLogDirs() throws IOException {
		FileSystem dfs = FileSystem.get(new Configuration());


		logger.info("Deleting sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
		dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathArray), true);

		logger.info("Deleting sarcsReport (NonArray) directory: " + ExecuteWorkflow.sarcsReportPathNonArray);
		dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathNonArray), true);


		logger.info("Creating sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
		dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathArray));

		logger.info("Creating sarcsReport (NonArray) directory: " + ExecuteWorkflow.sarcsReportPathNonArray);
		dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathNonArray));
	}


	public void processSarc(String sarcsReportPathArray, String sarcsReportPathNonArray,
		String url, String issn) throws Exception {
		Statement stmt = ConnectDB.getHiveConnection().createStatement();
UsageStatsExporter.java
@@ -149,7 +149,7 @@ public class UsageStatsExporter {
		}

		System.exit(0);


		SarcStats sarcStats = new SarcStats();
		if (ExecuteWorkflow.sarcCreateTablesEmptyDirs) {
			sarcStats.reCreateLogDirs();