From 8b08f35dfe0c3101d1f8752c15d266c3fe3ae231 Mon Sep 17 00:00:00 2001 From: Spyros Zoupanos Date: Thu, 8 Oct 2020 18:53:23 +0300 Subject: [PATCH] Corrections for Irus stats after discussing with Dimitris --- .../usagestats/export/ExecuteWorkflow.java | 5 ++--- .../oa/graph/usagestats/export/IrusStats.java | 22 +++++++++++++++---- .../oa/graph/usagestats/export/SarcStats.java | 6 ++--- .../usagestats/export/UsageStatsExporter.java | 2 +- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java index a4891d788..a48f945f3 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java @@ -57,7 +57,7 @@ public class ExecuteWorkflow { static boolean sarcDownloadReports; static boolean sarcProcessStats; static int sarcNumberOfOpendoarsToDownload; - + public static void main(String args[]) throws Exception { // Sending the logs to the console @@ -140,7 +140,6 @@ public class ExecuteWorkflow { irusProcessStats = false; irusNumberOfOpendoarsToDownload = Integer.parseInt(parser.get("irusNumberOfOpendoarsToDownload")); - if (parser.get("sarcCreateTablesEmptyDirs").toLowerCase().equals("true")) sarcCreateTablesEmptyDirs = true; else @@ -154,7 +153,7 @@ public class ExecuteWorkflow { else sarcProcessStats = false; sarcNumberOfOpendoarsToDownload = Integer.parseInt(parser.get("sarcNumberOfOpendoarsToDownload")); - + UsageStatsExporter usagestatsExport = new UsageStatsExporter(); usagestatsExport.export(); } diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java index 1b52d1cc5..b22f73208 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java @@ -169,7 +169,8 @@ public class IrusStats { logger.info("Inserting to irus_sushilogtmp table"); String insertSushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".irus_sushilogtmp " + - "SELECT 'IRUS-UK', 'opendoar____::', `ItemIdent`.`Value`, `ItemPerf`.`Period`.`Begin`, " + + "SELECT 'IRUS-UK', CONCAT('opendoar____::', split(split(INPUT__FILE__NAME,'IrusIRReport_')[1],'_')[0]), " + + "`ItemIdent`.`Value`, `ItemPerf`.`Period`.`Begin`, " + "`ItemPerf`.`Instance`.`MetricType`, `ItemPerf`.`Instance`.`Count` " + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".irus_sushilogtmp_json " + "LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent " + @@ -201,13 +202,26 @@ public class IrusStats { stmt.executeUpdate(insertDStats); logger.info("Inserted into downloads_stats"); + logger.info("Creating sushilog table"); + String createSushilog = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + + ".sushilog " + + "(`source` string, " + + "`repository_id` string, " + + "`rid` string, " + + "`date` string, " + + "`metric_type` string, " + + "`count` int)"; + stmt.executeUpdate(createSushilog); + logger.info("Created sushilog table"); + + logger.info("Inserting to sushilog table"); String insertToShushilog = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sushilog SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".irus_sushilogtmp"; stmt.executeUpdate(insertToShushilog); + logger.info("Inserted to sushilog table"); ConnectDB.getHiveConnection().close(); - } public void getIrusRRReport(String irusUKReportPath) throws Exception { @@ -313,7 +327,7 @@ public class IrusStats { + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback="; start.add(Calendar.MONTH, 1); - System.out.println("Downloading file: " + reportUrl); + logger.info("Downloading file: " + reportUrl); String text = getJson(reportUrl, "", ""); if (text == null) { continue; @@ -322,7 +336,7 @@ public class IrusStats { FileSystem fs = FileSystem.get(new Configuration()); String filePath = irusUKReportPath + "/" + "IrusIRReport_" + opendoar + "_" + simpleDateFormat.format(start.getTime()) + ".json"; - System.out.println("Storing to file: " + filePath); + logger.info("Storing to file: " + filePath); FSDataOutputStream fin = fs.create(new Path(filePath), true); JSONParser parser = new JSONParser(); diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java index c9c224a55..d42c9ce0b 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java @@ -71,20 +71,20 @@ public class SarcStats { public void reCreateLogDirs() throws IOException { FileSystem dfs = FileSystem.get(new Configuration()); - + logger.info("Deleting sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray); dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathArray), true); logger.info("Deleting sarcsReport (NonArray) directory: " + ExecuteWorkflow.sarcsReportPathNonArray); dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathNonArray), true); - + logger.info("Creating sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray); dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathArray)); logger.info("Creating sarcsReport (NonArray) directory: " + ExecuteWorkflow.sarcsReportPathNonArray); dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathNonArray)); } - + public void processSarc(String sarcsReportPathArray, String sarcsReportPathNonArray, String url, String issn) throws Exception { Statement stmt = ConnectDB.getHiveConnection().createStatement(); diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java index 4d66b832e..24f292df1 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java @@ -149,7 +149,7 @@ public class UsageStatsExporter { } System.exit(0); - + SarcStats sarcStats = new SarcStats(); if (ExecuteWorkflow.sarcCreateTablesEmptyDirs) { sarcStats.reCreateLogDirs();