diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java index 1f94db0558..a77e3a09ea 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java @@ -83,38 +83,108 @@ public class IrusStats { } } - // The following may not be needed - It will be created when JSON tables are created - private void createTmpTables() throws Exception { - try { +// // The following may not be needed - It will be created when JSON tables are created +// private void createTmpTables() throws Exception { +// try { +// +// Statement stmt = ConnectDB.getConnection().createStatement(); +// String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilogtmp(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));"; +// stmt.executeUpdate(sqlCreateTableSushiLog); +// +// // stmt.executeUpdate("CREATE TABLE IF NOT EXISTS public.sushilog AS TABLE sushilog;"); +// // String sqlCopyPublicSushiLog = "INSERT INTO sushilog SELECT * FROM public.sushilog;"; +// // stmt.executeUpdate(sqlCopyPublicSushiLog); +// String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " +// + " ON INSERT TO sushilogtmp " +// + " WHERE (EXISTS ( SELECT sushilogtmp.source, sushilogtmp.repository," +// + "sushilogtmp.rid, sushilogtmp.date " +// + "FROM sushilogtmp " +// + "WHERE sushilogtmp.source = new.source AND sushilogtmp.repository = new.repository AND sushilogtmp.rid = new.rid AND sushilogtmp.date = new.date AND sushilogtmp.metric_type = new.metric_type)) DO INSTEAD NOTHING;"; +// stmt.executeUpdate(sqlcreateRuleSushiLog); +// +// stmt.close(); +// ConnectDB.getConnection().close(); +// log.info("Sushi Tmp Tables Created"); +// } catch (Exception e) { +// log.error("Failed to create tables: " + e); +// throw new Exception("Failed to create tables: " + e.toString(), e); +// } +// } - Statement stmt = ConnectDB.getConnection().createStatement(); - String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilogtmp(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));"; - stmt.executeUpdate(sqlCreateTableSushiLog); - - // stmt.executeUpdate("CREATE TABLE IF NOT EXISTS public.sushilog AS TABLE sushilog;"); - // String sqlCopyPublicSushiLog = "INSERT INTO sushilog SELECT * FROM public.sushilog;"; - // stmt.executeUpdate(sqlCopyPublicSushiLog); - String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS " - + " ON INSERT TO sushilogtmp " - + " WHERE (EXISTS ( SELECT sushilogtmp.source, sushilogtmp.repository," - + "sushilogtmp.rid, sushilogtmp.date " - + "FROM sushilogtmp " - + "WHERE sushilogtmp.source = new.source AND sushilogtmp.repository = new.repository AND sushilogtmp.rid = new.rid AND sushilogtmp.date = new.date AND sushilogtmp.metric_type = new.metric_type)) DO INSTEAD NOTHING;"; - stmt.executeUpdate(sqlcreateRuleSushiLog); - - stmt.close(); - ConnectDB.getConnection().close(); - log.info("Sushi Tmp Tables Created"); - } catch (Exception e) { - log.error("Failed to create tables: " + e); - throw new Exception("Failed to create tables: " + e.toString(), e); - } - } - - public void irusStats() throws Exception { + public void processIrusStats() throws Exception { Statement stmt = ConnectDB.getConnection().createStatement(); ConnectDB.getConnection().setAutoCommit(false); + System.out.println("====> Adding JSON Serde jar"); + stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar"); + System.out.println("====> Added JSON Serde jar"); + + System.out.println("====> Dropping sushilogtmp_json table"); + String drop_sushilogtmp_json = "DROP TABLE IF EXISTS " + + ConnectDB.getUsageStatsDBSchema() + + ".sushilogtmp_json"; + stmt.executeUpdate(drop_sushilogtmp_json); + System.out.println("====> Dropped sushilogtmp_json table"); + + System.out.println("====> Creating sushilogtmp_json table"); + String create_sushilogtmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " + + ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp_json(\n" + + " `ItemIdentifier` ARRAY<\n" + + " struct<\n" + + " Type: STRING,\n" + + " Value: STRING\n" + + " >\n" + + " >,\n" + + " `ItemPerformance` ARRAY<\n" + + " struct<\n" + + " `Period`: struct<\n" + + " `Begin`: STRING,\n" + + " `End`: STRING\n" + + " >,\n" + + " `Instance`: struct<\n" + + " `Count`: STRING,\n" + + " `MetricType`: STRING\n" + + " >\n" + + " >\n" + + " >\n" + + ")\n" + + "ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" + + "LOCATION '" + UsageStatsExporter.irusUKReportPath + "'\n" + + "TBLPROPERTIES (\"transactional\"=\"false\")"; + stmt.executeUpdate(create_sushilogtmp_json); + System.out.println("====> Created sushilogtmp_json table"); + + System.out.println("====> Dropping sushilogtmp table"); + String drop_sushilogtmp = "DROP TABLE IF EXISTS " + + ConnectDB.getUsageStatsDBSchema() + + ".sushilogtmp"; + stmt.executeUpdate(drop_sushilogtmp); + System.out.println("====> Dropped sushilogtmp table"); + + System.out.println("====> Creating sushilogtmp table"); + String create_sushilogtmp = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema() + + ".sushilogtmp(source STRING, repository STRING, " + + "rid STRING, date STRING, metric_type STRING, count INT) clustered by (source) into 100 buckets stored as orc " + + + "tblproperties('transactional'='true')"; + stmt.executeUpdate(create_sushilogtmp); + System.out.println("====> Created sushilogtmp table"); + + + System.out.println("====> Inserting to sushilogtmp table"); + String insert_sushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp " + + "SELECT 'IRUS-UK', 'opendoar____::', `ItemIdent`.`Value`, `ItemPerf`.`Period`.`Begin`, " + + "`ItemPerf`.`Instance`.`MetricType`, `ItemPerf`.`Instance`.`Count` " + + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp_json " + + "LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent " + + "LATERAL VIEW posexplode(ItemPerformance) ItemPerformanceTable AS seqp, ItemPerf " + + "WHERE `ItemIdent`.`Type`= 'OAI'"; + stmt.executeUpdate(insert_sushilogtmp); + System.out.println("====> Inserted to sushilogtmp table"); + + ConnectDB.getConnection().close(); + System.exit(0); + // String sql = "INSERT INTO sushi_result_downloads SELECT s.source, d.id AS repository, ro.id, s.date, s.count // FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND // metric_type='ft_total'"; @@ -132,7 +202,6 @@ public class IrusStats { sql = "Insert into sushilog select * from sushilogtmp;"; stmt.executeUpdate(sql); - ConnectDB.getConnection().commit(); ConnectDB.getConnection().close(); } @@ -172,7 +241,7 @@ public class IrusStats { } // break; } - + System.out.println("====> (processIrusRRReport) Finished with report: " + reportUrl); } @@ -254,7 +323,7 @@ public class IrusStats { preparedStatement.executeBatch(); ConnectDB.getConnection().close(); - + System.out.println("====> (processIrusIRReport) Finished downloading report(s) with opendoar: " + opendoar); } diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java index 3f86472b70..e9d73c6013 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java @@ -191,7 +191,7 @@ public class PiwikStatsDB { System.out.println("====> DownloadsStats processing starts"); System.out.println("====> Processing portal logs"); -// processPortalLog(); + processPortalLog(); System.out.println("====> Portal logs process done"); log.info("portal process done"); @@ -717,6 +717,10 @@ public class PiwikStatsDB { Statement stmt = ConnectDB.getConnection().createStatement(); ConnectDB.getConnection().setAutoCommit(false); + System.out.println("====> Adding JSON Serde jar"); + stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar"); + System.out.println("====> Added JSON Serde jar"); + System.out.println("====> Dropping process_portal_log_tmp_json table"); String drop_process_portal_log_tmp_json = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java index 91135fd0e4..c2ff7df1f7 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java @@ -44,14 +44,6 @@ public class UsageStatsExporter { // piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID); System.out.println("====> Downloaded logs"); - // Adding JSON Serde jar needed for creating tables over JSON files - Statement stmt = ConnectDB.getConnection().createStatement(); - ConnectDB.getConnection().setAutoCommit(false); - System.out.println("====> Adding JSON Serde jar"); - stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar"); - System.out.println("====> Added JSON Serde jar"); - stmt.close(); - // Create DB tables, insert/update statistics // String cRobotsUrl = properties.getProperty("COUNTER_robots_Url"); String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json"; @@ -61,15 +53,15 @@ public class UsageStatsExporter { log.info("process logs done"); IrusStats irusstats = new IrusStats(irusUKBaseURL); -// irusstats.processIrusRRReport(irusUKReportPath); +// irusstats.getIrusRRReport(irusUKReportPath); - irusstats.irusStats(); + irusstats.processIrusStats(); // log.info("irus done"); System.exit(0); - SarcStats sarcStats = new SarcStats(); - sarcStats.processSarc(sarcsReportPath); +// SarcStats sarcStats = new SarcStats(); +// sarcStats.processSarc(sarcsReportPath); // sarcStats.sarcStats(); log.info("sarc done");