diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java
index 5245a4a796..c0fa2eedb3 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/SarcStats.java
@@ -8,6 +8,7 @@ import java.net.URL;
 import java.net.URLConnection;
 import java.sql.PreparedStatement;
 import java.sql.ResultSet;
+import java.sql.SQLException;
 import java.sql.Statement;
 import java.text.SimpleDateFormat;
 import java.util.Calendar;
@@ -65,10 +66,108 @@ public class SarcStats {
 		}
 	}
 
-	public void processSarc() {
-
+	public void processSarc() throws Exception {
+		Statement stmt = ConnectDB.getConnection().createStatement();
+		ConnectDB.getConnection().setAutoCommit(false);
+
+		System.out.println("====> Adding JSON Serde jar");
+		stmt.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
+		System.out.println("====> Added JSON Serde jar");
+
+		System.out.println("====> Dropping sarc_sushilogtmp_json_array table");
+		String drop_sarc_sushilogtmp_json_array = "DROP TABLE IF EXISTS " +
+			ConnectDB.getUsageStatsDBSchema() +
+			".sarc_sushilogtmp_json_array";
+		stmt.executeUpdate(drop_sarc_sushilogtmp_json_array);
+		System.out.println("====> Dropped sarc_sushilogtmp_json_array table");
+
+		System.out.println("====> Creating sarc_sushilogtmp_json_array table");
+		String create_sarc_sushilogtmp_json_array = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
+			ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_array(\n" +
+			"  `ItemIdentifier` ARRAY<\n" +
+			"    struct<\n" +
+			"      `Type`: STRING,\n" +
+			"      `Value`: STRING\n" +
+			"    >\n" +
+			"  >,\n" +
+			"  `ItemPerformance` struct<\n" +
+			"    `Period`: struct<\n" +
+			"      `Begin`: STRING,\n" +
+			"      `End`: STRING\n" +
+			"    >,\n" +
+			"    `Instance`: struct<\n" +
+			"      `Count`: STRING,\n" +
+			"      `MetricType`: STRING\n" +
+			"    >\n" +
+			"  >\n" +
+			")" +
+			"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
+			"LOCATION '" + UsageStatsExporter.sarcsReportPathArray + "'\n" +
+			"TBLPROPERTIES (\"transactional\"=\"false\")";
+		stmt.executeUpdate(create_sarc_sushilogtmp_json_array);
+		System.out.println("====> Created sarc_sushilogtmp_json_array table");
+
+		System.out.println("====> Dropping sarc_sushilogtmp_json_non_array table");
+		String drop_sarc_sushilogtmp_json_non_array = "DROP TABLE IF EXISTS " +
+			ConnectDB.getUsageStatsDBSchema() +
+			".sarc_sushilogtmp_json_non_array";
+		stmt.executeUpdate(drop_sarc_sushilogtmp_json_non_array);
+		System.out.println("====> Dropped sarc_sushilogtmp_json_non_array table");
+
+		System.out.println("====> Creating sarc_sushilogtmp_json_non_array table");
+		String create_sarc_sushilogtmp_json_non_array = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
+			ConnectDB.getUsageStatsDBSchema() + ".sarc_sushilogtmp_json_non_array(\n" +
+			"  `ItemIdentifier` struct<\n" +
+			"    `Type`: STRING,\n" +
+			"    `Value`: STRING\n" +
+			"  >,\n" +
+			"  `ItemPerformance` struct<\n" +
+			"    `Period`: struct<\n" +
+			"      `Begin`: STRING,\n" +
+			"      `End`: STRING\n" +
+			"    >,\n" +
+			"    `Instance`: struct<\n" +
+			"      `Count`: STRING,\n" +
+			"      `MetricType`: STRING\n" +
+			"    >\n" +
+			"  >" +
+			")" +
+			"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
+			"LOCATION '" + UsageStatsExporter.sarcsReportPathNonArray + "'\n" +
+			"TBLPROPERTIES (\"transactional\"=\"false\")";
+		stmt.executeUpdate(create_sarc_sushilogtmp_json_non_array);
+		System.out.println("====> Created sarc_sushilogtmp_json_non_array table");
+
+//		System.out.println("====> Dropping sushilogtmp table");
+//		String drop_sushilogtmp = "DROP TABLE IF EXISTS " +
+//			ConnectDB.getUsageStatsDBSchema() +
+//			".sushilogtmp";
+//		stmt.executeUpdate(drop_sushilogtmp);
+//		System.out.println("====> Dropped sushilogtmp table");
+//
+//		System.out.println("====> Creating sushilogtmp table");
+//		String create_sushilogtmp = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema()
+//			+ ".sushilogtmp(source STRING, repository STRING, " +
+//			"rid STRING, date STRING, metric_type STRING, count INT) clustered by (source) into 100 buckets stored as orc "
+//			+
+//			"tblproperties('transactional'='true')";
+//		stmt.executeUpdate(create_sushilogtmp);
+//		System.out.println("====> Created sushilogtmp table");
+//
+//		System.out.println("====> Inserting to sushilogtmp table");
+//		String insert_sushilogtmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp " +
+//			"SELECT 'IRUS-UK', 'opendoar____::', `ItemIdent`.`Value`, `ItemPerf`.`Period`.`Begin`, " +
+//			"`ItemPerf`.`Instance`.`MetricType`, `ItemPerf`.`Instance`.`Count` " +
+//			"FROM " + ConnectDB.getUsageStatsDBSchema() + ".sushilogtmp_json " +
+//			"LATERAL VIEW posexplode(ItemIdentifier) ItemIdentifierTable AS seqi, ItemIdent " +
+//			"LATERAL VIEW posexplode(ItemPerformance) ItemPerformanceTable AS seqp, ItemPerf " +
+//			"WHERE `ItemIdent`.`Type`= 'OAI'";
+//		stmt.executeUpdate(insert_sushilogtmp);
+//		System.out.println("====> Inserted to sushilogtmp table");
+
+		ConnectDB.getConnection().close();
 	}
-
+
 	public void getSarc(String sarcsReportPathArray, String sarcsReportPathNonArray) throws Exception {
 		// There was a problem to download the following file
 		getARReport(
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
index 354802a24e..3d675ccfe7 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
@@ -60,8 +60,9 @@ public class UsageStatsExporter {
 		// log.info("irus done");
 
 		SarcStats sarcStats = new SarcStats();
-		sarcStats.getSarc(sarcsReportPathArray, sarcsReportPathNonArray);
-		sarcStats.sarcStats();
+//		sarcStats.getSarc(sarcsReportPathArray, sarcsReportPathNonArray);
+		sarcStats.processSarc();
+//		sarcStats.sarcStats();
 		log.info("sarc done");
 //
 //		finalize usagestats