From a852dd3a0d9c7c644b66e0ef470b1ffb330c4a0e Mon Sep 17 00:00:00 2001 From: Spyros Zoupanos Date: Fri, 9 Oct 2020 22:28:45 +0300 Subject: [PATCH] Making tables visible to Impala --- .../graph/usagestats/export/PiwikStatsDB.java | 90 +++---------------- .../usagestats/export/UsageStatsExporter.java | 40 ++++++++- 2 files changed, 47 insertions(+), 83 deletions(-) diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java index c88601a6b..897703756 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java @@ -4,24 +4,16 @@ package eu.dnetlib.oa.graph.usagestats.export; import java.io.*; import java.net.URLDecoder; import java.sql.Connection; -import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Statement; -import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.stream.Stream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; -import org.json.simple.JSONArray; -import org.json.simple.JSONObject; -import org.json.simple.parser.JSONParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -511,83 +503,21 @@ public class PiwikStatsDB { int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR); int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH); -// String sql = "SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS 
full_date INTO full_dates FROM generate_series(0, " + diffMonth + ", 1) AS offs;"; - String sql = "CREATE TABLE IF NOT EXISTS full_dates AS SELECT to_char(date_trunc('month', ('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date FROM generate_series(0, " - + diffMonth + ", 1) AS offs;"; + String sql = "CREATE TABLE IF NOT EXISTS full_dates AS SELECT to_char(date_trunc('month', " + + "('2016-01-01'::date + interval '1 month'*offs)), 'YYYY/MM') AS full_date FROM generate_series(0, " + + diffMonth + ", 1) AS offs"; stmt.executeUpdate(sql); - sql = "CREATE INDEX IF NOT EXISTS full_dates_full_date ON full_dates USING btree(full_date);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS views_stats_source ON views_stats USING btree(source);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS views_stats_repository_id ON views_stats USING btree(repository_id);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS views_stats_result_id ON views_stats USING btree(result_id);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS views_stats_date ON views_stats USING btree(date);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS pageviews_stats_source ON pageviews_stats USING btree(source);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS pageviews_stats_repository_id ON pageviews_stats USING btree(repository_id);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS pageviews_stats_result_id ON pageviews_stats USING btree(result_id);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS pageviews_stats_date ON pageviews_stats USING btree(date);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS downloads_stats_source ON downloads_stats USING btree(source);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS downloads_stats_repository_id ON downloads_stats USING btree(repository_id);"; - stmt.executeUpdate(sql); 
- - sql = "CREATE INDEX IF NOT EXISTS downloads_stats_result_id ON downloads_stats USING btree(result_id);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS downloads_stats_date ON downloads_stats USING btree(date);"; - stmt.executeUpdate(sql); - -// sql = "SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views INTO usage_stats FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;"; - sql = "CREATE TABLE IF NOT EXISTS usage_stats AS SELECT coalesce(ds.source, vs.source) as source, coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS usage_stats_source ON usage_stats USING btree(source);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS usage_stats_repository_id ON usage_stats USING btree(repository_id);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS usage_stats_result_id ON usage_stats USING btree(result_id);"; - stmt.executeUpdate(sql); - - sql = "CREATE INDEX IF NOT EXISTS usage_stats_date ON usage_stats USING btree(date);"; - stmt.executeUpdate(sql); - - sql = "DROP TABLE IF EXISTS process_portal_log_tmp;"; - 
stmt.executeUpdate(sql); - - sql = "DROP TABLE IF EXISTS pageviews_stats_tmp;"; - stmt.executeUpdate(sql); - - sql = "DROP VIEW IF EXISTS result_views_monthly_tmp"; - stmt.executeUpdate(sql); - - sql = "DROP TABLE IF EXISTS piwiklogtmp;"; - stmt.executeUpdate(sql); - - sql = "DROP TABLE IF EXISTS sushilogtmp;"; + sql = "CREATE TABLE IF NOT EXISTS usage_stats AS SELECT coalesce(ds.source, vs.source) as source, " + + "coalesce(ds.repository_id, vs.repository_id) as repository_id, coalesce(ds.result_id, vs.result_id) as result_id, " + + + "coalesce(ds.date, vs.date) as date, coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, " + + "coalesce(ds.openaire, 0) as openaire_downloads, coalesce(vs.openaire, 0) as openaire_views " + + "FROM downloads_stats AS ds FULL OUTER JOIN views_stats AS vs ON ds.source=vs.source " + + "AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date"; stmt.executeUpdate(sql); stmt.close(); - ConnectDB.getHiveConnection().commit(); ConnectDB.getHiveConnection().close(); } diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java index 2c40a84d1..95268b241 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java @@ -3,7 +3,10 @@ package eu.dnetlib.oa.graph.usagestats.export; import java.io.IOException; import java.sql.ResultSet; +import java.sql.SQLException; import java.sql.Statement; +import java.text.SimpleDateFormat; +import java.util.Calendar; import javax.sound.midi.SysexMessage; @@ -161,8 +164,39 @@ public class UsageStatsExporter { } logger.info("Sarc done"); -// // finalize usagestats -// piwikstatsdb.finalizeStats(); -// 
log.info("finalized stats");
+		// finalize usagestats
+		piwikstatsdb.finalizeStats();
+		logger.info("Finalized stats");
+
+		// Make the tables available to Impala
+		logger.info("Making tables visible to Impala");
+		invalidateMetadata();
+
+		logger.info("End");
 	}
+
+	/**
+	 * Makes the usage-stats tables visible to Impala by issuing INVALIDATE METADATA
+	 * for piwiklog, sushilog, downloads_stats, views_stats and lareferencialog in
+	 * the schema returned by ConnectDB.getUsageStatsDBSchema().
+	 *
+	 * @throws SQLException if the connection, the statement or an update fails
+	 */
+	private void invalidateMetadata() throws SQLException {
+		String[] tables = {
+			"piwiklog", "sushilog", "downloads_stats", "views_stats", "lareferencialog"
+		};
+
+		// Close the Impala connection that the statement was created from (not the
+		// Hive one); try-with-resources releases statement and connection even when
+		// an update throws.
+		try (java.sql.Connection impala = ConnectDB.getImpalaConnection();
+			Statement stmt = impala.createStatement()) {
+			for (String table : tables) {
+				String sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + "." + table;
+				stmt.executeUpdate(sql);
+			}
+		}
+	}
+
 }