diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaDownloadLogs.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaDownloadLogs.java
index 5559169a77..6efe5697dc 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaDownloadLogs.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaDownloadLogs.java
@@ -1,10 +1,5 @@
-package eu.dnetlib.oa.graph.usagestats.export;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.log4j.Logger;
+package eu.dnetlib.oa.graph.usagestats.export;
 
 import java.io.*;
 import java.net.URL;
@@ -13,182 +8,211 @@ import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.Statement;
 import java.text.SimpleDateFormat;
-import java.util.Date;
 import java.util.Calendar;
+import java.util.Date;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.Logger;
 import org.json.simple.JSONArray;
 import org.json.simple.JSONObject;
 import org.json.simple.parser.JSONParser;
 
 public class LaReferenciaDownloadLogs {
 
-	private final String piwikUrl;
-	private Date startDate;
-	private final String tokenAuth;
+	private final String piwikUrl;
+	private Date startDate;
+	private final String tokenAuth;
 
-	/*
-	The Piwik's API method
-	*/
-	private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
-	private final String format = "&format=json";
-	private final String ApimethodGetAllSites = "?module=API&method=SitesManager.getSitesWithViewAccess";
+	/*
+	 * The Piwik's API method
+	 */
+	private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
+	private final String format = "&format=json";
+	private final String ApimethodGetAllSites = "?module=API&method=SitesManager.getSitesWithViewAccess";
 
-	private final Logger log = Logger.getLogger(this.getClass());
+	private final Logger log = Logger.getLogger(this.getClass());
 
-	public LaReferenciaDownloadLogs(String piwikUrl, String tokenAuth) throws Exception {
-		this.piwikUrl = piwikUrl;
-		this.tokenAuth = tokenAuth;
-		this.createTables();
-		this.createTmpTables();
-	}
-	private void createTables() throws Exception {
-		try {
-			Statement stmt = ConnectDB.getConnection().createStatement();
-			String sqlCreateTableLareferenciaLog = "CREATE TABLE IF NOT EXISTS lareferencialog(matomoid INTEGER, source TEXT, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
-			String sqlcreateRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
-				+ " ON INSERT TO lareferencialog "
-				+ " WHERE (EXISTS ( SELECT lareferencialog.matomoid, lareferencialog.source, lareferencialog.id_visit,"
-				+ "lareferencialog.action, lareferencialog.\"timestamp\", lareferencialog.entity_id "
-				+ "FROM lareferencialog "
-				+ "WHERE lareferencialog.matomoid=new.matomoid AND lareferencialog.source = new.source AND lareferencialog.id_visit = new.id_visit AND lareferencialog.action = new.action AND lareferencialog.entity_id = new.entity_id AND lareferencialog.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
-			String sqlCreateRuleIndexLaReferenciaLog = "create index if not exists lareferencialog_rule on lareferencialog(matomoid, source, id_visit, action, entity_id, \"timestamp\");";
-			stmt.executeUpdate(sqlCreateTableLareferenciaLog);
-			stmt.executeUpdate(sqlcreateRuleLaReferenciaLog);
-			stmt.executeUpdate(sqlCreateRuleIndexLaReferenciaLog);
+	public LaReferenciaDownloadLogs(String piwikUrl, String tokenAuth) throws Exception {
+		this.piwikUrl = piwikUrl;
+		this.tokenAuth = tokenAuth;
+		this.createTables();
+//		this.createTmpTables();
+	}
 
-			stmt.close();
-			ConnectDB.getConnection().close();
-			log.info("Lareferencia Tables Created");
+	private void createTables() throws Exception {
+		try {
+			Statement stmt = ConnectDB.getConnection().createStatement();
 
-		} catch (Exception e) {
-			log.error("Failed to create tables: " + e);
-			throw new Exception("Failed to create tables: " + e.toString(), e);
-			//System.exit(0);
-		}
-	}
+			System.out.println("====> Creating LaReferencia tables");
+			String sqlCreateTableLareferenciaLog = "CREATE TABLE IF NOT EXISTS "
+				+ ConnectDB.getUsageStatsDBSchema() + ".lareferencialog(matomoid INT, "
+				+ "source STRING, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, "
+				+ "source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) "
+				+ "clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets "
+				+ "stored as orc tblproperties('transactional'='true')";
+			stmt.executeUpdate(sqlCreateTableLareferenciaLog);
+			System.out.println("====> Created LaReferencia tables");
+//			String sqlcreateRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+//				+ " ON INSERT TO lareferencialog "
+//				+ " WHERE (EXISTS ( SELECT lareferencialog.matomoid, lareferencialog.source, lareferencialog.id_visit,"
+//				+ "lareferencialog.action, lareferencialog.\"timestamp\", lareferencialog.entity_id "
+//				+ "FROM lareferencialog "
+//				+ "WHERE lareferencialog.matomoid=new.matomoid AND lareferencialog.source = new.source AND lareferencialog.id_visit = new.id_visit AND lareferencialog.action = new.action AND lareferencialog.entity_id = new.entity_id AND lareferencialog.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
+//			String sqlCreateRuleIndexLaReferenciaLog = "create index if not exists lareferencialog_rule on lareferencialog(matomoid, source, id_visit, action, entity_id, \"timestamp\");";
+//			stmt.executeUpdate(sqlcreateRuleLaReferenciaLog);
+//			stmt.executeUpdate(sqlCreateRuleIndexLaReferenciaLog);
 
-	private void createTmpTables() throws Exception {
+			stmt.close();
+			ConnectDB.getConnection().close();
+			log.info("Lareferencia Tables Created");
 
-		try {
-			Statement stmt = ConnectDB.getConnection().createStatement();
-			String sqlCreateTmpTableLaReferenciaLog = "CREATE TABLE IF NOT EXISTS lareferencialogtmp(matomoid INTEGER, source TEXT, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
-			String sqlcreateTmpRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
-				+ " ON INSERT TO lareferencialogtmp "
-				+ " WHERE (EXISTS ( SELECT lareferencialogtmp.matomoid, lareferencialogtmp.source, lareferencialogtmp.id_visit,"
-				+ "lareferencialogtmp.action, lareferencialogtmp.\"timestamp\", lareferencialogtmp.entity_id "
-				+ "FROM lareferencialogtmp "
-				+ "WHERE lareferencialogtmp.matomoid=new.matomoid AND lareferencialogtmp.source = new.source AND lareferencialogtmp.id_visit = new.id_visit AND lareferencialogtmp.action = new.action AND lareferencialogtmp.entity_id = new.entity_id AND lareferencialogtmp.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
-			stmt.executeUpdate(sqlCreateTmpTableLaReferenciaLog);
-			stmt.executeUpdate(sqlcreateTmpRuleLaReferenciaLog);
+		} catch (Exception e) {
+			log.error("Failed to create tables: " + e);
+			throw new Exception("Failed to create tables: " + e.toString(), e);
+			// System.exit(0);
+		}
+	}
 
-			stmt.close();
-			log.info("Lareferencia Tmp Tables Created");
+	private void createTmpTables() throws Exception {
 
-		} catch (Exception e) {
-			log.error("Failed to create tmptables: " + e);
-			throw new Exception("Failed to create tmp tables: " + e.toString(), e);
-			//System.exit(0);
-		}
-	}
-	private String getPiwikLogUrl() {
-		return piwikUrl + "/";
-	}
+		try {
+			Statement stmt = ConnectDB.getConnection().createStatement();
+			String sqlCreateTmpTableLaReferenciaLog = "CREATE TABLE IF NOT EXISTS lareferencialogtmp(matomoid INTEGER, source TEXT, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
+			String sqlcreateTmpRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+				+ " ON INSERT TO lareferencialogtmp "
+				+ " WHERE (EXISTS ( SELECT lareferencialogtmp.matomoid, lareferencialogtmp.source, lareferencialogtmp.id_visit,"
+				+ "lareferencialogtmp.action, lareferencialogtmp.\"timestamp\", lareferencialogtmp.entity_id "
+				+ "FROM lareferencialogtmp "
+				+ "WHERE lareferencialogtmp.matomoid=new.matomoid AND lareferencialogtmp.source = new.source AND lareferencialogtmp.id_visit = new.id_visit AND lareferencialogtmp.action = new.action AND lareferencialogtmp.entity_id = new.entity_id AND lareferencialogtmp.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
+			stmt.executeUpdate(sqlCreateTmpTableLaReferenciaLog);
+			stmt.executeUpdate(sqlcreateTmpRuleLaReferenciaLog);
 
-	private String getJson(String url) throws Exception {
-		try {
-			URL website = new URL(url);
-			URLConnection connection = website.openConnection();
+			stmt.close();
+			log.info("Lareferencia Tmp Tables Created");
 
-			StringBuilder response;
-			try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
-				response = new StringBuilder();
-				String inputLine;
-				while ((inputLine = in.readLine()) != null) {
-					response.append(inputLine);
-					response.append("\n");
-				}
-			}
-			return response.toString();
-		} catch (Exception e) {
-			log.error("Failed to get URL: " + e);
-			throw new Exception("Failed to get URL: " + e.toString(), e);
-		}
-	}
+		} catch (Exception e) {
+			log.error("Failed to create tmptables: " + e);
+			throw new Exception("Failed to create tmp tables: " + e.toString(), e);
+			// System.exit(0);
+		}
+	}
 
-	public void GetLaReferenciaRepos(String repoLogsPath) throws Exception {
+	private String getPiwikLogUrl() {
+		return piwikUrl + "/";
+	}
 
-		String baseApiUrl = getPiwikLogUrl() + ApimethodGetAllSites + format + "&token_auth=" + this.tokenAuth;
-		String content = "";
+	private String getJson(String url) throws Exception {
+		try {
+			URL website = new URL(url);
+			URLConnection connection = website.openConnection();
 
-		content = getJson(baseApiUrl);
-		JSONParser parser = new JSONParser();
-		JSONArray jsonArray = (JSONArray) parser.parse(content);
-		for (Object aJsonArray : jsonArray) {
-			JSONObject jsonObjectRow = (JSONObject) aJsonArray;
-			int idSite = Integer.parseInt(jsonObjectRow.get("idsite").toString());
-			this.GetLaReFerenciaLogs(repoLogsPath, idSite);
-		}
-	}
+			StringBuilder response;
+			try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
+				response = new StringBuilder();
+				String inputLine;
+				while ((inputLine = in.readLine()) != null) {
+					response.append(inputLine);
+					response.append("\n");
+				}
+			}
+			return response.toString();
+		} catch (Exception e) {
+			log.error("Failed to get URL: " + e);
+			throw new Exception("Failed to get URL: " + e.toString(), e);
+		}
+	}
 
-	public void GetLaReFerenciaLogs(String repoLogsPath,
-		int laReferencialMatomoID) throws Exception {
+	public void GetLaReferenciaRepos(String repoLogsPath) throws Exception {
 
-		SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
+		String baseApiUrl = getPiwikLogUrl() + ApimethodGetAllSites + format + "&token_auth=" + this.tokenAuth;
+		String content = "";
 
-		Calendar start = Calendar.getInstance();
-		start.set(Calendar.YEAR, 2020);
-		start.set(Calendar.MONTH, Calendar.JANUARY);
-		start.set(Calendar.DAY_OF_MONTH, 1);
+		content = getJson(baseApiUrl);
+		JSONParser parser = new JSONParser();
+		JSONArray jsonArray = (JSONArray) parser.parse(content);
+		for (Object aJsonArray : jsonArray) {
+			JSONObject jsonObjectRow = (JSONObject) aJsonArray;
+			int idSite = Integer.parseInt(jsonObjectRow.get("idsite").toString());
+			this.GetLaReFerenciaLogs(repoLogsPath, idSite);
+		}
+	}
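`GetLaReferenciaRepos` assumes `SitesManager.getSitesWithViewAccess` returns a JSON array of site descriptors and reads only the lowercase `idsite` field. An illustrative round-trip with an invented payload (the values are made up; only the key names matter):

```java
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

public class SitesResponseShape {
	public static void main(String[] args) throws Exception {
		// Invented two-site payload mirroring the fields GetLaReferenciaRepos touches.
		String content = "[{\"idsite\":\"1\",\"name\":\"Repo A\"},{\"idsite\":\"2\",\"name\":\"Repo B\"}]";
		JSONArray sites = (JSONArray) new JSONParser().parse(content);
		for (Object site : sites) {
			int idSite = Integer.parseInt(((JSONObject) site).get("idsite").toString());
			System.out.println("would fetch logs for Matomo site " + idSite);
		}
	}
}
```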
 
-		Calendar end = Calendar.getInstance();
-		end.add(Calendar.DAY_OF_MONTH, -1);
+	public void GetLaReFerenciaLogs(String repoLogsPath,
+		int laReferencialMatomoID) throws Exception {
 
-		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
-		PreparedStatement st = ConnectDB.getConnection().prepareStatement("SELECT max(timestamp) FROM lareferencialog WHERE matomoid=? HAVING max(timestamp) is not null;");
-		st.setInt(1, laReferencialMatomoID);
+		SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
 
-		ResultSet rs_date = st.executeQuery();
-		while (rs_date.next()) {
-			if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
-				start.setTime(sdf.parse(rs_date.getString(1)));
-			}
-		}
-		rs_date.close();
+		Calendar start = Calendar.getInstance();
+		start.set(Calendar.YEAR, 2020);
+		start.set(Calendar.MONTH, Calendar.JANUARY);
+		start.set(Calendar.DAY_OF_MONTH, 1);
 
-		for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
-			log.info("Downloading logs for LaReferencia repoid " + laReferencialMatomoID + " and for " + sdf.format(date));
+		Calendar end = Calendar.getInstance();
+		end.add(Calendar.DAY_OF_MONTH, -1);
 
-			String period = "&period=day&date=" + sdf.format(date);
-			String outFolder = "";
-			outFolder = repoLogsPath;
+		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
+		PreparedStatement st = ConnectDB
+			.getConnection()
+			.prepareStatement(
+				"SELECT max(timestamp) FROM " + ConnectDB.getUsageStatsDBSchema()
+					+ ".lareferencialog WHERE matomoid=? HAVING max(timestamp) is not null");
+		st.setInt(1, laReferencialMatomoID);
 
-			FileSystem fs = FileSystem.get(new Configuration());
+		ResultSet rs_date = st.executeQuery();
+		while (rs_date.next()) {
+			if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null")
+				&& !rs_date.getString(1).equals("")) {
+				start.setTime(sdf.parse(rs_date.getString(1)));
+			}
+		}
+		rs_date.close();
 
-			String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + laReferencialMatomoID + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
-			String content = "";
+		for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
+			log
+				.info(
+					"Downloading logs for LaReferencia repoid " + laReferencialMatomoID + " and for "
+						+ sdf.format(date));
 
-			int i = 0;
+			String period = "&period=day&date=" + sdf.format(date);
+			String outFolder = "";
+			outFolder = repoLogsPath;
 
-			while (!content.equals("[]\n")) {
+			FileSystem fs = FileSystem.get(new Configuration());
 
-				FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + laReferencialMatomoID + "_LaRefPiwiklog" + sdf.format((date)) + "_" + i + ".json"), true);
-				String apiUrl = baseApiUrl;
+			String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + laReferencialMatomoID + period + format
+				+ "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
+			String content = "";
 
-				if (i > 0) {
-					apiUrl += "&filter_offset=" + (i * 1000);
-				}
+			int i = 0;
 
-				content = getJson(apiUrl);
+			while (!content.equals("[]\n")) {
 
-				fin.write(content.getBytes());
+				FSDataOutputStream fin = fs
+					.create(
+						new Path(outFolder + "/" + laReferencialMatomoID + "_LaRefPiwiklog" + sdf.format((date)) + "_"
+							+ i + ".json"),
+						true);
+				String apiUrl = baseApiUrl;
 
-				i++;
-				fin.close();
-			}
-			//fin.close();
-			//out.close();
+				if (i > 0) {
+					apiUrl += "&filter_offset=" + (i * 1000);
+				}
 
-		}
+				content = getJson(apiUrl);
 
-		// }
-	}
+				fin.write(content.getBytes());
+
+				i++;
+				fin.close();
+			}
+			// fin.close();
+			// out.close();
+
+		}
+
+		// }
+	}
 }
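The download loop above pages through `Live.getLastVisitsDetails` in 1000-visit chunks via `filter_offset`, writing one HDFS file per page, and stops only after fetching the page on which Matomo returns a bare empty array — `getJson` appends a newline per line read, hence the `"[]\n"` comparison. The paging contract in isolation, with stand-in I/O methods (both hypothetical):

```java
public class MatomoPagingSketch {

	// Stand-ins for getJson() and the FSDataOutputStream write above; both hypothetical.
	static String fetchPage(String apiUrl) {
		return "[]\n"; // a real implementation would call the Matomo HTTP API
	}

	static void writePage(String page, int pageNo) {
		System.out.println("page " + pageNo + ": " + page.trim());
	}

	public static void main(String[] args) {
		String baseApiUrl = "https://matomo.example.org/?module=API&method=Live.getLastVisitsDetails"
			+ "&idSite=1&period=day&date=2020-01-01&format=json&expanded=5&filter_limit=1000&token_auth=XXX";
		String content = "";
		int i = 0;
		while (!content.equals("[]\n")) {
			String apiUrl = i > 0 ? baseApiUrl + "&filter_offset=" + (i * 1000) : baseApiUrl;
			content = fetchPage(apiUrl);
			writePage(content, i); // note: the empty terminating page is written too
			i++;
		}
	}
}
```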
 
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java
index 44b1e12567..a7330f9447 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.oa.graph.usagestats.export;
 
 import java.io.*;
@@ -13,9 +14,9 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.log4j.Logger;
 import org.json.simple.JSONArray;
@@ -24,310 +25,324 @@ import org.json.simple.parser.JSONParser;
 
 public class LaReferenciaStats {
 
-	private String logRepoPath;
+	private String logRepoPath;
 
-	private Statement stmt = null;
+	private Statement stmt = null;
 
-	private final Logger log = Logger.getLogger(this.getClass());
-	private String CounterRobotsURL;
-	private ArrayList<String> robotsList;
+	private final Logger log = Logger.getLogger(this.getClass());
+	private String CounterRobotsURL;
+	private ArrayList<String> robotsList;
 
-	public LaReferenciaStats(String logRepoPath) throws Exception {
-		this.logRepoPath = logRepoPath;
-		this.createTables();
-		this.createTmpTables();
-	}
+	public LaReferenciaStats(String logRepoPath) throws Exception {
+		this.logRepoPath = logRepoPath;
+		this.createTables();
+		this.createTmpTables();
+	}
 
-	/*
-	private void connectDB() throws Exception {
-		try {
-			ConnectDB connectDB = new ConnectDB();
-		} catch (Exception e) {
-			log.error("Connect to db failed: " + e);
-			throw new Exception("Failed to connect to db: " + e.toString(), e);
-		}
-	}
-*/
-	private void createTables() throws Exception {
-		try {
-			Statement stmt = ConnectDB.getConnection().createStatement();
-			String sqlCreateTableLareferenciaLog = "CREATE TABLE IF NOT EXISTS lareferencialog(matomoid INTEGER, source TEXT, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
-			String sqlcreateRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
-				+ " ON INSERT TO lareferencialog "
-				+ " WHERE (EXISTS ( SELECT lareferencialog.matomoid, lareferencialog.source, lareferencialog.id_visit,"
-				+ "lareferencialog.action, lareferencialog.\"timestamp\", lareferencialog.entity_id "
-				+ "FROM lareferencialog "
-				+ "WHERE lareferencialog.matomoid=new.matomoid AND lareferencialog.source = new.source AND lareferencialog.id_visit = new.id_visit AND lareferencialog.action = new.action AND lareferencialog.entity_id = new.entity_id AND lareferencialog.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
-			String sqlCreateRuleIndexLaReferenciaLog = "create index if not exists lareferencialog_rule on lareferencialog(matomoid, source, id_visit, action, entity_id, \"timestamp\");";
-			stmt.executeUpdate(sqlCreateTableLareferenciaLog);
-			stmt.executeUpdate(sqlcreateRuleLaReferenciaLog);
-			stmt.executeUpdate(sqlCreateRuleIndexLaReferenciaLog);
+	/*
+	 * private void connectDB() throws Exception { try { ConnectDB connectDB = new ConnectDB(); } catch (Exception e) {
+	 * log.error("Connect to db failed: " + e); throw new Exception("Failed to connect to db: " + e.toString(), e); } }
+	 */
+	private void createTables() throws Exception {
+		try {
+			Statement stmt = ConnectDB.getConnection().createStatement();
+			String sqlCreateTableLareferenciaLog = "CREATE TABLE IF NOT EXISTS lareferencialog(matomoid INTEGER, source TEXT, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
+			String sqlcreateRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+				+ " ON INSERT TO lareferencialog "
+				+ " WHERE (EXISTS ( SELECT lareferencialog.matomoid, lareferencialog.source, lareferencialog.id_visit,"
+				+ "lareferencialog.action, lareferencialog.\"timestamp\", lareferencialog.entity_id "
+				+ "FROM lareferencialog "
+				+ "WHERE lareferencialog.matomoid=new.matomoid AND lareferencialog.source = new.source AND lareferencialog.id_visit = new.id_visit AND lareferencialog.action = new.action AND lareferencialog.entity_id = new.entity_id AND lareferencialog.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
+			String sqlCreateRuleIndexLaReferenciaLog = "create index if not exists lareferencialog_rule on lareferencialog(matomoid, source, id_visit, action, entity_id, \"timestamp\");";
+			stmt.executeUpdate(sqlCreateTableLareferenciaLog);
+			stmt.executeUpdate(sqlcreateRuleLaReferenciaLog);
+			stmt.executeUpdate(sqlCreateRuleIndexLaReferenciaLog);
 
-			stmt.close();
-			ConnectDB.getConnection().close();
-			log.info("Lareferencia Tables Created");
+			stmt.close();
+			ConnectDB.getConnection().close();
+			log.info("Lareferencia Tables Created");
 
-		} catch (Exception e) {
-			log.error("Failed to create tables: " + e);
-			throw new Exception("Failed to create tables: " + e.toString(), e);
-			//System.exit(0);
-		}
-	}
+		} catch (Exception e) {
+			log.error("Failed to create tables: " + e);
+			throw new Exception("Failed to create tables: " + e.toString(), e);
+			// System.exit(0);
+		}
+	}
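Unlike the downloader, this class still provisions the Postgres-era table, RULE and index. The RULE turns any INSERT matching an existing `(source, id_visit, action, timestamp, entity_id)` row into a no-op, which is what lets the batch loader below be re-run safely. A minimal illustration (assumes a Postgres connection through `ConnectDB` with the table and rule just created):

```java
// Assumes the lareferencialog table and ignore_duplicate_inserts rule above
// exist in a Postgres database reachable through ConnectDB; values are invented.
Statement s = ConnectDB.getConnection().createStatement();
String insert = "INSERT INTO lareferencialog(matomoid, source, id_visit, country, action, url, "
	+ "entity_id, source_item_type, timestamp, referrer_name, agent) VALUES "
	+ "(1, 'opendoar____::1234', 'v1', 'AR', 'download', 'http://repo.example/rec/1', "
	+ "'oai:repo.example:1', 'repItem', '2020-01-01 00:00:00', 'google', 'Chrome')";
s.executeUpdate(insert); // first copy of the row is stored
s.executeUpdate(insert); // matched by the rule's WHERE EXISTS -> DO INSTEAD NOTHING
s.close();
```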
 
-	private void createTmpTables() throws Exception {
+	private void createTmpTables() throws Exception {
 
-		try {
-			Statement stmt = ConnectDB.getConnection().createStatement();
-			String sqlCreateTmpTableLaReferenciaLog = "CREATE TABLE IF NOT EXISTS lareferencialogtmp(matomoid INTEGER, source TEXT, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
-			String sqlcreateTmpRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
-				+ " ON INSERT TO lareferencialogtmp "
-				+ " WHERE (EXISTS ( SELECT lareferencialogtmp.matomoid, lareferencialogtmp.source, lareferencialogtmp.id_visit,"
-				+ "lareferencialogtmp.action, lareferencialogtmp.\"timestamp\", lareferencialogtmp.entity_id "
-				+ "FROM lareferencialogtmp "
-				+ "WHERE lareferencialogtmp.matomoid=new.matomoid AND lareferencialogtmp.source = new.source AND lareferencialogtmp.id_visit = new.id_visit AND lareferencialogtmp.action = new.action AND lareferencialogtmp.entity_id = new.entity_id AND lareferencialogtmp.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
-			stmt.executeUpdate(sqlCreateTmpTableLaReferenciaLog);
-			stmt.executeUpdate(sqlcreateTmpRuleLaReferenciaLog);
+		try {
+			Statement stmt = ConnectDB.getConnection().createStatement();
+			String sqlCreateTmpTableLaReferenciaLog = "CREATE TABLE IF NOT EXISTS lareferencialogtmp(matomoid INTEGER, source TEXT, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
+			String sqlcreateTmpRuleLaReferenciaLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+				+ " ON INSERT TO lareferencialogtmp "
+				+ " WHERE (EXISTS ( SELECT lareferencialogtmp.matomoid, lareferencialogtmp.source, lareferencialogtmp.id_visit,"
+				+ "lareferencialogtmp.action, lareferencialogtmp.\"timestamp\", lareferencialogtmp.entity_id "
+				+ "FROM lareferencialogtmp "
+				+ "WHERE lareferencialogtmp.matomoid=new.matomoid AND lareferencialogtmp.source = new.source AND lareferencialogtmp.id_visit = new.id_visit AND lareferencialogtmp.action = new.action AND lareferencialogtmp.entity_id = new.entity_id AND lareferencialogtmp.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
+			stmt.executeUpdate(sqlCreateTmpTableLaReferenciaLog);
+			stmt.executeUpdate(sqlcreateTmpRuleLaReferenciaLog);
 
-			stmt.close();
-			log.info("Lareferencia Tmp Tables Created");
+			stmt.close();
+			log.info("Lareferencia Tmp Tables Created");
 
-		} catch (Exception e) {
-			log.error("Failed to create tmptables: " + e);
-			throw new Exception("Failed to create tmp tables: " + e.toString(), e);
-			//System.exit(0);
-		}
-	}
+		} catch (Exception e) {
+			log.error("Failed to create tmptables: " + e);
+			throw new Exception("Failed to create tmp tables: " + e.toString(), e);
+			// System.exit(0);
+		}
+	}
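Everything below works against `lareferencialogtmp`: `processLogs()` stages freshly parsed rows there, cleans them, computes stats, and only then promotes them via `updateProdTables()`, whose body falls outside this excerpt. A hedged sketch of what that promotion typically amounts to — treat it as one plausible shape, not this PR's implementation:

```java
// Hedged sketch of the tmp -> production hand-off performed at the end of the
// pipeline; the real updateProdTables() is not visible in this diff.
Statement stmt = ConnectDB.getConnection().createStatement();
stmt.executeUpdate("INSERT INTO lareferencialog SELECT * FROM lareferencialogtmp");
stmt.executeUpdate("DELETE FROM lareferencialogtmp"); // start the next run clean
ConnectDB.getConnection().commit();
stmt.close();
```

With the `ignore_duplicate_inserts` rule installed, the `INSERT ... SELECT` silently skips rows already promoted by an earlier run.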
Exception("Failed to create tmp tables: " + e.toString(), e); + // System.exit(0); + } + } - public void processLogs() throws Exception { - try { + public void processLogs() throws Exception { + try { - processlaReferenciaLog(); - log.info("LaReferencia repository process done"); - removeDoubleClicks(); - log.info("LaReferencia removing double clicks done"); - viewsStats(); - log.info("LaReferencia views done"); - downloadsStats(); - log.info("LaReferencia downloads done"); - updateProdTables(); - log.info("LaReferencia update productions tables done"); + processlaReferenciaLog(); + log.info("LaReferencia repository process done"); + removeDoubleClicks(); + log.info("LaReferencia removing double clicks done"); + viewsStats(); + log.info("LaReferencia views done"); + downloadsStats(); + log.info("LaReferencia downloads done"); + updateProdTables(); + log.info("LaReferencia update productions tables done"); - } catch (Exception e) { - log.error("Failed to process logs: " + e); - throw new Exception("Failed to process logs: " + e.toString(), e); - } - } + } catch (Exception e) { + log.error("Failed to process logs: " + e); + throw new Exception("Failed to process logs: " + e.toString(), e); + } + } - public void processlaReferenciaLog() throws Exception { + public void processlaReferenciaLog() throws Exception { - Statement stmt = ConnectDB.getConnection().createStatement(); - ConnectDB.getConnection().setAutoCommit(false); - ArrayList jsonFiles = listHdfsDir(this.logRepoPath); + Statement stmt = ConnectDB.getConnection().createStatement(); + ConnectDB.getConnection().setAutoCommit(false); + ArrayList jsonFiles = listHdfsDir(this.logRepoPath); - //File dir = new File(this.logRepoPath); - //File[] jsonFiles = dir.listFiles(); - PreparedStatement prepStatem = ConnectDB.getConnection().prepareStatement("INSERT INTO lareferencialogtmp (matomoid, source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?,?)"); - int batch_size = 0; + // File dir = new File(this.logRepoPath); + // File[] jsonFiles = dir.listFiles(); + PreparedStatement prepStatem = ConnectDB + .getConnection() + .prepareStatement( + "INSERT INTO lareferencialogtmp (matomoid, source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?,?)"); + int batch_size = 0; - JSONParser parser = new JSONParser(); - for (String jsonFile : jsonFiles) { - System.out.println(jsonFile); - JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile)); - for (Object aJsonArray : jsonArray) { - JSONObject jsonObjectRow = (JSONObject) aJsonArray; - int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString()); - String idVisit = jsonObjectRow.get("idVisit").toString(); - String country = jsonObjectRow.get("country").toString(); - String referrerName = jsonObjectRow.get("referrerName").toString(); - String agent = jsonObjectRow.get("browser").toString(); - String sourceItemType = "repItem"; + JSONParser parser = new JSONParser(); + for (String jsonFile : jsonFiles) { + System.out.println(jsonFile); + JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile)); + for (Object aJsonArray : jsonArray) { + JSONObject jsonObjectRow = (JSONObject) aJsonArray; + int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString()); + String idVisit = jsonObjectRow.get("idVisit").toString(); + String country = jsonObjectRow.get("country").toString(); + String referrerName = 
jsonObjectRow.get("referrerName").toString(); + String agent = jsonObjectRow.get("browser").toString(); + String sourceItemType = "repItem"; - JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails")); - for (Object actionDetail : actionDetails) { - JSONObject actionDetailsObj = (JSONObject) actionDetail; + JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails")); + for (Object actionDetail : actionDetails) { + JSONObject actionDetailsObj = (JSONObject) actionDetail; - if (actionDetailsObj.get("customVariables") != null) { - SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - Timestamp timestamp = new Timestamp(Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000); - String url = actionDetailsObj.get("url").toString(); - String oaipmh = ((JSONObject) ((JSONObject) actionDetailsObj.get("customVariables")).get("1")).get("customVariablePageValue1").toString(); - String opendoar = ((JSONObject) ((JSONObject) actionDetailsObj.get("customVariables")).get("2")).get("customVariablePageValue2").toString(); - String action = actionDetailsObj.get("type").toString(); - prepStatem.setInt(1, idSite); - prepStatem.setString(2, "opendoar____::" + opendoar); - prepStatem.setString(3, idVisit); - prepStatem.setString(4, country); - prepStatem.setString(5, action); - prepStatem.setString(6, url); - prepStatem.setString(7, oaipmh); - prepStatem.setString(8, sourceItemType); - prepStatem.setString(9, simpleDateFormat.format(timestamp)); - prepStatem.setString(10, referrerName); - prepStatem.setString(11, agent); - //prepStatem.setString(11, ); - prepStatem.addBatch(); - batch_size++; - if (batch_size == 10000) { - prepStatem.executeBatch(); - ConnectDB.getConnection().commit(); - batch_size = 0; - } - } - } - } - } - try { - prepStatem.executeBatch(); - ConnectDB.getConnection().commit(); - stmt.close(); - } catch (Exception e) { + if (actionDetailsObj.get("customVariables") != null) { + SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + Timestamp timestamp = new Timestamp( + Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000); + String url = actionDetailsObj.get("url").toString(); + String oaipmh = ((JSONObject) ((JSONObject) actionDetailsObj.get("customVariables")).get("1")) + .get("customVariablePageValue1") + .toString(); + String opendoar = ((JSONObject) ((JSONObject) actionDetailsObj.get("customVariables")).get("2")) + .get("customVariablePageValue2") + .toString(); + String action = actionDetailsObj.get("type").toString(); + prepStatem.setInt(1, idSite); + prepStatem.setString(2, "opendoar____::" + opendoar); + prepStatem.setString(3, idVisit); + prepStatem.setString(4, country); + prepStatem.setString(5, action); + prepStatem.setString(6, url); + prepStatem.setString(7, oaipmh); + prepStatem.setString(8, sourceItemType); + prepStatem.setString(9, simpleDateFormat.format(timestamp)); + prepStatem.setString(10, referrerName); + prepStatem.setString(11, agent); + // prepStatem.setString(11, ); + prepStatem.addBatch(); + batch_size++; + if (batch_size == 10000) { + prepStatem.executeBatch(); + ConnectDB.getConnection().commit(); + batch_size = 0; + } + } + } + } + } + try { + prepStatem.executeBatch(); + ConnectDB.getConnection().commit(); + stmt.close(); + } catch (Exception e) { - if (e instanceof java.sql.SQLException) { - java.sql.SQLException ne = 
 
-	public void removeDoubleClicks() throws Exception {
+	public void removeDoubleClicks() throws Exception {
 
-		Statement stmt = ConnectDB.getConnection().createStatement();
-		ConnectDB.getConnection().setAutoCommit(false);
+		Statement stmt = ConnectDB.getConnection().createStatement();
+		ConnectDB.getConnection().setAutoCommit(false);
 
-		//clean download double clicks
-		String sql = "DELETE FROM lareferencialogtmp p WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp FROM lareferencialogtmp p1, lareferencialogtmp p2 WHERE p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='download' AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp [...]

[...]
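The `DELETE` above is cut off in this excerpt (along with the rest of `removeDoubleClicks()`, `viewsStats()`, `downloadsStats()` and `updateProdTables()`, which fall between it and `listHdfsDir` below). Its visible prefix self-joins the staging table on identical `source`, `id_visit`, `entity_id` and `action='download'` rows with different timestamps — i.e. double clicks. A hedged reconstruction of the intent, keeping only the later of two near-simultaneous clicks; the 30-second window and the trailing correlation clauses are assumptions:

```java
// Hedged reconstruction (not the PR's exact statement, which is truncated above):
// drop the earlier row of any pair of identical 'download' actions in the same
// visit that occur within ~30 seconds of each other.
String sql = "DELETE FROM lareferencialogtmp p WHERE EXISTS ("
	+ "SELECT 1 FROM lareferencialogtmp p1, lareferencialogtmp p2 "
	+ "WHERE p1.source=p2.source AND p1.id_visit=p2.id_visit "
	+ "AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='download' "
	+ "AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp "
	+ "AND p2.timestamp::timestamp - p1.timestamp::timestamp < interval '30 seconds' "
	+ "AND p.source=p1.source AND p.id_visit=p1.id_visit AND p.action=p1.action "
	+ "AND p.entity_id=p1.entity_id AND p.timestamp=p1.timestamp)";
stmt.executeUpdate(sql);
```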
+	private ArrayList<String> listHdfsDir(String dir) throws Exception {
+		FileSystem hdfs = FileSystem.get(new Configuration());
+		RemoteIterator<LocatedFileStatus> Files;
+		ArrayList<String> fileNames = new ArrayList<>();
 
-	private ArrayList<String> listHdfsDir(String dir) throws Exception {
-		FileSystem hdfs = FileSystem.get(new Configuration());
-		RemoteIterator<LocatedFileStatus> Files;
-		ArrayList<String> fileNames = new ArrayList<>();
 
+		try {
+			Path exportPath = new Path(hdfs.getUri() + dir);
+			Files = hdfs.listFiles(exportPath, false);
+			while (Files.hasNext()) {
+				String fileName = Files.next().getPath().toString();
+				// log.info("Found hdfs file " + fileName);
+				fileNames.add(fileName);
+			}
+			// hdfs.close();
+		} catch (Exception e) {
+			log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logRepoPath));
+			throw new Exception("HDFS file path with exported data does not exist :   " + logRepoPath, e);
+		}
 
-		try {
-			Path exportPath = new Path(hdfs.getUri() + dir);
-			Files = hdfs.listFiles(exportPath, false);
-			while (Files.hasNext()) {
-				String fileName = Files.next().getPath().toString();
-				//log.info("Found hdfs file " + fileName);
-				fileNames.add(fileName);
-			}
-			//hdfs.close();
-		} catch (Exception e) {
-			log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logRepoPath));
-			throw new Exception("HDFS file path with exported data does not exist : " + logRepoPath, e);
-		}
+		return fileNames;
+	}
 
-		return fileNames;
-	}
 
-	private String readHDFSFile(String filename) throws Exception {
-		String result;
-		try {
+	private String readHDFSFile(String filename) throws Exception {
+		String result;
+		try {
 
-			FileSystem fs = FileSystem.get(new Configuration());
-			//log.info("reading file : " + filename);
+			FileSystem fs = FileSystem.get(new Configuration());
+			// log.info("reading file : " + filename);
 
-			BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(filename))));
+			BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(filename))));
 
-			StringBuilder sb = new StringBuilder();
-			String line = br.readLine();
+			StringBuilder sb = new StringBuilder();
+			String line = br.readLine();
 
+			while (line != null) {
+				if (!line.equals("[]")) {
+					sb.append(line);
+				}
+				// sb.append(line);
+				line = br.readLine();
+			}
+			result = sb.toString().replace("][{\"idSite\"", ",{\"idSite\"");
+			if (result.equals("")) {
+				result = "[]";
+			}
 
-			while (line != null) {
-				if (!line.equals("[]")) {
-					sb.append(line);
-				}
-				//sb.append(line);
-				line = br.readLine();
-			}
-			result = sb.toString().replace("][{\"idSite\"", ",{\"idSite\"");
-			if (result.equals("")) {
-				result = "[]";
-			}
 
-			//fs.close();
-		} catch (Exception e) {
-			log.error(e);
-			throw new Exception(e);
-		}
+			// fs.close();
+		} catch (Exception e) {
+			log.error(e);
+			throw new Exception(e);
+		}
 
-		return result;
-	}
+		return result;
+	}
 }
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
index 6c72b1eb85..80848594a7 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
@@ -25,7 +25,7 @@ public class UsageStatsExporter {
 	static String lareferenciaLogPath = "/user/spyros/logs/usage_stats_logs6/lareferencia";
 	static String lareferenciaBaseURL = "http://matomo.lareferencia.info";
 	static String lareferenciaAuthToken = "484874b3655d5a831eb8db33695790c4";
-
+
 	public UsageStatsExporter(Properties properties) {
 		this.properties = properties;
 	}
@@ -57,21 +57,22 @@ public class UsageStatsExporter {
 
 		// piwikstatsdb.processLogs();
 		log.info("process logs done");
 
-//		LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(lareferenciaBaseURL,lareferenciaAuthToken);
-//		lrf.GetLaReferenciaRepos(lareferenciaLogPath);
+		System.out.println("====> Creating LaReferencia tables");
+		LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(lareferenciaBaseURL, lareferenciaAuthToken);
+		lrf.GetLaReferenciaRepos(lareferenciaLogPath);
 		// LaReferenciaStats lastats = new LaReferenciaStats(lareferenciaLogPath);
 		// lastats.processLogs();
 		// log.info("LaReferencia logs done");
-
-		IrusStats irusstats = new IrusStats(irusUKBaseURL);
+
+//		IrusStats irusstats = new IrusStats(irusUKBaseURL);
 		// irusstats.getIrusRRReport(irusUKReportPath);
 		// irusstats.processIrusStats();
 		// log.info("irus done");
 
-		SarcStats sarcStats = new SarcStats();
+//		SarcStats sarcStats = new SarcStats();
 		// sarcStats.getAndProcessSarc(sarcsReportPathArray, sarcsReportPathNonArray);
-		sarcStats.finalizeSarcStats();
+//		sarcStats.finalizeSarcStats();
 		// log.info("sarc done");
 
 		//
 		// finalize usagestats
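The exporter change flips the pipeline to exercise only the LaReferencia download path: the IRUS and SARC stages are commented out and the `lrf` calls are enabled, with the Matomo endpoint, token and HDFS path still hard-coded as statics. Since the class already receives a `Properties` object, a natural follow-up (sketch only, property keys invented) is to source those values from configuration:

```java
// Sketch only — not part of this PR. Property keys are invented; the idea is to
// feed the hard-coded statics above through the Properties the exporter already gets.
Properties props = new Properties();
props.setProperty("lareferencia.baseUrl", "http://matomo.lareferencia.info");
props.setProperty("lareferencia.authToken", "***");
props.setProperty("lareferencia.logPath", "/user/spyros/logs/usage_stats_logs6/lareferencia");

LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(
	props.getProperty("lareferencia.baseUrl"),
	props.getProperty("lareferencia.authToken"));
lrf.GetLaReferenciaRepos(props.getProperty("lareferencia.logPath"));
```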