processPortalLog finished

Spyros Zoupanos 2020-09-12 20:13:33 +03:00
parent 968d53f119
commit 8ddf1dcc15
2 changed files with 187 additions and 222 deletions


@@ -128,7 +128,7 @@ public class PiwikStatsDB {
+ ".piwiklogtmp(source INT, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, "
+ "source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) "
+ "clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets "
+ "stored as orc tblproperties('transactional'='true');";
+ "stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(sqlCreateTmpTablePiwikLog);
//////////////////////////////////////////////////
@@ -168,8 +168,8 @@ public class PiwikStatsDB {
this.robotsList = counterRobots.getRobotsPatterns();
System.out.println("====> Processing repository logs");
// processRepositoryLog();
System.out.println("====> Repository process done");
processRepositoryLog();
System.out.println("====> Repository logs process done");
log.info("repository process done");
System.out.println("====> Removing double clicks");
@@ -183,16 +183,20 @@ public class PiwikStatsDB {
log.info("cleaning oai done");
System.out.println("====> ViewsStats processing starts");
viewsStats();
// viewsStats();
System.out.println("====> ViewsStats processing ends");
System.out.println("====> DownloadsStats processing starts");
downloadsStats();
// downloadsStats();
System.out.println("====> DownloadsStats processing starts");
System.out.println("====> Processing portal logs");
processPortalLog();
System.out.println("====> Portal logs process done");
log.info("portal process done");
System.exit(0);
portalStats();
log.info("portal usagestats done");
@@ -339,44 +343,41 @@ public class PiwikStatsDB {
".result_views_monthly_tmp";
stmt.executeUpdate(drop_result_views_monthly_tmp);
System.out.println("====> Dropped result_views_monthly_tmp table");
System.out.println("====> Creating result_views_monthly_tmp table");
String create_result_views_monthly_tmp =
"CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp " +
String create_result_views_monthly_tmp = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
+ ".result_views_monthly_tmp " +
"AS SELECT entity_id AS id, " +
"COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) " +
"AS openaire_referrer, " +
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
"AS openaire_referrer, " +
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
"FROM `usagestats_13`.piwiklogtmp where action='action' and (source_item_type='oaItem' or " +
"source_item_type='repItem') " +
"source_item_type='repItem') " +
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
"source ORDER BY source, entity_id";
stmt.executeUpdate(create_result_views_monthly_tmp);
System.out.println("====> Created result_views_monthly_tmp table");
System.out.println("====> Dropping views_stats_tmp table");
String drop_views_stats_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".views_stats_tmp";
stmt.executeUpdate(drop_views_stats_tmp);
System.out.println("====> Dropped views_stats_tmp table");
System.out.println("====> Creating views_stats_tmp table");
String create_views_stats_tmp =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp " +
String create_views_stats_tmp = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".views_stats_tmp " +
"AS SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
"max(views) AS count, max(openaire_referrer) AS openaire " +
"max(views) AS count, max(openaire_referrer) AS openaire " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp p, " +
ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
"WHERE p.source!='5' AND p.source=d.piwik_id AND p.id=ro.oid " +
"GROUP BY d.id, ro.id, month " +
"WHERE p.source!='5' AND p.source=d.piwik_id AND p.id=ro.oid " +
"GROUP BY d.id, ro.id, month " +
"ORDER BY d.id, ro.id, month";
stmt.executeUpdate(create_views_stats_tmp);
System.out.println("====> Created views_stats_tmp table");
System.out.println("====> Dropping views_stats table");
String drop_views_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
@@ -385,43 +386,40 @@ public class PiwikStatsDB {
System.out.println("====> Dropped views_stats table");
System.out.println("====> Creating views_stats table");
String create_view_stats =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
String create_view_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
stmt.executeUpdate(create_view_stats);
System.out.println("====> Created views_stats table");
System.out.println("====> Dropping pageviews_stats_tmp table");
String drop_pageviews_stats_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".pageviews_stats_tmp";
stmt.executeUpdate(drop_pageviews_stats_tmp);
System.out.println("====> Dropped pageviews_stats_tmp table");
System.out.println("====> Creating pageviews_stats_tmp table");
String create_pageviews_stats_tmp =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp AS SELECT " +
"'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count " +
String create_pageviews_stats_tmp = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".pageviews_stats_tmp AS SELECT " +
"'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp p, " +
ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
"WHERE p.source='23' AND p.source=d.piwik_id and p.id=ro.oid \n" +
"GROUP BY d.id, ro.id, month " +
ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
"WHERE p.source='23' AND p.source=d.piwik_id and p.id=ro.oid \n" +
"GROUP BY d.id, ro.id, month " +
"ORDER BY d.id, ro.id, month";
stmt.executeUpdate(create_pageviews_stats_tmp);
System.out.println("====> Created pageviews_stats_tmp table");
System.out.println("====> Droping pageviews_stats table");
String drop_pageviews_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".pageviews_stats";
stmt.executeUpdate(drop_pageviews_stats);
System.out.println("====> Dropped pageviews_stats table");
System.out.println("====> Creating pageviews_stats table");
String create_pageviews_stats =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " +
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".pageviews_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
stmt.executeUpdate(create_pageviews_stats);
System.out.println("====> Created pageviews_stats table");
@@ -440,53 +438,48 @@ public class PiwikStatsDB {
".result_views_monthly_tmp";
stmt.executeUpdate(drop_result_views_monthly_tmp);
System.out.println("====> Dropped result_downloads_monthly_tmp view");
System.out.println("====> Creating result_views_monthly_tmp view");
String sql =
"CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp " +
"AS SELECT entity_id AS id, COUNT(entity_id) as downloads, " +
"SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, " +
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp " +
"AS SELECT entity_id AS id, COUNT(entity_id) as downloads, " +
"SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, " +
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp where action='download' " +
"AND (source_item_type='oaItem' OR source_item_type='repItem') " +
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source " +
"AND (source_item_type='oaItem' OR source_item_type='repItem') " +
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source " +
"ORDER BY source, entity_id, month";
stmt.executeUpdate(sql);
System.out.println("====> Created result_views_monthly_tmp view");
System.out.println("====> Dropping downloads_stats_tmp table");
String drop_views_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".downloads_stats_tmp";
stmt.executeUpdate(drop_views_stats);
System.out.println("====> Dropped downloads_stats_tmp table");
System.out.println("====> Creating downloads_stats_tmp view");
sql =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " +
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " +
"SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
"max(downloads) AS count, max(openaire_referrer) AS openaire " +
"max(downloads) AS count, max(openaire_referrer) AS openaire " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp p, " +
ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
"WHERE p.source=d.piwik_id and p.id=ro.oid " +
"GROUP BY d.id, ro.id, month " +
ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
"WHERE p.source=d.piwik_id and p.id=ro.oid " +
"GROUP BY d.id, ro.id, month " +
"ORDER BY d.id, ro.id, month";
System.out.println("====> Created downloads_stats_tmp view");
stmt.executeUpdate(sql);
System.out.println("====> Dropping downloads_stats table");
String drop_pageviews_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".downloads_stats";
stmt.executeUpdate(drop_pageviews_stats);
System.out.println("====> Dropped downloads_stats table");
System.out.println("====> Creating downloads_stats table");
String create_pageviews_stats =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".downloads_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
stmt.executeUpdate(create_pageviews_stats);
System.out.println("====> Created downloads_stats table");
@@ -498,7 +491,7 @@ public class PiwikStatsDB {
stmt.close();
ConnectDB.getConnection().close();
System.exit(0);
}
@@ -723,93 +716,93 @@ public class PiwikStatsDB {
ConnectDB.getConnection().close();
}
// Import OPENAIRE Logs to DB
public void processPortalLog() throws Exception {
Statement stmt = ConnectDB.getConnection().createStatement();
ConnectDB.getConnection().setAutoCommit(false);
ArrayList<String> jsonFiles = listHdfsDir(this.logPortalPath);
// File folder = new File(this.logPortalPath);
// File[] jsonFiles = folder.listFiles();
System.out.println("====> Dropping process_portal_log_tmp_json table");
String drop_process_portal_log_tmp_json = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".piwiklogtmp_json";
stmt.executeUpdate(drop_process_portal_log_tmp_json);
System.out.println("====> Dropped process_portal_log_tmp_json table");
PreparedStatement prepStatem = ConnectDB
.getConnection()
.prepareStatement(
"INSERT INTO process_portal_log_tmp (source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?)");
int batch_size = 0;
JSONParser parser = new JSONParser();
for (String jsonFile : jsonFiles) {
JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile));
System.out.println("====> Creating process_portal_log_tmp_json");
String create_process_portal_log_tmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp_json(" +
" `idSite` STRING,\n" +
" `idVisit` STRING,\n" +
" `country` STRING,\n" +
" `referrerName` STRING,\n" +
" `browser` STRING,\n" +
" `actionDetails` ARRAY<\n" +
" struct<\n" +
" type: STRING,\n" +
" url: STRING,\n" +
" timestamp: String\n" +
" >\n" +
" >\n" +
")\n" +
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
"LOCATION '" + UsageStatsExporter.repoLogPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_process_portal_log_tmp_json);
System.out.println("====> Created process_portal_log_tmp_json");
for (Object aJsonArray : jsonArray) {
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString());
String idVisit = jsonObjectRow.get("idVisit").toString();
String country = jsonObjectRow.get("country").toString();
String referrerName = jsonObjectRow.get("referrerName").toString();
String agent = jsonObjectRow.get("browser").toString();
boolean botFound = false;
Iterator it = robotsList.iterator();
while (it.hasNext()) {
// Create a Pattern object
Pattern r = Pattern.compile(it.next().toString());
// Now create matcher object.
Matcher m = r.matcher(agent);
if (m.find()) {
botFound = true;
break;
}
}
if (botFound == false) {
JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails"));
for (Object actionDetail : actionDetails) {
JSONObject actionDetailsObj = (JSONObject) actionDetail;
System.out.println("====> Droping process_portal_log_tmp table");
String drop_process_portal_log_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".process_portal_log_tmp";
stmt.executeUpdate(drop_process_portal_log_tmp);
System.out.println("====> Dropped process_portal_log_tmp");
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
Timestamp timestamp = new Timestamp(
Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000);
System.out.println("====> Creating process_portal_log_tmp");
String create_process_portal_log_tmp = "CREATE TABLE " +
ConnectDB.getUsageStatsDBSchema() +
".process_portal_log_tmp (source BIGINT, id_visit STRING, country STRING, action STRING, url STRING, " +
"entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) " +
"clustered by (source, id_visit, timestamp) into 100 buckets stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(create_process_portal_log_tmp);
System.out.println("====> Created process_portal_log_tmp");
String action = actionDetailsObj.get("type").toString();
String url = actionDetailsObj.get("url").toString();
String entityID = processPortalURL(url);
String sourceItemType = "";
if (entityID.indexOf("|") > 0) {
sourceItemType = entityID.substring(0, entityID.indexOf("|"));
entityID = entityID.substring(entityID.indexOf("|") + 1);
}
prepStatem.setInt(1, idSite);
prepStatem.setString(2, idVisit);
prepStatem.setString(3, country);
prepStatem.setString(4, action);
prepStatem.setString(5, url);
prepStatem.setString(6, entityID);
prepStatem.setString(7, sourceItemType);
prepStatem.setString(8, simpleDateFormat.format(timestamp));
prepStatem.setString(9, referrerName);
prepStatem.setString(10, agent);
prepStatem.addBatch();
batch_size++;
if (batch_size == 10000) {
prepStatem.executeBatch();
ConnectDB.getConnection().commit();
batch_size = 0;
}
}
}
}
}
prepStatem.executeBatch();
ConnectDB.getConnection().commit();
System.out.println("====> Inserting into process_portal_log_tmp");
String insert_process_portal_log_tmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
+ ".process_portal_log_tmp " +
"SELECT DISTINCT cast(idSite as BIGINT) as source, idVisit as id_Visit, country, actiondetail.type as action, "
+
"actiondetail.url as url, " +
"CASE\n" +
" WHEN (actiondetail.url like '%datasourceId=%') THEN split(actiondetail.url,'datasourceId=')[1] " +
" WHEN (actiondetail.url like '%datasource=%') THEN split(actiondetail.url,'datasource=')[1] " +
" WHEN (actiondetail.url like '%datasourceFilter=%') THEN split(actiondetail.url,'datasourceFilter=')[1] "
+
" WHEN (actiondetail.url like '%articleId=%') THEN split(actiondetail.url,'articleId=')[1] " +
" WHEN (actiondetail.url like '%datasetId=%') THEN split(actiondetail.url,'datasetId=')[1] " +
" WHEN (actiondetail.url like '%projectId=%') THEN split(actiondetail.url,'projectId=')[1] " +
" WHEN (actiondetail.url like '%organizationId=%') THEN split(actiondetail.url,'organizationId=')[1] " +
" ELSE '' " +
"END AS entity_id, " +
"CASE " +
" WHEN (actiondetail.url like '%datasourceId=%') THEN 'datasource' " +
" WHEN (actiondetail.url like '%datasource=%') THEN 'datasource' " +
" WHEN (actiondetail.url like '%datasourceFilter=%') THEN 'datasource' " +
" WHEN (actiondetail.url like '%articleId=%') THEN 'result' " +
" WHEN (actiondetail.url like '%datasetId=%') THEN 'result' " +
" WHEN (actiondetail.url like '%projectId=%') THEN 'project' " +
" WHEN (actiondetail.url like '%organizationId=%') THEN 'organization' " +
" ELSE '' " +
"END AS source_item_type, " +
"from_unixtime(cast(actiondetail.timestamp as BIGINT)) as timestamp, referrerName as referrer_name, " +
"browser as agent " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp_json " +
"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
stmt.executeUpdate(insert_process_portal_log_tmp);
System.out.println("====> Inserted into process_portal_log_tmp");
stmt.close();
ConnectDB.getConnection().close();
}
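The removed in-memory path above relies on processPortalURL(url) to split a portal URL into a source item type and an entity id; that helper is not part of this diff. A hypothetical sketch consistent with the "type|id" convention at the call site and with the CASE/split mapping in the SQL insert (method and variable names are assumptions):

	// Hypothetical sketch only: maps a portal URL to "sourceItemType|entityId",
	// mirroring the CASE WHEN ... split(url, 'key=')[1] logic of the SQL above.
	private String processPortalURLSketch(String url) {
		String[][] mappings = {
			{ "datasourceId=", "datasource" },
			{ "datasource=", "datasource" },
			{ "datasourceFilter=", "datasource" },
			{ "articleId=", "result" },
			{ "datasetId=", "result" },
			{ "projectId=", "project" },
			{ "organizationId=", "organization" }
		};
		for (String[] m : mappings) {
			int idx = url.indexOf(m[0]);
			if (idx >= 0) {
				// Everything after the matched query parameter is the entity id.
				return m[1] + "|" + url.substring(idx + m[0].length());
			}
		}
		return ""; // no recognised entity parameter in the URL
	}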
public void portalStats() throws SQLException {
Connection con = ConnectDB.getConnection();
Statement stmt = con.createStatement();
@@ -845,135 +838,120 @@ public class PiwikStatsDB {
ConnectDB.getConnection().setAutoCommit(false);
System.out.println("====> Cleaning oai - Step 1");
stmt = ConnectDB.getConnection().createStatement();
String sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
stmt = ConnectDB.getConnection().createStatement();
String sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chlc.min-saude.pt/'," +
"'oai:repositorio.chlc.min-saude.pt:') WHERE entity_id LIKE 'oai:repositorio.chlc.min-saude.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 2");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hospitaldebraga.pt/'," +
"'oai:repositorio.hospitaldebraga.pt:') WHERE entity_id LIKE 'oai:repositorio.hospitaldebraga.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 3");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipl.pt/'," +
"'oai:repositorio.ipl.pt:') WHERE entity_id LIKE 'oai:repositorio.ipl.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 4");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:bibliotecadigital.ipb.pt/'," +
"'oai:bibliotecadigital.ipb.pt:') WHERE entity_id LIKE 'oai:bibliotecadigital.ipb.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 5");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ismai.pt/'," +
"'oai:repositorio.ismai.pt:') WHERE entity_id LIKE 'oai:repositorio.ismai.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 6");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorioaberto.uab.pt/'," +
"'oai:repositorioaberto.uab.pt:') WHERE entity_id LIKE 'oai:repositorioaberto.uab.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 7");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.uac.pt/'," +
"'oai:repositorio.uac.pt:') WHERE entity_id LIKE 'oai:repositorio.uac.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 8");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.insa.pt/'," +
"'oai:repositorio.insa.pt:') WHERE entity_id LIKE 'oai:repositorio.insa.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 9");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipcb.pt/'," +
"'oai:repositorio.ipcb.pt:') WHERE entity_id LIKE 'oai:repositorio.ipcb.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 10");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ispa.pt/'," +
"'oai:repositorio.ispa.pt:') WHERE entity_id LIKE 'oai:repositorio.ispa.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 11");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chporto.pt/'," +
"'oai:repositorio.chporto.pt:') WHERE entity_id LIKE 'oai:repositorio.chporto.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 12");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ucp.pt/'," +
"'oai:repositorio.ucp.pt:') WHERE entity_id LIKE 'oai:repositorio.ucp.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 13");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:rihuc.huc.min-saude.pt/'," +
"'oai:rihuc.huc.min-saude.pt:') WHERE entity_id LIKE 'oai:rihuc.huc.min-saude.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 14");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipv.pt/'," +
"'oai:repositorio.ipv.pt:') WHERE entity_id LIKE 'oai:repositorio.ipv.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 15");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:www.repository.utl.pt/'," +
"'oai:www.repository.utl.pt:') WHERE entity_id LIKE 'oai:www.repository.utl.pt/%'";
stmt.executeUpdate(sql);
@@ -981,53 +959,47 @@ public class PiwikStatsDB {
System.out.println("====> Cleaning oai - Step 16");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:run.unl.pt/'," +
"'oai:run.unl.pt:') WHERE entity_id LIKE 'oai:run.unl.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 17");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:sapientia.ualg.pt/'," +
"'oai:sapientia.ualg.pt:') WHERE entity_id LIKE 'oai:sapientia.ualg.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 18");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipsantarem.pt/'," +
"'oai:repositorio.ipsantarem.pt:') WHERE entity_id LIKE 'oai:repositorio.ipsantarem.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 19");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:arca.igc.gulbenkian.pt/'," +
"'oai:arca.igc.gulbenkian.pt:') WHERE entity_id LIKE 'oai:arca.igc.gulbenkian.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 20");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:ubibliorum.ubi.pt/'," +
"'oai:ubibliorum.ubi.pt:') WHERE entity_id LIKE 'oai:ubibliorum.ubi.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 21");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:digituma.uma.pt/'," +
"'oai:digituma.uma.pt:') WHERE entity_id LIKE 'oai:digituma.uma.pt/%'";
stmt.executeUpdate(sql);
@@ -1035,17 +1007,15 @@ public class PiwikStatsDB {
System.out.println("====> Cleaning oai - Step 22");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ul.pt/'," +
"'oai:repositorio.ul.pt:') WHERE entity_id LIKE 'oai:repositorio.ul.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 23");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hff.min-saude.pt/'," +
"'oai:repositorio.hff.min-saude.pt:') WHERE entity_id LIKE 'oai:repositorio.hff.min-saude.pt/%'";
stmt.executeUpdate(sql);
@@ -1053,17 +1023,15 @@ public class PiwikStatsDB {
System.out.println("====> Cleaning oai - Step 24");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorium.sdum.uminho.pt/'," +
"'oai:repositorium.sdum.uminho.pt:') WHERE entity_id LIKE 'oai:repositorium.sdum.uminho.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 25");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:recipp.ipp.pt/'," +
"'oai:recipp.ipp.pt:') WHERE entity_id LIKE 'oai:recipp.ipp.pt/%'";
stmt.executeUpdate(sql);
@@ -1071,17 +1039,15 @@ public class PiwikStatsDB {
System.out.println("====> Cleaning oai - Step 26");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:bdigital.ufp.pt/'," +
"'oai:bdigital.ufp.pt:') WHERE entity_id LIKE 'oai:bdigital.ufp.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 27");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.lneg.pt/'," +
"'oai:repositorio.lneg.pt:') WHERE entity_id LIKE 'oai:repositorio.lneg.pt/%'";
stmt.executeUpdate(sql);
@@ -1089,23 +1055,20 @@ public class PiwikStatsDB {
System.out.println("====> Cleaning oai - Step 28");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:iconline.ipleiria.pt/'," +
"'oai:iconline.ipleiria.pt:') WHERE entity_id LIKE 'oai:iconline.ipleiria.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Step 29");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:comum.rcaap.pt/'," +
"'oai:comum.rcaap.pt:') WHERE entity_id LIKE 'oai:comum.rcaap.pt/%'";
stmt.executeUpdate(sql);
stmt.close();
System.out.println("====> Cleaning oai - Done, closing connection");
ConnectDB.getConnection().close();
}
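The 29 "Cleaning oai" steps above differ only in the repository host. A minimal sketch of the same normalisation driven from a list (an illustrative refactoring, not part of this commit; the array is truncated to three of the hosts shown above, and Statement, SQLException, and ConnectDB come from the surrounding class):

	// Illustrative sketch only: one regexp_replace UPDATE per repository host,
	// normalising 'oai:<host>/' entity_id prefixes to 'oai:<host>:'.
	private void cleanOaiEntityIds() throws SQLException {
		String[] hosts = {
			"repositorio.chlc.min-saude.pt",
			"repositorio.hospitaldebraga.pt",
			"comum.rcaap.pt"
			// ...remaining hosts as in the statements above
		};
		for (String host : hosts) {
			Statement stmt = ConnectDB.getConnection().createStatement();
			String sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
				"SET entity_id = regexp_replace(entity_id, '^oai:" + host + "/', " +
				"'oai:" + host + ":') WHERE entity_id LIKE 'oai:" + host + "/%'";
			stmt.executeUpdate(sql);
			stmt.close();
		}
	}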


@@ -12,13 +12,13 @@ public class UsageStatsExporter {
static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
static String matomoBaseURL = "analytics.openaire.eu";
static String repoLogPath = "/user/spyros/logs/usage_stats_logs3/Repologs";
static String portalLogPath = "/user/spyros/logs/usage_stats_logs3/Portallogs/";
static String repoLogPath = "/user/spyros/logs/usage_stats_logs4/Repologs";
static String portalLogPath = "/user/spyros/logs/usage_stats_logs4/Portallogs/";
static String portalMatomoID = "109";
static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs3/irusUKReports";
static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs3/sarcReports";
static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs4/irusUKReports";
static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs4/sarcReports";
public UsageStatsExporter(Properties properties) {
this.properties = properties;
@@ -39,7 +39,9 @@ public class UsageStatsExporter {
// // the moment
System.out.println("====> Initializing the download logs module");
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
System.out.println("====> Downloading logs");
// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
System.out.println("====> Downloaded logs");
// Create DB tables, insert/update statistics
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");