forked from D-Net/dnet-hadoop

commit 8ddf1dcc15
parent 968d53f119

    processPortalLog finished
@@ -128,7 +128,7 @@ public class PiwikStatsDB {
+ ".piwiklogtmp(source INT, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, "
+ "source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) "
+ "clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets "
+ "stored as orc tblproperties('transactional'='true');";
+ "stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(sqlCreateTmpTablePiwikLog);

//////////////////////////////////////////////////
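The two `stored as orc` lines above differ only in the trailing `;` inside the SQL string: HiveServer2's JDBC driver generally does not accept the interactive statement terminator, so the semicolon has to stay out of the text passed to `executeUpdate`. A small, hypothetical guard for that case (the `executeHiveDdl` helper is illustrative, not part of this patch; the `Connection` is the kind returned by the project's `ConnectDB` helper):

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

public final class HiveDdlUtil {

	// Hypothetical helper: issues a single DDL statement, dropping a trailing ';'
	// because statements sent through the Hive JDBC driver should not carry the
	// command-line terminator.
	public static void executeHiveDdl(Connection connection, String ddl) throws SQLException {
		String statementText = ddl.trim();
		if (statementText.endsWith(";")) {
			statementText = statementText.substring(0, statementText.length() - 1);
		}
		try (Statement stmt = connection.createStatement()) {
			stmt.executeUpdate(statementText);
		}
	}
}

With such a helper, a call like executeHiveDdl(ConnectDB.getConnection(), sqlCreateTmpTablePiwikLog) would behave the same whether or not the DDL string ends in a semicolon.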
@@ -168,8 +168,8 @@ public class PiwikStatsDB {
this.robotsList = counterRobots.getRobotsPatterns();

System.out.println("====> Processing repository logs");
// processRepositoryLog();
System.out.println("====> Repository process done");
processRepositoryLog();
System.out.println("====> Repository logs process done");
log.info("repository process done");

System.out.println("====> Removing double clicks");
@@ -183,16 +183,20 @@ public class PiwikStatsDB {
log.info("cleaning oai done");

System.out.println("====> ViewsStats processing starts");
viewsStats();
// viewsStats();
System.out.println("====> ViewsStats processing ends");

System.out.println("====> DownloadsStats processing starts");
downloadsStats();
// downloadsStats();
System.out.println("====> DownloadsStats processing starts");

System.out.println("====> Processing portal logs");
processPortalLog();
System.out.println("====> Portal logs process done");
log.info("portal process done");

System.exit(0);

portalStats();
log.info("portal usagestats done");
@@ -341,8 +345,8 @@ public class PiwikStatsDB {
System.out.println("====> Dropped result_views_monthly_tmp table");

System.out.println("====> Creating result_views_monthly_tmp table");
String create_result_views_monthly_tmp =
"CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp " +
String create_result_views_monthly_tmp = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
+ ".result_views_monthly_tmp " +
"AS SELECT entity_id AS id, " +
"COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) " +
"AS openaire_referrer, " +
@@ -354,8 +358,6 @@ public class PiwikStatsDB {
stmt.executeUpdate(create_result_views_monthly_tmp);
System.out.println("====> Created result_views_monthly_tmp table");

System.out.println("====> Dropping views_stats_tmp table");
String drop_views_stats_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
@@ -364,8 +366,8 @@ public class PiwikStatsDB {
System.out.println("====> Dropped views_stats_tmp table");

System.out.println("====> Creating views_stats_tmp table");
String create_views_stats_tmp =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp " +
String create_views_stats_tmp = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".views_stats_tmp " +
"AS SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
"max(views) AS count, max(openaire_referrer) AS openaire " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp p, " +
@@ -376,7 +378,6 @@ public class PiwikStatsDB {
stmt.executeUpdate(create_views_stats_tmp);
System.out.println("====> Created views_stats_tmp table");

System.out.println("====> Dropping views_stats table");
String drop_views_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
@@ -385,13 +386,11 @@ public class PiwikStatsDB {
System.out.println("====> Dropped views_stats table");

System.out.println("====> Creating views_stats table");
String create_view_stats =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
String create_view_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats_tmp";
stmt.executeUpdate(create_view_stats);
System.out.println("====> Created views_stats table");

System.out.println("====> Dropping pageviews_stats_tmp table");
String drop_pageviews_stats_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
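The same drop-then-"CREATE TABLE ... STORED AS PARQUET AS SELECT" pattern recurs for views_stats, pageviews_stats and downloads_stats: a *_tmp table is computed first, then persisted as a Parquet table in the usage-stats schema. Purely as an illustration of that pattern (the materializeAsParquet helper and its signature are not part of the patch):

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

public final class StatsTableHelper {

	// Hypothetical sketch of the drop-then-CTAS step used above: drop the final
	// table if it exists, then rebuild it as Parquet from its *_tmp counterpart.
	public static void materializeAsParquet(Connection connection, String schema, String table)
		throws SQLException {
		try (Statement stmt = connection.createStatement()) {
			stmt.executeUpdate("DROP TABLE IF EXISTS " + schema + "." + table);
			stmt
				.executeUpdate(
					"CREATE TABLE IF NOT EXISTS " + schema + "." + table
						+ " STORED AS PARQUET AS SELECT * FROM " + schema + "." + table + "_tmp");
		}
	}
}

Called as materializeAsParquet(ConnectDB.getConnection(), ConnectDB.getUsageStatsDBSchema(), "views_stats"), it would reproduce the drop/create pair spelled out in the hunks above.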
@@ -400,8 +399,8 @@ public class PiwikStatsDB {
System.out.println("====> Dropped pageviews_stats_tmp table");

System.out.println("====> Creating pageviews_stats_tmp table");
String create_pageviews_stats_tmp =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp AS SELECT " +
String create_pageviews_stats_tmp = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".pageviews_stats_tmp AS SELECT " +
"'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_views_monthly_tmp p, " +
ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
@@ -411,7 +410,6 @@ public class PiwikStatsDB {
stmt.executeUpdate(create_pageviews_stats_tmp);
System.out.println("====> Created pageviews_stats_tmp table");

System.out.println("====> Droping pageviews_stats table");
String drop_pageviews_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
@@ -420,8 +418,8 @@ public class PiwikStatsDB {
System.out.println("====> Dropped pageviews_stats table");

System.out.println("====> Creating pageviews_stats table");
String create_pageviews_stats =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats " +
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".pageviews_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats_tmp";
stmt.executeUpdate(create_pageviews_stats);
System.out.println("====> Created pageviews_stats table");
@@ -442,8 +440,7 @@ public class PiwikStatsDB {
System.out.println("====> Dropped result_downloads_monthly_tmp view");

System.out.println("====> Creating result_views_monthly_tmp view");
String sql =
"CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp " +
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp " +
"AS SELECT entity_id AS id, COUNT(entity_id) as downloads, " +
"SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, " +
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
@@ -454,7 +451,6 @@ public class PiwikStatsDB {
stmt.executeUpdate(sql);
System.out.println("====> Created result_views_monthly_tmp view");

System.out.println("====> Dropping downloads_stats_tmp table");
String drop_views_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
@@ -463,8 +459,7 @@ public class PiwikStatsDB {
System.out.println("====> Dropped downloads_stats_tmp table");

System.out.println("====> Creating downloads_stats_tmp view");
sql =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " +
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp AS " +
"SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, " +
"max(downloads) AS count, max(openaire_referrer) AS openaire " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".result_downloads_monthly_tmp p, " +
@@ -475,8 +470,6 @@ public class PiwikStatsDB {
System.out.println("====> Created downloads_stats_tmp view");
stmt.executeUpdate(sql);

System.out.println("====> Dropping downloads_stats table");
String drop_pageviews_stats = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
@@ -485,8 +478,8 @@ public class PiwikStatsDB {
System.out.println("====> Dropped downloads_stats table");

System.out.println("====> Creating downloads_stats table");
String create_pageviews_stats =
"CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats " +
String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ ".downloads_stats " +
"STORED AS PARQUET AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats_tmp";
stmt.executeUpdate(create_pageviews_stats);
System.out.println("====> Created downloads_stats table");
@@ -723,93 +716,93 @@ public class PiwikStatsDB {
ConnectDB.getConnection().close();
}

// Import OPENAIRE Logs to DB
public void processPortalLog() throws Exception {
Statement stmt = ConnectDB.getConnection().createStatement();
ConnectDB.getConnection().setAutoCommit(false);

ArrayList<String> jsonFiles = listHdfsDir(this.logPortalPath);
// File folder = new File(this.logPortalPath);
// File[] jsonFiles = folder.listFiles();
System.out.println("====> Dropping process_portal_log_tmp_json table");
String drop_process_portal_log_tmp_json = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".piwiklogtmp_json";
stmt.executeUpdate(drop_process_portal_log_tmp_json);
System.out.println("====> Dropped process_portal_log_tmp_json table");

PreparedStatement prepStatem = ConnectDB
.getConnection()
.prepareStatement(
"INSERT INTO process_portal_log_tmp (source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?)");
int batch_size = 0;
JSONParser parser = new JSONParser();
for (String jsonFile : jsonFiles) {
JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile));
System.out.println("====> Creating process_portal_log_tmp_json");
String create_process_portal_log_tmp_json = "CREATE EXTERNAL TABLE IF NOT EXISTS " +
ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp_json(" +
"  `idSite` STRING,\n" +
"  `idVisit` STRING,\n" +
"  `country` STRING,\n" +
"  `referrerName` STRING,\n" +
"  `browser` STRING,\n" +
"  `actionDetails` ARRAY<\n" +
"    struct<\n" +
"      type: STRING,\n" +
"      url: STRING,\n" +
"      timestamp: String\n" +
"      >\n" +
"    >\n" +
")\n" +
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
"LOCATION '" + UsageStatsExporter.repoLogPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_process_portal_log_tmp_json);
System.out.println("====> Created process_portal_log_tmp_json");

for (Object aJsonArray : jsonArray) {
JSONObject jsonObjectRow = (JSONObject) aJsonArray;
int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString());
String idVisit = jsonObjectRow.get("idVisit").toString();
String country = jsonObjectRow.get("country").toString();
String referrerName = jsonObjectRow.get("referrerName").toString();
String agent = jsonObjectRow.get("browser").toString();
boolean botFound = false;
Iterator it = robotsList.iterator();
while (it.hasNext()) {
// Create a Pattern object
Pattern r = Pattern.compile(it.next().toString());
// Now create matcher object.
Matcher m = r.matcher(agent);
if (m.find()) {
botFound = true;
break;
}
}
if (botFound == false) {
JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails"));
for (Object actionDetail : actionDetails) {
JSONObject actionDetailsObj = (JSONObject) actionDetail;
System.out.println("====> Droping process_portal_log_tmp table");
String drop_process_portal_log_tmp = "DROP TABLE IF EXISTS " +
ConnectDB.getUsageStatsDBSchema() +
".process_portal_log_tmp";
stmt.executeUpdate(drop_process_portal_log_tmp);
System.out.println("====> Dropped process_portal_log_tmp");

SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
Timestamp timestamp = new Timestamp(
Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000);
System.out.println("====> Creating process_portal_log_tmp");
String create_process_portal_log_tmp = "CREATE TABLE " +
ConnectDB.getUsageStatsDBSchema() +
".process_portal_log_tmp (source BIGINT, id_visit STRING, country STRING, action STRING, url STRING, " +
"entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) " +
"clustered by (source, id_visit, timestamp) into 100 buckets stored as orc tblproperties('transactional'='true')";
stmt.executeUpdate(create_process_portal_log_tmp);
System.out.println("====> Created process_portal_log_tmp");

String action = actionDetailsObj.get("type").toString();
String url = actionDetailsObj.get("url").toString();

String entityID = processPortalURL(url);
String sourceItemType = "";

if (entityID.indexOf("|") > 0) {
sourceItemType = entityID.substring(0, entityID.indexOf("|"));
entityID = entityID.substring(entityID.indexOf("|") + 1);
}

prepStatem.setInt(1, idSite);
prepStatem.setString(2, idVisit);
prepStatem.setString(3, country);
prepStatem.setString(4, action);
prepStatem.setString(5, url);
prepStatem.setString(6, entityID);
prepStatem.setString(7, sourceItemType);
prepStatem.setString(8, simpleDateFormat.format(timestamp));
prepStatem.setString(9, referrerName);
prepStatem.setString(10, agent);

prepStatem.addBatch();
batch_size++;
if (batch_size == 10000) {
prepStatem.executeBatch();
ConnectDB.getConnection().commit();
batch_size = 0;
}
}
}
}
}
prepStatem.executeBatch();
ConnectDB.getConnection().commit();
System.out.println("====> Inserting into process_portal_log_tmp");
String insert_process_portal_log_tmp = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
+ ".process_portal_log_tmp " +
"SELECT DISTINCT cast(idSite as BIGINT) as source, idVisit as id_Visit, country, actiondetail.type as action, "
+
"actiondetail.url as url, " +
"CASE\n" +
"  WHEN (actiondetail.url like '%datasourceId=%') THEN split(actiondetail.url,'datasourceId=')[1] " +
"  WHEN (actiondetail.url like '%datasource=%') THEN split(actiondetail.url,'datasource=')[1] " +
"  WHEN (actiondetail.url like '%datasourceFilter=%') THEN split(actiondetail.url,'datasourceFilter=')[1] "
+
"  WHEN (actiondetail.url like '%articleId=%') THEN split(actiondetail.url,'articleId=')[1] " +
"  WHEN (actiondetail.url like '%datasetId=%') THEN split(actiondetail.url,'datasetId=')[1] " +
"  WHEN (actiondetail.url like '%projectId=%') THEN split(actiondetail.url,'projectId=')[1] " +
"  WHEN (actiondetail.url like '%organizationId=%') THEN split(actiondetail.url,'organizationId=')[1] " +
"  ELSE '' " +
"END AS entity_id, " +
"CASE " +
"  WHEN (actiondetail.url like '%datasourceId=%') THEN 'datasource' " +
"  WHEN (actiondetail.url like '%datasource=%') THEN 'datasource' " +
"  WHEN (actiondetail.url like '%datasourceFilter=%') THEN 'datasource' " +
"  WHEN (actiondetail.url like '%articleId=%') THEN 'result' " +
"  WHEN (actiondetail.url like '%datasetId=%') THEN 'result' " +
"  WHEN (actiondetail.url like '%projectId=%') THEN 'project' " +
"  WHEN (actiondetail.url like '%organizationId=%') THEN 'organization' " +
"  ELSE '' " +
"END AS source_item_type, " +
"from_unixtime(cast(actiondetail.timestamp as BIGINT)) as timestamp, referrerName as referrer_name, " +
"browser as agent " +
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".process_portal_log_tmp_json " +
"LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail";
stmt.executeUpdate(insert_process_portal_log_tmp);
System.out.println("====> Inserted into process_portal_log_tmp");

stmt.close();
ConnectDB.getConnection().close();
}

public void portalStats() throws SQLException {
Connection con = ConnectDB.getConnection();
Statement stmt = con.createStatement();
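processPortalLog() relies on helpers that this hunk does not show, notably listHdfsDir(...), which feeds the JSONParser loop above with the portal log files. For reference only, a minimal directory-listing helper of that kind, written against the standard Hadoop FileSystem API, could look like the sketch below; the project's actual implementation may differ, and the class name here is made up.

import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class HdfsListingSketch {

	// Hypothetical stand-in for the listHdfsDir(...) helper referenced above:
	// returns the full paths of the files found directly under an HDFS directory.
	public static ArrayList<String> listHdfsDir(String dir) throws IOException {
		FileSystem fs = FileSystem.get(new Configuration());
		ArrayList<String> fileNames = new ArrayList<>();
		for (FileStatus status : fs.listStatus(new Path(dir))) {
			if (status.isFile()) {
				fileNames.add(status.getPath().toString());
			}
		}
		return fileNames;
	}
}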
@@ -846,8 +839,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 1");
stmt = ConnectDB.getConnection().createStatement();
String sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
String sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chlc.min-saude.pt/'," +
"'oai:repositorio.chlc.min-saude.pt:') WHERE entity_id LIKE 'oai:repositorio.chlc.min-saude.pt/%'";
stmt.executeUpdate(sql);
@@ -855,8 +847,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 2");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hospitaldebraga.pt/'," +
"'oai:repositorio.hospitaldebraga.pt:') WHERE entity_id LIKE 'oai:repositorio.hospitaldebraga.pt/%'";
stmt.executeUpdate(sql);
@@ -864,8 +855,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 3");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipl.pt/'," +
"'oai:repositorio.ipl.pt:') WHERE entity_id LIKE 'oai:repositorio.ipl.pt/%'";
stmt.executeUpdate(sql);
@@ -873,8 +863,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 4");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:bibliotecadigital.ipb.pt/'," +
"'oai:bibliotecadigital.ipb.pt:') WHERE entity_id LIKE 'oai:bibliotecadigital.ipb.pt/%'";
stmt.executeUpdate(sql);
@@ -882,8 +871,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 5");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ismai.pt/'," +
"'oai:repositorio.ismai.pt:') WHERE entity_id LIKE 'oai:repositorio.ismai.pt/%'";
stmt.executeUpdate(sql);
@@ -891,8 +879,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 6");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorioaberto.uab.pt/'," +
"'oai:repositorioaberto.uab.pt:') WHERE entity_id LIKE 'oai:repositorioaberto.uab.pt/%'";
stmt.executeUpdate(sql);
@@ -900,8 +887,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 7");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.uac.pt/'," +
"'oai:repositorio.uac.pt:') WHERE entity_id LIKE 'oai:repositorio.uac.pt/%'";
stmt.executeUpdate(sql);
@@ -909,8 +895,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 8");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.insa.pt/'," +
"'oai:repositorio.insa.pt:') WHERE entity_id LIKE 'oai:repositorio.insa.pt/%'";
stmt.executeUpdate(sql);
@@ -918,8 +903,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 9");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipcb.pt/'," +
"'oai:repositorio.ipcb.pt:') WHERE entity_id LIKE 'oai:repositorio.ipcb.pt/%'";
stmt.executeUpdate(sql);
@@ -927,8 +911,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 10");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ispa.pt/'," +
"'oai:repositorio.ispa.pt:') WHERE entity_id LIKE 'oai:repositorio.ispa.pt/%'";
stmt.executeUpdate(sql);
@@ -936,8 +919,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 11");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chporto.pt/'," +
"'oai:repositorio.chporto.pt:') WHERE entity_id LIKE 'oai:repositorio.chporto.pt/%'";
stmt.executeUpdate(sql);
@@ -945,8 +927,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 12");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ucp.pt/'," +
"'oai:repositorio.ucp.pt:') WHERE entity_id LIKE 'oai:repositorio.ucp.pt/%'";
stmt.executeUpdate(sql);
@@ -954,8 +935,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 13");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:rihuc.huc.min-saude.pt/'," +
"'oai:rihuc.huc.min-saude.pt:') WHERE entity_id LIKE 'oai:rihuc.huc.min-saude.pt/%'";
stmt.executeUpdate(sql);
@@ -963,8 +943,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 14");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipv.pt/'," +
"'oai:repositorio.ipv.pt:') WHERE entity_id LIKE 'oai:repositorio.ipv.pt/%'";
stmt.executeUpdate(sql);
@@ -972,8 +951,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 15");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:www.repository.utl.pt/'," +
"'oai:www.repository.utl.pt:') WHERE entity_id LIKE 'oai:www.repository.utl.pt/%'";
stmt.executeUpdate(sql);
@@ -981,8 +959,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 16");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:run.unl.pt/'," +
"'oai:run.unl.pt:') WHERE entity_id LIKE 'oai:run.unl.pt/%'";
stmt.executeUpdate(sql);
@@ -990,8 +967,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 17");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:sapientia.ualg.pt/'," +
"'oai:sapientia.ualg.pt:') WHERE entity_id LIKE 'oai:sapientia.ualg.pt/%'";
stmt.executeUpdate(sql);
@@ -999,8 +975,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 18");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipsantarem.pt/'," +
"'oai:repositorio.ipsantarem.pt:') WHERE entity_id LIKE 'oai:repositorio.ipsantarem.pt/%'";
stmt.executeUpdate(sql);
@@ -1008,8 +983,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 19");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:arca.igc.gulbenkian.pt/'," +
"'oai:arca.igc.gulbenkian.pt:') WHERE entity_id LIKE 'oai:arca.igc.gulbenkian.pt/%'";
stmt.executeUpdate(sql);
@@ -1017,8 +991,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 20");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:ubibliorum.ubi.pt/'," +
"'oai:ubibliorum.ubi.pt:') WHERE entity_id LIKE 'oai:ubibliorum.ubi.pt/%'";
stmt.executeUpdate(sql);
@@ -1026,8 +999,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 21");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:digituma.uma.pt/'," +
"'oai:digituma.uma.pt:') WHERE entity_id LIKE 'oai:digituma.uma.pt/%'";
stmt.executeUpdate(sql);
@@ -1035,8 +1007,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 22");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ul.pt/'," +
"'oai:repositorio.ul.pt:') WHERE entity_id LIKE 'oai:repositorio.ul.pt/%'";
stmt.executeUpdate(sql);
@@ -1044,8 +1015,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 23");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hff.min-saude.pt/'," +
"'oai:repositorio.hff.min-saude.pt:') WHERE entity_id LIKE 'oai:repositorio.hff.min-saude.pt/%'";
stmt.executeUpdate(sql);
@@ -1053,8 +1023,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 24");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorium.sdum.uminho.pt/'," +
"'oai:repositorium.sdum.uminho.pt:') WHERE entity_id LIKE 'oai:repositorium.sdum.uminho.pt/%'";
stmt.executeUpdate(sql);
@@ -1062,8 +1031,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 25");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:recipp.ipp.pt/'," +
"'oai:recipp.ipp.pt:') WHERE entity_id LIKE 'oai:recipp.ipp.pt/%'";
stmt.executeUpdate(sql);
@@ -1071,8 +1039,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 26");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:bdigital.ufp.pt/'," +
"'oai:bdigital.ufp.pt:') WHERE entity_id LIKE 'oai:bdigital.ufp.pt/%'";
stmt.executeUpdate(sql);
@@ -1080,8 +1047,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 27");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:repositorio.lneg.pt/'," +
"'oai:repositorio.lneg.pt:') WHERE entity_id LIKE 'oai:repositorio.lneg.pt/%'";
stmt.executeUpdate(sql);
@@ -1089,8 +1055,7 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 28");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:iconline.ipleiria.pt/'," +
"'oai:iconline.ipleiria.pt:') WHERE entity_id LIKE 'oai:iconline.ipleiria.pt/%'";
stmt.executeUpdate(sql);
@@ -1098,14 +1063,12 @@ public class PiwikStatsDB {

System.out.println("====> Cleaning oai - Step 29");
stmt = ConnectDB.getConnection().createStatement();
sql =
"UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
sql = "UPDATE " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogtmp " +
"SET entity_id = regexp_replace(entity_id, '^oai:comum.rcaap.pt/'," +
"'oai:comum.rcaap.pt:') WHERE entity_id LIKE 'oai:comum.rcaap.pt/%'";
stmt.executeUpdate(sql);
stmt.close();

System.out.println("====> Cleaning oai - Done, closing connection");
ConnectDB.getConnection().close();
}
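All twenty-nine cleaning steps apply the same rewrite, turning an 'oai:<host>/' prefix into 'oai:<host>:'. Purely as an illustration of how that repetition could be expressed in one place (the loop and the cleanOaiPrefixes helper are not part of the patch; usageStatsSchema stands in for ConnectDB.getUsageStatsDBSchema()):

import java.sql.SQLException;
import java.sql.Statement;

public final class OaiPrefixCleaner {

	// Illustrative consolidation of the "Cleaning oai - Step N" statements above:
	// every step rewrites 'oai:<host>/' identifiers to the 'oai:<host>:' form.
	public static void cleanOaiPrefixes(Statement stmt, String usageStatsSchema, String[] hosts)
		throws SQLException {
		for (String host : hosts) {
			String sql = "UPDATE " + usageStatsSchema + ".piwiklogtmp "
				+ "SET entity_id = regexp_replace(entity_id, '^oai:" + host + "/', 'oai:" + host + ":') "
				+ "WHERE entity_id LIKE 'oai:" + host + "/%'";
			stmt.executeUpdate(sql);
		}
	}
}

Called with the host list from the steps above (repositorio.chlc.min-saude.pt through comum.rcaap.pt), it issues the same UPDATE statements the hunks spell out one by one.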
@@ -12,13 +12,13 @@ public class UsageStatsExporter {

static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
static String matomoBaseURL = "analytics.openaire.eu";
static String repoLogPath = "/user/spyros/logs/usage_stats_logs3/Repologs";
static String portalLogPath = "/user/spyros/logs/usage_stats_logs3/Portallogs/";
static String repoLogPath = "/user/spyros/logs/usage_stats_logs4/Repologs";
static String portalLogPath = "/user/spyros/logs/usage_stats_logs4/Portallogs/";
static String portalMatomoID = "109";
static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";

static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs3/irusUKReports";
static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs3/sarcReports";
static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs4/irusUKReports";
static String sarcsReportPath = "/user/spyros/logs/usage_stats_logs4/sarcReports";

public UsageStatsExporter(Properties properties) {
this.properties = properties;
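UsageStatsExporter already receives a Properties object in its constructor, so the HDFS paths that keep moving between usage_stats_logs3 and usage_stats_logs4, and the Matomo token, could also be supplied through it. A hypothetical sketch (the class name and property keys are made up for illustration, with the current literals kept only as defaults):

import java.util.Properties;

public class UsageStatsConfigSketch {

	// Hypothetical alternative to the hard-coded static fields above: read the
	// values from the Properties object the exporter is constructed with.
	private final String repoLogPath;
	private final String portalLogPath;
	private final String matomoAuthToken;

	public UsageStatsConfigSketch(Properties properties) {
		this.repoLogPath = properties.getProperty("repo_log_path", "/user/spyros/logs/usage_stats_logs4/Repologs");
		this.portalLogPath = properties.getProperty("portal_log_path", "/user/spyros/logs/usage_stats_logs4/Portallogs/");
		this.matomoAuthToken = properties.getProperty("matomo_auth_token", "");
	}

	public String getRepoLogPath() {
		return repoLogPath;
	}

	public String getPortalLogPath() {
		return portalLogPath;
	}

	public String getMatomoAuthToken() {
		return matomoAuthToken;
	}
}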
@@ -39,7 +39,9 @@ public class UsageStatsExporter {
// // the moment
System.out.println("====> Initializing the download logs module");
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
System.out.println("====> Downloading logs");
// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
System.out.println("====> Downloaded logs");

// Create DB tables, insert/update statistics
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");