2020-11-17 07:36:51 +01:00
|
|
|
|
|
|
|
package eu.dnetlib.oa.graph.usagestatsbuild.export;
|
|
|
|
|
|
|
|
import java.io.*;
|
|
|
|
import java.net.URLDecoder;
|
|
|
|
import java.sql.Connection;
|
|
|
|
import java.sql.PreparedStatement;
|
|
|
|
import java.sql.SQLException;
|
|
|
|
import java.sql.Statement;
|
|
|
|
import java.sql.Timestamp;
|
|
|
|
import java.text.SimpleDateFormat;
|
|
|
|
import java.util.*;
|
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
|
|
import org.apache.hadoop.fs.LocatedFileStatus;
|
|
|
|
import org.apache.hadoop.fs.Path;
|
|
|
|
import org.apache.hadoop.fs.RemoteIterator;
|
|
|
|
import org.json.simple.JSONArray;
|
|
|
|
import org.json.simple.JSONObject;
|
|
|
|
import org.json.simple.parser.JSONParser;
|
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @author D. Pierrakos, S. Zoupanos
|
|
|
|
*/
|
|
|
|
public class LaReferenciaStats {
|
|
|
|
|
|
|
|
private static final Logger logger = LoggerFactory.getLogger(LaReferenciaStats.class);
|
|
|
|
|
|
|
|
private String logRepoPath;
|
|
|
|
|
|
|
|
private Statement stmt = null;
|
|
|
|
|
|
|
|
private String CounterRobotsURL;
|
|
|
|
private ArrayList robotsList;
|
|
|
|
|
|
|
|
public LaReferenciaStats() throws Exception {
|
|
|
|
}
|
|
|
|
|
|
|
|
public void processLogs() throws Exception {
|
|
|
|
try {
|
|
|
|
logger.info("LaReferencia creating viewsStats");
|
|
|
|
viewsStats();
|
|
|
|
logger.info("LaReferencia created viewsStats");
|
|
|
|
logger.info("LaReferencia creating downloadsStats");
|
|
|
|
downloadsStats();
|
|
|
|
logger.info("LaReferencia created downloadsStats");
|
|
|
|
|
|
|
|
// logger.info("LaReferencia updating Production Tables");
|
|
|
|
// updateProdTables();
|
|
|
|
// logger.info("LaReferencia updated Production Tables");
|
|
|
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.error("Failed to process logs: " + e);
|
|
|
|
throw new Exception("Failed to process logs: " + e.toString(), e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public void viewsStats() throws Exception {
|
|
|
|
|
|
|
|
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
|
|
|
ConnectDB.getHiveConnection().setAutoCommit(false);
|
|
|
|
|
|
|
|
logger.info("Creating la_result_views_monthly_tmp view");
|
|
|
|
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".la_result_views_monthly_tmp AS "
|
|
|
|
+
|
|
|
|
"SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
|
|
|
|
"THEN 1 ELSE 0 END) AS openaire_referrer, " +
|
|
|
|
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
|
|
|
|
"FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog where action='action' and " +
|
|
|
|
"(source_item_type='oaItem' or source_item_type='repItem') " +
|
|
|
|
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
|
|
|
|
"source ORDER BY source, entity_id";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
logger.info("Created la_result_views_monthly_tmp view");
|
|
|
|
|
|
|
|
logger.info("Dropping la_views_stats_tmp table");
|
|
|
|
sql = "DROP TABLE IF EXISTS " +
|
|
|
|
ConnectDB.getUsageStatsDBSchema() +
|
|
|
|
".la_views_stats_tmp";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
logger.info("Dropped la_views_stats_tmp table");
|
|
|
|
|
|
|
|
logger.info("Creating la_views_stats_tmp table");
|
|
|
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".la_views_stats_tmp " +
|
|
|
|
"AS SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date, " +
|
|
|
|
"max(views) AS count, max(openaire_referrer) AS openaire " +
|
|
|
|
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".la_result_views_monthly_tmp p, " +
|
|
|
|
ConnectDB.getStatsDBSchema() + ".datasource_oids d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
|
|
|
|
"WHERE p.source=d.oid AND p.id=ro.oid " +
|
|
|
|
"GROUP BY d.id, ro.id, month " +
|
|
|
|
"ORDER BY d.id, ro.id, month";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
logger.info("Created la_views_stats_tmp table");
|
|
|
|
|
|
|
|
stmt.close();
|
2020-12-12 11:00:14 +01:00
|
|
|
// ConnectDB.getHiveConnection().close();
|
2020-11-17 07:36:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
private void downloadsStats() throws Exception {
|
|
|
|
|
|
|
|
Statement stmt = ConnectDB.getHiveConnection().createStatement();
|
|
|
|
ConnectDB.getHiveConnection().setAutoCommit(false);
|
|
|
|
|
|
|
|
logger.info("Creating la_result_downloads_monthly_tmp view");
|
|
|
|
String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
|
|
|
|
+ ".la_result_downloads_monthly_tmp AS " +
|
|
|
|
"SELECT entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
|
|
|
|
"THEN 1 ELSE 0 END) AS openaire_referrer, " +
|
|
|
|
"CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
|
|
|
|
"FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog where action='download' and " +
|
|
|
|
"(source_item_type='oaItem' or source_item_type='repItem') " +
|
|
|
|
"GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
|
|
|
|
"source ORDER BY source, entity_id";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
logger.info("Created la_result_downloads_monthly_tmp view");
|
|
|
|
|
|
|
|
logger.info("Dropping la_downloads_stats_tmp table");
|
|
|
|
sql = "DROP TABLE IF EXISTS " +
|
|
|
|
ConnectDB.getUsageStatsDBSchema() +
|
|
|
|
".la_downloads_stats_tmp";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
logger.info("Dropped la_downloads_stats_tmp table");
|
|
|
|
|
|
|
|
logger.info("Creating la_downloads_stats_tmp table");
|
|
|
|
sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".la_downloads_stats_tmp " +
|
|
|
|
"AS SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date, " +
|
|
|
|
"max(downloads) AS count, max(openaire_referrer) AS openaire " +
|
|
|
|
"FROM " + ConnectDB.getUsageStatsDBSchema() + ".la_result_downloads_monthly_tmp p, " +
|
|
|
|
ConnectDB.getStatsDBSchema() + ".datasource_oids d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
|
|
|
|
"WHERE p.source=d.oid AND p.id=ro.oid " +
|
|
|
|
"GROUP BY d.id, ro.id, month " +
|
|
|
|
"ORDER BY d.id, ro.id, month";
|
|
|
|
stmt.executeUpdate(sql);
|
|
|
|
logger.info("Created la_downloads_stats_tmp table");
|
|
|
|
|
|
|
|
stmt.close();
|
2020-12-12 11:00:14 +01:00
|
|
|
// ConnectDB.getHiveConnection().close();
|
2020-11-17 07:36:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|