forked from D-Net/dnet-hadoop
161 lines
6.6 KiB
Java
161 lines
6.6 KiB
Java
|
|
package eu.dnetlib.oa.graph.usagestats.export;
|
|
|
|
import java.sql.ResultSet;
|
|
import java.sql.SQLException;
|
|
import java.sql.Statement;
|
|
import java.util.Properties;
|
|
|
|
import org.apache.commons.io.IOUtils;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
public class UsageStatsExporter {
|
|
|
|
private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
|
|
private Properties properties;
|
|
|
|
// static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
|
|
// static String matomoBaseURL = "analytics.openaire.eu";
|
|
// static String repoLogPath = "/user/spyros/logs/usage_stats_logs6/Repologs";
|
|
// static String portalLogPath = "/user/spyros/logs/usage_stats_logs6/Portallogs/";
|
|
// static String portalMatomoID = "109";
|
|
// static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
|
|
//
|
|
// static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs6/irusUKReports";
|
|
// static String sarcsReportPathArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/Array";
|
|
// static String sarcsReportPathNonArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/NonArray";
|
|
//
|
|
// static String lareferenciaLogPath = "/user/spyros/logs/usage_stats_logs6/lareferencia";
|
|
// static String lareferenciaBaseURL = "http://matomo.lareferencia.info";
|
|
// static String lareferenciaAuthToken = "484874b3655d5a831eb8db33695790c4";
|
|
|
|
public UsageStatsExporter(Properties properties) {
|
|
this.properties = properties;
|
|
}
|
|
|
|
public void runImpalaQuery() throws Exception {
|
|
Statement stmt = ConnectDB.getImpalaConnection().createStatement();
|
|
ConnectDB.getImpalaConnection().setAutoCommit(false);
|
|
|
|
System.out.println("====> Executing Impala query");
|
|
Statement statement = ConnectDB.getImpalaConnection().createStatement();
|
|
|
|
ResultSet rs = statement
|
|
.executeQuery(
|
|
// "CREATE TABLE usagestats_20200913.spyros_tmp5 AS\n" +
|
|
// "SELECT s.source, d.id AS repository_id, ro.id as result_id, s.count, '0' \n" +
|
|
// "FROM usagestats_20200913.sarc_sushilogtmp2 s, \n" +
|
|
// "openaire_prod_stats_shadow_20200821.datasource_oids d, \n" +
|
|
// "openaire_prod_stats_shadow_20200821.datasource_results dr, \n" +
|
|
// "openaire_prod_stats_shadow_20200821.result_pids ro \n" +
|
|
// "WHERE d.oid LIKE CONCAT('%', s.repository, '%') AND dr.id=d.id AND dr.result=ro.id \n" +
|
|
// "AND s.rid=ro.pid AND ro.type='doi' AND metric_type='ft_total' AND s.source='SARC-OJS' ");
|
|
|
|
"CREATE TABLE usagestats_20200913.spyros_tmp6 AS\n" +
|
|
"SELECT * \n" +
|
|
"FROM usagestats_20200913.sarc_sushilogtmp2");
|
|
|
|
stmt.close();
|
|
}
|
|
|
|
// public static void main(String[] args) throws Exception {
|
|
// final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
|
// IOUtils
|
|
// .toString(
|
|
// UsageStatsExporter.class
|
|
// .getResourceAsStream(
|
|
// "/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json")));
|
|
// parser.parseArgument(args);
|
|
//
|
|
// new UsageStatsExporter(null).run(parser);
|
|
// }
|
|
|
|
// private void run(ArgumentApplicationParser parser) throws Exception {
|
|
//
|
|
// final String isLookupUrl = parser.get("isLookupUrl");
|
|
// final String sourceNN = parser.get("sourceNameNode");
|
|
// final String targetNN = parser.get("targetNameNode");
|
|
// final String workDir = parser.get("workingDirectory");
|
|
// final Integer distcp_num_maps = Integer.parseInt(parser.get("distcp_num_maps"));
|
|
//
|
|
// final String distcp_memory_mb = parser.get("distcp_memory_mb");
|
|
// final String distcp_task_timeout = parser.get("distcp_task_timeout");
|
|
//
|
|
// final String transform_only_s = parser.get("transform_only");
|
|
// }
|
|
|
|
// public void export() throws Exception {
|
|
public void export() throws Exception {
|
|
|
|
System.out.println(ExecuteWorkflow.matomoAuthToken);
|
|
System.out.println(ExecuteWorkflow.matomoBaseURL);
|
|
System.out.println(ExecuteWorkflow.repoLogPath);
|
|
System.out.println(ExecuteWorkflow.portalLogPath);
|
|
System.out.println(ExecuteWorkflow.irusUKBaseURL);
|
|
System.out.println(ExecuteWorkflow.irusUKReportPath);
|
|
System.out.println(ExecuteWorkflow.sarcsReportPathArray);
|
|
System.out.println(ExecuteWorkflow.sarcsReportPathNonArray);
|
|
System.out.println(ExecuteWorkflow.lareferenciaLogPath);
|
|
System.out.println(ExecuteWorkflow.lareferenciaBaseURL);
|
|
System.out.println(ExecuteWorkflow.lareferenciaAuthToken);
|
|
|
|
logger.info("=====> Test of the logger (info)");
|
|
logger.debug("=====> Test of the logger (debug)");
|
|
logger.error("=====> Test of the logger (error)");
|
|
|
|
// connect to DB
|
|
System.out.println("====> Initialising DB properties");
|
|
ConnectDB.init(properties);
|
|
|
|
runImpalaQuery();
|
|
System.exit(0);
|
|
|
|
// Create DB tables - they are also needed to download the statistics too
|
|
System.out.println("====> Creating database and tables");
|
|
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
|
|
//
|
|
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
|
|
// // the moment
|
|
System.out.println("====> Initializing the download logs module");
|
|
PiwikDownloadLogs piwd = new PiwikDownloadLogs(ExecuteWorkflow.matomoBaseURL, ExecuteWorkflow.matomoAuthToken);
|
|
System.out.println("====> Downloading piwik logs");
|
|
// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
|
|
System.out.println("====> Downloaded piwik logs");
|
|
|
|
// Create DB tables, insert/update statistics
|
|
// String cRobotsUrl = properties.getProperty("COUNTER_robots_Url");
|
|
String cRobotsUrl = "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json";
|
|
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
|
|
System.out.println("====> Processing logs");
|
|
piwikstatsdb.processLogs();
|
|
// log.info("process logs done");
|
|
|
|
System.out.println("====> Creating LaReferencia tables");
|
|
LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(ExecuteWorkflow.lareferenciaBaseURL,
|
|
ExecuteWorkflow.lareferenciaAuthToken);
|
|
System.out.println("====> Downloading LaReferencia logs");
|
|
// lrf.GetLaReferenciaRepos(lareferenciaLogPath);
|
|
System.out.println("====> Downloaded LaReferencia logs");
|
|
LaReferenciaStats lastats = new LaReferenciaStats(ExecuteWorkflow.lareferenciaLogPath);
|
|
System.out.println("====> Processing LaReferencia logs");
|
|
// lastats.processLogs();
|
|
// log.info("LaReferencia logs done");
|
|
|
|
// IrusStats irusstats = new IrusStats(irusUKBaseURL);
|
|
// irusstats.getIrusRRReport(irusUKReportPath);
|
|
|
|
// irusstats.processIrusStats();
|
|
// log.info("irus done");
|
|
|
|
// SarcStats sarcStats = new SarcStats();
|
|
// sarcStats.getAndProcessSarc(sarcsReportPathArray, sarcsReportPathNonArray);
|
|
// sarcStats.finalizeSarcStats();
|
|
// log.info("sarc done");
|
|
|
|
// // finalize usagestats
|
|
// piwikstatsdb.finalizeStats();
|
|
// log.info("finalized stats");
|
|
}
|
|
}
|