diff --git a/dhp-workflows/dhp-usage-stats-update/job-override.properties.bk b/dhp-workflows/dhp-usage-stats-update/job-override.properties.bk new file mode 100644 index 000000000..67d872e9d --- /dev/null +++ b/dhp-workflows/dhp-usage-stats-update/job-override.properties.bk @@ -0,0 +1,7 @@ +stats_db_name=openaire_beta_20200618_stats +openaire_db_name=openaire_beta_20200618 +external_stats_db_name=stats_ext +stats_db_shadow_name=openaire_beta_20200618_stats_shadow +hive_timeout=3000 +hive_spark_client_timeout=100000 +hive_spark_client_server_timeout=100000 diff --git a/dhp-workflows/dhp-usage-stats-update/pom.xml b/dhp-workflows/dhp-usage-stats-update/pom.xml index dff319b7d..d5bfae133 100644 --- a/dhp-workflows/dhp-usage-stats-update/pom.xml +++ b/dhp-workflows/dhp-usage-stats-update/pom.xml @@ -2,16 +2,28 @@ - + + + + + dhp-workflows + eu.dnetlib.dhp + 1.1.7-SNAPSHOT + 4.0.0 dhp-usage-stats-update - UTF-8 UTF-8 @@ -45,13 +57,16 @@ org.apache.hive hive-jdbc ${cdh.hive.version} - - org.apache.hadoop hadoop-common ${cdh.hadoop.version} - + + + eu.dnetlib.dhp + dhp-common + ${project.version} + diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java index 58d725e32..0c66856a4 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java @@ -9,20 +9,59 @@ package eu.dnetlib.oa.graph.usagestats.export; import java.io.InputStream; import java.util.Properties; +import org.apache.commons.io.IOUtils; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + /** - * @author dpie + * @author dpie, Spyros Zoupanos */ public class ExecuteWorkflow { + static String matomoAuthToken; + static String matomoBaseURL; + static 
String repoLogPath; + static String portalLogPath; + static String portalMatomoID; + static String irusUKBaseURL; + static String irusUKReportPath; + static String sarcsReportPathArray; + static String sarcsReportPathNonArray; + static String lareferenciaLogPath; + static String lareferenciaBaseURL; + static String lareferenciaAuthToken; + public static void main(String args[]) throws Exception { - Properties prop = new Properties(); - InputStream propertiesInputStream = UsageStatsExporter.class - .getClassLoader() - .getResourceAsStream("usagestats.properties"); - prop.load(propertiesInputStream); +// Properties prop = new Properties(); +// InputStream propertiesInputStream = UsageStatsExporter.class +// .getClassLoader() +// .getResourceAsStream("usagestats.properties"); +// prop.load(propertiesInputStream); - UsageStatsExporter usagestatsExport = new UsageStatsExporter(prop); + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + UsageStatsExporter.class + .getResourceAsStream( +// "/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json"))); + "/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json"))); + parser.parseArgument(args); + + matomoAuthToken = parser.get("matomoAuthToken"); + matomoBaseURL = parser.get("matomoBaseURL"); + repoLogPath = parser.get("repoLogPath"); + portalLogPath = parser.get("portalLogPath"); + portalMatomoID = parser.get("portalMatomoID"); + irusUKBaseURL = parser.get("irusUKBaseURL"); + irusUKReportPath = parser.get("irusUKReportPath"); + sarcsReportPathArray = parser.get("sarcsReportPathArray"); + sarcsReportPathNonArray = parser.get("sarcsReportPathNonArray"); + lareferenciaLogPath = parser.get("lareferenciaLogPath"); + lareferenciaBaseURL = parser.get("lareferenciaBaseURL"); + lareferenciaAuthToken = parser.get("lareferenciaAuthToken"); + + UsageStatsExporter usagestatsExport = new UsageStatsExporter(null); usagestatsExport.export(); } } diff --git 
a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java index e33d04ea8..1d271f0cd 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java @@ -149,7 +149,7 @@ public class IrusStats { " >\n" + ")\n" + "ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" + - "LOCATION '" + UsageStatsExporter.irusUKReportPath + "'\n" + + "LOCATION '" + ExecuteWorkflow.irusUKReportPath + "'\n" + "TBLPROPERTIES (\"transactional\"=\"false\")"; stmt.executeUpdate(create_sushilogtmp_json); System.out.println("====> Created sushilogtmp_json table"); diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java index ee69fd67e..208e37eff 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java @@ -174,7 +174,7 @@ public class LaReferenciaStats { " >" + ")\n" + "ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" + - "LOCATION '" + UsageStatsExporter.lareferenciaLogPath + "'\n" + + "LOCATION '" + ExecuteWorkflow.lareferenciaLogPath + "'\n" + "TBLPROPERTIES (\"transactional\"=\"false\")"; stmt.executeUpdate(create_lareferencialogtmp_json); System.out.println("====> Created lareferencialogtmp_json"); diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java 
b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java index 5f46239e0..f7069bb20 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java @@ -262,7 +262,7 @@ public class PiwikStatsDB { " >\n" + ")\n" + "ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" + - "LOCATION '" + UsageStatsExporter.repoLogPath + "'\n" + + "LOCATION '" + ExecuteWorkflow.repoLogPath + "'\n" + "TBLPROPERTIES (\"transactional\"=\"false\")"; stmt.executeUpdate(create_piwiklogtmp_json); System.out.println("====> Created piwiklogtmp_json"); @@ -749,7 +749,7 @@ public class PiwikStatsDB { " >\n" + ")\n" + "ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" + - "LOCATION '" + UsageStatsExporter.repoLogPath + "'\n" + + "LOCATION '" + ExecuteWorkflow.repoLogPath + "'\n" + "TBLPROPERTIES (\"transactional\"=\"false\")"; stmt.executeUpdate(create_process_portal_log_tmp_json); System.out.println("====> Created process_portal_log_tmp_json"); diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java index 3148e4f2a..a3f18659c 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java @@ -6,27 +6,29 @@ import java.sql.SQLException; import java.sql.Statement; import java.util.Properties; -import org.apache.log4j.Logger; +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class UsageStatsExporter { - private Logger log = 
Logger.getLogger(this.getClass()); + private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class); private Properties properties; - static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9"; - static String matomoBaseURL = "analytics.openaire.eu"; - static String repoLogPath = "/user/spyros/logs/usage_stats_logs6/Repologs"; - static String portalLogPath = "/user/spyros/logs/usage_stats_logs6/Portallogs/"; - static String portalMatomoID = "109"; - static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/"; - - static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs6/irusUKReports"; - static String sarcsReportPathArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/Array"; - static String sarcsReportPathNonArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/NonArray"; - - static String lareferenciaLogPath = "/user/spyros/logs/usage_stats_logs6/lareferencia"; - static String lareferenciaBaseURL = "http://matomo.lareferencia.info"; - static String lareferenciaAuthToken = "484874b3655d5a831eb8db33695790c4"; +// static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9"; +// static String matomoBaseURL = "analytics.openaire.eu"; +// static String repoLogPath = "/user/spyros/logs/usage_stats_logs6/Repologs"; +// static String portalLogPath = "/user/spyros/logs/usage_stats_logs6/Portallogs/"; +// static String portalMatomoID = "109"; +// static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/"; +// +// static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs6/irusUKReports"; +// static String sarcsReportPathArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/Array"; +// static String sarcsReportPathNonArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/NonArray"; +// +// static String lareferenciaLogPath = "/user/spyros/logs/usage_stats_logs6/lareferencia"; +// static String lareferenciaBaseURL = "http://matomo.lareferencia.info"; +// static String 
lareferenciaAuthToken = "484874b3655d5a831eb8db33695790c4"; public UsageStatsExporter(Properties properties) { this.properties = properties; @@ -57,9 +59,51 @@ public class UsageStatsExporter { stmt.close(); } +// public static void main(String[] args) throws Exception { +// final ArgumentApplicationParser parser = new ArgumentApplicationParser( +// IOUtils +// .toString( +// UsageStatsExporter.class +// .getResourceAsStream( +// "/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json"))); +// parser.parseArgument(args); +// +// new UsageStatsExporter(null).run(parser); +// } + +// private void run(ArgumentApplicationParser parser) throws Exception { +// +// final String isLookupUrl = parser.get("isLookupUrl"); +// final String sourceNN = parser.get("sourceNameNode"); +// final String targetNN = parser.get("targetNameNode"); +// final String workDir = parser.get("workingDirectory"); +// final Integer distcp_num_maps = Integer.parseInt(parser.get("distcp_num_maps")); +// +// final String distcp_memory_mb = parser.get("distcp_memory_mb"); +// final String distcp_task_timeout = parser.get("distcp_task_timeout"); +// +// final String transform_only_s = parser.get("transform_only"); +// } + // public void export() throws Exception { public void export() throws Exception { + System.out.println(ExecuteWorkflow.matomoAuthToken); + System.out.println(ExecuteWorkflow.matomoBaseURL); + System.out.println(ExecuteWorkflow.repoLogPath); + System.out.println(ExecuteWorkflow.portalLogPath); + System.out.println(ExecuteWorkflow.irusUKBaseURL); + System.out.println(ExecuteWorkflow.irusUKReportPath); + System.out.println(ExecuteWorkflow.sarcsReportPathArray); + System.out.println(ExecuteWorkflow.sarcsReportPathNonArray); + System.out.println(ExecuteWorkflow.lareferenciaLogPath); + System.out.println(ExecuteWorkflow.lareferenciaBaseURL); + System.out.println(ExecuteWorkflow.lareferenciaAuthToken); + + logger.info("=====> Test of the logger (info)"); + 
logger.debug("=====> Test of the logger (debug)"); + logger.error("=====> Test of the logger (error)"); + // connect to DB System.out.println("====> Initialising DB properties"); ConnectDB.init(properties); @@ -69,12 +113,12 @@ public class UsageStatsExporter { // Create DB tables - they are also needed to download the statistics too System.out.println("====> Creating database and tables"); - PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath); + PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath); // // // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for // // the moment System.out.println("====> Initializing the download logs module"); - PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken); + PiwikDownloadLogs piwd = new PiwikDownloadLogs(ExecuteWorkflow.matomoBaseURL, ExecuteWorkflow.matomoAuthToken); System.out.println("====> Downloading piwik logs"); // piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID); System.out.println("====> Downloaded piwik logs"); @@ -85,14 +129,15 @@ public class UsageStatsExporter { piwikstatsdb.setCounterRobotsURL(cRobotsUrl); System.out.println("====> Processing logs"); piwikstatsdb.processLogs(); - log.info("process logs done"); +// log.info("process logs done"); System.out.println("====> Creating LaReferencia tables"); - LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(lareferenciaBaseURL, lareferenciaAuthToken); + LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(ExecuteWorkflow.lareferenciaBaseURL, + ExecuteWorkflow.lareferenciaAuthToken); System.out.println("====> Downloading LaReferencia logs"); // lrf.GetLaReferenciaRepos(lareferenciaLogPath); System.out.println("====> Downloaded LaReferencia logs"); - LaReferenciaStats lastats = new LaReferenciaStats(lareferenciaLogPath); + LaReferenciaStats lastats = new 
LaReferenciaStats(ExecuteWorkflow.lareferenciaLogPath); System.out.println("====> Processing LaReferencia logs"); // lastats.processLogs(); // log.info("LaReferencia logs done"); diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json new file mode 100644 index 000000000..5cb89a845 --- /dev/null +++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json @@ -0,0 +1,78 @@ +[ + { + "paramName": "mat", + "paramLongName": "matomoAuthToken", + "paramDescription": "authentication token used to access the Matomo API", + "paramRequired": false + }, + { + "paramName": "mbu", + "paramLongName": "matomoBaseURL", + "paramDescription": "base URL of the Matomo instance the logs are downloaded from", + "paramRequired": true + }, + { + "paramName": "rlp", + "paramLongName": "repoLogPath", + "paramDescription": "path of the directory holding the repository logs", + "paramRequired": true + }, + { + "paramName": "plp", + "paramLongName": "portalLogPath", + "paramDescription": "path of the directory holding the portal logs", + "paramRequired": true + }, + { + "paramName": "pmi", + "paramLongName": "portalMatomoID", + "paramDescription": "Matomo site ID of the portal", + "paramRequired": true + }, + { + "paramName": "iukbuw", + "paramLongName": "irusUKBaseURL", + "paramDescription": "base URL of the IRUS-UK SUSHI-Lite API", + "paramRequired": true + }, + { + "paramName": "iukrp", + "paramLongName": "irusUKReportPath", + "paramDescription": "path of the directory holding the IRUS-UK reports", + "paramRequired": true + }, + { + "paramName": "srpa", + "paramLongName": "sarcsReportPathArray", + "paramDescription": "path of the directory holding the SARC reports (Array variant)", + "paramRequired": true + }, + { + "paramName": "srpna", + "paramLongName": "sarcsReportPathNonArray", + 
"paramDescription": "path of the directory holding the SARC reports (NonArray variant)", + "paramRequired": true + }, + { + "paramName": "llp", + "paramLongName": "lareferenciaLogPath", + "paramDescription": "path of the directory holding the LaReferencia logs", + "paramRequired": true + }, + { + "paramName": "lbu", + "paramLongName": "lareferenciaBaseURL", + "paramDescription": "base URL of the LaReferencia Matomo instance", + "paramRequired": true + }, + { + "paramName": "lat", + "paramLongName": "lareferenciaAuthToken", + "paramDescription": "authentication token used to access the LaReferencia Matomo API", + "paramRequired": true + } +] + + + + diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml index ec39785b0..df349e49e 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml @@ -38,6 +38,19 @@ eu.dnetlib.oa.graph.usagestats.export.ExecuteWorkflow + --matomoAuthToken${matomoAuthToken} + --matomoBaseURL${matomoBaseURL} + --repoLogPath${repoLogPath} + --portalLogPath${portalLogPath} + --portalMatomoID${portalMatomoID} + --irusUKBaseURL${irusUKBaseURL} + --irusUKReportPath${irusUKReportPath} + --sarcsReportPathArray${sarcsReportPathArray} + --sarcsReportPathNonArray${sarcsReportPathNonArray} + --lareferenciaLogPath${lareferenciaLogPath} + --lareferenciaBaseURL${lareferenciaBaseURL} + --lareferenciaAuthToken${lareferenciaAuthToken} + diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/usagestats.properties b/dhp-workflows/dhp-usage-stats-update/src/main/resources/usagestats.properties deleted file mode 100755 index d42df84a9..000000000 --- 
a/dhp-workflows/dhp-usage-stats-update/src/main/resources/usagestats.properties +++ /dev/null @@ -1,24 +0,0 @@ -#logging.config=log4j.properties -db.driverClassName=org.postgresql.Driver -#spring.datasource.url=jdbc:postgresql://88.197.53.70:5432/stats -#spring.datasource.url=jdbc:postgresql://statsdb-beta.openaire.eu:5432/stats -db.Url=jdbc:postgresql://88.197.53.70:5432/stats -#db.Url=jdbc:postgresql://statsdb-prod.openaire.eu/stats -db.username=sqoop -db.password=sqoop -db.defaultschema=usagestats -matomo.AuthToken=703bd17d845acdaf795e01bb1e0895b9 -matomo.BaseUrl=analytics.openaire.eu -#repo.LogPath=/Users/dpie/Desktop/Repologs/ -repo.LogPath=/user/spyros/logs/usage_stats_logs/Repologs -portal.LogPath=/user/spyros/logs/usage_stats_logs/Portallogs/ -portal.MatomoID=109 -COUNTER.robots.Url=https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json -IRUS_UK.BaseUrl=https://irus.jisc.ac.uk/api/sushilite/v1_7/ -#server.compression.enabled=true -#compression.max_number_of_records=1000 -#usagestats.redis.hostname=localhost -#usagestats.redis.port=6379 -#spring.jackson.serialization.INDENT_OUTPUT=true -#download.folder=/Users/dpie/DownloadSushiLite -#sushi-lite.server=http://localhost:8080 \ No newline at end of file