diff --git a/dhp-workflows/dhp-usage-stats-update/job-override.properties.bk b/dhp-workflows/dhp-usage-stats-update/job-override.properties.bk
new file mode 100644
index 000000000..67d872e9d
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/job-override.properties.bk
@@ -0,0 +1,7 @@
+stats_db_name=openaire_beta_20200618_stats
+openaire_db_name=openaire_beta_20200618
+external_stats_db_name=stats_ext
+stats_db_shadow_name=openaire_beta_20200618_stats_shadow
+hive_timeout=3000
+hive_spark_client_timeout=100000
+hive_spark_client_server_timeout=100000
diff --git a/dhp-workflows/dhp-usage-stats-update/pom.xml b/dhp-workflows/dhp-usage-stats-update/pom.xml
index dff319b7d..d5bfae133 100644
--- a/dhp-workflows/dhp-usage-stats-update/pom.xml
+++ b/dhp-workflows/dhp-usage-stats-update/pom.xml
@@ -2,16 +2,28 @@
-
+
+
+
+
+ dhp-workflows
+ eu.dnetlib.dhp
+ 1.1.7-SNAPSHOT
+
4.0.0
dhp-usage-stats-update
-
UTF-8
UTF-8
@@ -45,13 +57,16 @@
org.apache.hive
hive-jdbc
${cdh.hive.version}
-
-
org.apache.hadoop
hadoop-common
${cdh.hadoop.version}
-
+
+
+ eu.dnetlib.dhp
+ dhp-common
+ ${project.version}
+
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java
index 58d725e32..0c66856a4 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/ExecuteWorkflow.java
@@ -9,20 +9,59 @@ package eu.dnetlib.oa.graph.usagestats.export;
import java.io.InputStream;
import java.util.Properties;
+import org.apache.commons.io.IOUtils;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
/**
- * @author dpie
+ * @author dpie, Spyros Zoupanos
*/
public class ExecuteWorkflow {
+ static String matomoAuthToken;
+ static String matomoBaseURL;
+ static String repoLogPath;
+ static String portalLogPath;
+ static String portalMatomoID;
+ static String irusUKBaseURL;
+ static String irusUKReportPath;
+ static String sarcsReportPathArray;
+ static String sarcsReportPathNonArray;
+ static String lareferenciaLogPath;
+ static String lareferenciaBaseURL;
+ static String lareferenciaAuthToken;
+
public static void main(String args[]) throws Exception {
- Properties prop = new Properties();
- InputStream propertiesInputStream = UsageStatsExporter.class
- .getClassLoader()
- .getResourceAsStream("usagestats.properties");
- prop.load(propertiesInputStream);
+// Properties prop = new Properties();
+// InputStream propertiesInputStream = UsageStatsExporter.class
+// .getClassLoader()
+// .getResourceAsStream("usagestats.properties");
+// prop.load(propertiesInputStream);
- UsageStatsExporter usagestatsExport = new UsageStatsExporter(prop);
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+ IOUtils
+ .toString(
+ UsageStatsExporter.class
+ .getResourceAsStream(
+// "/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json")));
+ "/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json")));
+ parser.parseArgument(args);
+
+ matomoAuthToken = parser.get("matomoAuthToken");
+ matomoBaseURL = parser.get("matomoBaseURL");
+ repoLogPath = parser.get("repoLogPath");
+ portalLogPath = parser.get("portalLogPath");
+ portalMatomoID = parser.get("portalMatomoID");
+ irusUKBaseURL = parser.get("irusUKBaseURL");
+ irusUKReportPath = parser.get("irusUKReportPath");
+ sarcsReportPathArray = parser.get("sarcsReportPathArray");
+ sarcsReportPathNonArray = parser.get("sarcsReportPathNonArray");
+ lareferenciaLogPath = parser.get("lareferenciaLogPath");
+ lareferenciaBaseURL = parser.get("lareferenciaBaseURL");
+ lareferenciaAuthToken = parser.get("lareferenciaAuthToken");
+
+ UsageStatsExporter usagestatsExport = new UsageStatsExporter(null);
usagestatsExport.export();
}
}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java
index e33d04ea8..1d271f0cd 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/IrusStats.java
@@ -149,7 +149,7 @@ public class IrusStats {
" >\n" +
")\n" +
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
- "LOCATION '" + UsageStatsExporter.irusUKReportPath + "'\n" +
+ "LOCATION '" + ExecuteWorkflow.irusUKReportPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_sushilogtmp_json);
System.out.println("====> Created sushilogtmp_json table");
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java
index ee69fd67e..208e37eff 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/LaReferenciaStats.java
@@ -174,7 +174,7 @@ public class LaReferenciaStats {
" >" +
")\n" +
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
- "LOCATION '" + UsageStatsExporter.lareferenciaLogPath + "'\n" +
+ "LOCATION '" + ExecuteWorkflow.lareferenciaLogPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_lareferencialogtmp_json);
System.out.println("====> Created lareferencialogtmp_json");
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java
index 5f46239e0..f7069bb20 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/PiwikStatsDB.java
@@ -262,7 +262,7 @@ public class PiwikStatsDB {
" >\n" +
")\n" +
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
- "LOCATION '" + UsageStatsExporter.repoLogPath + "'\n" +
+ "LOCATION '" + ExecuteWorkflow.repoLogPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_piwiklogtmp_json);
System.out.println("====> Created piwiklogtmp_json");
@@ -749,7 +749,7 @@ public class PiwikStatsDB {
" >\n" +
")\n" +
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
- "LOCATION '" + UsageStatsExporter.repoLogPath + "'\n" +
+ "LOCATION '" + ExecuteWorkflow.repoLogPath + "'\n" +
"TBLPROPERTIES (\"transactional\"=\"false\")";
stmt.executeUpdate(create_process_portal_log_tmp_json);
System.out.println("====> Created process_portal_log_tmp_json");
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
index 3148e4f2a..a3f18659c 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestats/export/UsageStatsExporter.java
@@ -6,27 +6,29 @@ import java.sql.SQLException;
import java.sql.Statement;
import java.util.Properties;
-import org.apache.log4j.Logger;
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class UsageStatsExporter {
- private Logger log = Logger.getLogger(this.getClass());
+ private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
private Properties properties;
- static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
- static String matomoBaseURL = "analytics.openaire.eu";
- static String repoLogPath = "/user/spyros/logs/usage_stats_logs6/Repologs";
- static String portalLogPath = "/user/spyros/logs/usage_stats_logs6/Portallogs/";
- static String portalMatomoID = "109";
- static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
-
- static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs6/irusUKReports";
- static String sarcsReportPathArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/Array";
- static String sarcsReportPathNonArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/NonArray";
-
- static String lareferenciaLogPath = "/user/spyros/logs/usage_stats_logs6/lareferencia";
- static String lareferenciaBaseURL = "http://matomo.lareferencia.info";
- static String lareferenciaAuthToken = "484874b3655d5a831eb8db33695790c4";
+// static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
+// static String matomoBaseURL = "analytics.openaire.eu";
+// static String repoLogPath = "/user/spyros/logs/usage_stats_logs6/Repologs";
+// static String portalLogPath = "/user/spyros/logs/usage_stats_logs6/Portallogs/";
+// static String portalMatomoID = "109";
+// static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
+//
+// static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs6/irusUKReports";
+// static String sarcsReportPathArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/Array";
+// static String sarcsReportPathNonArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/NonArray";
+//
+// static String lareferenciaLogPath = "/user/spyros/logs/usage_stats_logs6/lareferencia";
+// static String lareferenciaBaseURL = "http://matomo.lareferencia.info";
+// static String lareferenciaAuthToken = "484874b3655d5a831eb8db33695790c4";
public UsageStatsExporter(Properties properties) {
this.properties = properties;
@@ -57,9 +59,51 @@ public class UsageStatsExporter {
stmt.close();
}
+// public static void main(String[] args) throws Exception {
+// final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+// IOUtils
+// .toString(
+// UsageStatsExporter.class
+// .getResourceAsStream(
+// "/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json")));
+// parser.parseArgument(args);
+//
+// new UsageStatsExporter(null).run(parser);
+// }
+
+// private void run(ArgumentApplicationParser parser) throws Exception {
+//
+// final String isLookupUrl = parser.get("isLookupUrl");
+// final String sourceNN = parser.get("sourceNameNode");
+// final String targetNN = parser.get("targetNameNode");
+// final String workDir = parser.get("workingDirectory");
+// final Integer distcp_num_maps = Integer.parseInt(parser.get("distcp_num_maps"));
+//
+// final String distcp_memory_mb = parser.get("distcp_memory_mb");
+// final String distcp_task_timeout = parser.get("distcp_task_timeout");
+//
+// final String transform_only_s = parser.get("transform_only");
+// }
+
// public void export() throws Exception {
public void export() throws Exception {
+ System.out.println(ExecuteWorkflow.matomoAuthToken);
+ System.out.println(ExecuteWorkflow.matomoBaseURL);
+ System.out.println(ExecuteWorkflow.repoLogPath);
+ System.out.println(ExecuteWorkflow.portalLogPath);
+ System.out.println(ExecuteWorkflow.irusUKBaseURL);
+ System.out.println(ExecuteWorkflow.irusUKReportPath);
+ System.out.println(ExecuteWorkflow.sarcsReportPathArray);
+ System.out.println(ExecuteWorkflow.sarcsReportPathNonArray);
+ System.out.println(ExecuteWorkflow.lareferenciaLogPath);
+ System.out.println(ExecuteWorkflow.lareferenciaBaseURL);
+ System.out.println(ExecuteWorkflow.lareferenciaAuthToken);
+
+ logger.info("=====> Test of the logger (info)");
+ logger.debug("=====> Test of the logger (debug)");
+ logger.error("=====> Test of the logger (error)");
+
// connect to DB
System.out.println("====> Initialising DB properties");
ConnectDB.init(properties);
@@ -69,12 +113,12 @@ public class UsageStatsExporter {
// Create DB tables - they are also needed to download the statistics too
System.out.println("====> Creating database and tables");
- PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
+ PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
//
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
// // the moment
System.out.println("====> Initializing the download logs module");
- PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
+ PiwikDownloadLogs piwd = new PiwikDownloadLogs(ExecuteWorkflow.matomoBaseURL, ExecuteWorkflow.matomoAuthToken);
System.out.println("====> Downloading piwik logs");
// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
System.out.println("====> Downloaded piwik logs");
@@ -85,14 +129,15 @@ public class UsageStatsExporter {
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
System.out.println("====> Processing logs");
piwikstatsdb.processLogs();
- log.info("process logs done");
+// log.info("process logs done");
System.out.println("====> Creating LaReferencia tables");
- LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(lareferenciaBaseURL, lareferenciaAuthToken);
+ LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(ExecuteWorkflow.lareferenciaBaseURL,
+ ExecuteWorkflow.lareferenciaAuthToken);
System.out.println("====> Downloading LaReferencia logs");
// lrf.GetLaReferenciaRepos(lareferenciaLogPath);
System.out.println("====> Downloaded LaReferencia logs");
- LaReferenciaStats lastats = new LaReferenciaStats(lareferenciaLogPath);
+ LaReferenciaStats lastats = new LaReferenciaStats(ExecuteWorkflow.lareferenciaLogPath);
System.out.println("====> Processing LaReferencia logs");
// lastats.processLogs();
// log.info("LaReferencia logs done");
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json
new file mode 100644
index 000000000..5cb89a845
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json
@@ -0,0 +1,78 @@
+[
+ {
+ "paramName": "mat",
+ "paramLongName": "matomoAuthToken",
+ "paramDescription": "authentication token for the Matomo (Piwik) server the usage logs are downloaded from",
+ "paramRequired": false
+ },
+ {
+ "paramName": "mbu",
+ "paramLongName": "matomoBaseURL",
+ "paramDescription": "base URL of the Matomo (Piwik) server the usage logs are downloaded from",
+ "paramRequired": true
+ },
+ {
+ "paramName": "rlp",
+ "paramLongName": "repoLogPath",
+ "paramDescription": "path of the directory holding the repository usage logs",
+ "paramRequired": true
+ },
+ {
+ "paramName": "plp",
+ "paramLongName": "portalLogPath",
+ "paramDescription": "path of the directory holding the portal usage logs",
+ "paramRequired": true
+ },
+ {
+ "paramName": "pmi",
+ "paramLongName": "portalMatomoID",
+ "paramDescription": "Matomo ID of the portal",
+ "paramRequired": true
+ },
+ {
+ "paramName": "iukbuw",
+ "paramLongName": "irusUKBaseURL",
+ "paramDescription": "base URL of the IRUS-UK SUSHI-Lite API",
+ "paramRequired": true
+ },
+ {
+ "paramName": "iukrp",
+ "paramLongName": "irusUKReportPath",
+ "paramDescription": "path of the directory holding the IRUS-UK reports",
+ "paramRequired": true
+ },
+ {
+ "paramName": "srpa",
+ "paramLongName": "sarcsReportPathArray",
+ "paramDescription": "path of the directory holding the SARC reports of the Array kind",
+ "paramRequired": true
+ },
+ {
+ "paramName": "srpna",
+ "paramLongName": "sarcsReportPathNonArray",
+ "paramDescription": "path of the directory holding the SARC reports of the NonArray kind",
+ "paramRequired": true
+ },
+ {
+ "paramName": "llp",
+ "paramLongName": "lareferenciaLogPath",
+ "paramDescription": "path of the directory holding the LaReferencia usage logs",
+ "paramRequired": true
+ },
+ {
+ "paramName": "lbu",
+ "paramLongName": "lareferenciaBaseURL",
+ "paramDescription": "base URL of the LaReferencia Matomo server",
+ "paramRequired": true
+ },
+ {
+ "paramName": "lat",
+ "paramLongName": "lareferenciaAuthToken",
+ "paramDescription": "authentication token for the LaReferencia Matomo server",
+ "paramRequired": true
+ }
+]
+
+
+
+
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml
index ec39785b0..df349e49e 100644
--- a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestats/oozie_app/workflow.xml
@@ -38,6 +38,19 @@
eu.dnetlib.oa.graph.usagestats.export.ExecuteWorkflow
+ --matomoAuthToken${matomoAuthToken}
+ --matomoBaseURL${matomoBaseURL}
+ --repoLogPath${repoLogPath}
+ --portalLogPath${portalLogPath}
+ --portalMatomoID${portalMatomoID}
+ --irusUKBaseURL${irusUKBaseURL}
+ --irusUKReportPath${irusUKReportPath}
+ --sarcsReportPathArray${sarcsReportPathArray}
+ --sarcsReportPathNonArray${sarcsReportPathNonArray}
+ --lareferenciaLogPath${lareferenciaLogPath}
+ --lareferenciaBaseURL${lareferenciaBaseURL}
+ --lareferenciaAuthToken${lareferenciaAuthToken}
+
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/usagestats.properties b/dhp-workflows/dhp-usage-stats-update/src/main/resources/usagestats.properties
deleted file mode 100755
index d42df84a9..000000000
--- a/dhp-workflows/dhp-usage-stats-update/src/main/resources/usagestats.properties
+++ /dev/null
@@ -1,24 +0,0 @@
-#logging.config=log4j.properties
-db.driverClassName=org.postgresql.Driver
-#spring.datasource.url=jdbc:postgresql://88.197.53.70:5432/stats
-#spring.datasource.url=jdbc:postgresql://statsdb-beta.openaire.eu:5432/stats
-db.Url=jdbc:postgresql://88.197.53.70:5432/stats
-#db.Url=jdbc:postgresql://statsdb-prod.openaire.eu/stats
-db.username=sqoop
-db.password=sqoop
-db.defaultschema=usagestats
-matomo.AuthToken=703bd17d845acdaf795e01bb1e0895b9
-matomo.BaseUrl=analytics.openaire.eu
-#repo.LogPath=/Users/dpie/Desktop/Repologs/
-repo.LogPath=/user/spyros/logs/usage_stats_logs/Repologs
-portal.LogPath=/user/spyros/logs/usage_stats_logs/Portallogs/
-portal.MatomoID=109
-COUNTER.robots.Url=https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json
-IRUS_UK.BaseUrl=https://irus.jisc.ac.uk/api/sushilite/v1_7/
-#server.compression.enabled=true
-#compression.max_number_of_records=1000
-#usagestats.redis.hostname=localhost
-#usagestats.redis.port=6379
-#spring.jackson.serialization.INDENT_OUTPUT=true
-#download.folder=/Users/dpie/DownloadSushiLite
-#sushi-lite.server=http://localhost:8080
\ No newline at end of file