forked from D-Net/dnet-hadoop
Correct parameter parsing
This commit is contained in:
parent
dc6114a24e
commit
c1257ac7c5
|
@ -0,0 +1,7 @@
|
|||
stats_db_name=openaire_beta_20200618_stats
|
||||
openaire_db_name=openaire_beta_20200618
|
||||
external_stats_db_name=stats_ext
|
||||
stats_db_shadow_name=openaire_beta_20200618_stats_shadow
|
||||
hive_timeout=3000
|
||||
hive_spark_client_timeout=100000
|
||||
hive_spark_client_server_timeout=100000
|
|
@ -2,16 +2,28 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
|
||||
|
||||
<parent>
|
||||
<!-- <parent>
|
||||
<artifactId>dhp-workflows</artifactId >
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.1.7-SNAPSHOT</version>
|
||||
</parent>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
<groupId>eu.dnetlib</groupId> -->
|
||||
<!-- <parent>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<version>1.1.7-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>dhp-usage-stats-update</artifactId> -->
|
||||
|
||||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.1.7-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>dhp-usage-stats-update</artifactId>
|
||||
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
||||
|
@ -45,13 +57,16 @@
|
|||
<groupId>org.apache.hive</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>${cdh.hive.version}</version>
|
||||
<!-- <version>3.1.2</version> -->
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<version>${cdh.hadoop.version}</version>
|
||||
</dependency>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
|
|
@ -9,20 +9,59 @@ package eu.dnetlib.oa.graph.usagestats.export;
|
|||
import java.io.InputStream;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
/**
|
||||
* @author dpie
|
||||
* @author dpie, Spyros Zoupanos
|
||||
*/
|
||||
public class ExecuteWorkflow {
|
||||
|
||||
static String matomoAuthToken;
|
||||
static String matomoBaseURL;
|
||||
static String repoLogPath;
|
||||
static String portalLogPath;
|
||||
static String portalMatomoID;
|
||||
static String irusUKBaseURL;
|
||||
static String irusUKReportPath;
|
||||
static String sarcsReportPathArray;
|
||||
static String sarcsReportPathNonArray;
|
||||
static String lareferenciaLogPath;
|
||||
static String lareferenciaBaseURL;
|
||||
static String lareferenciaAuthToken;
|
||||
|
||||
public static void main(String args[]) throws Exception {
|
||||
|
||||
Properties prop = new Properties();
|
||||
InputStream propertiesInputStream = UsageStatsExporter.class
|
||||
.getClassLoader()
|
||||
.getResourceAsStream("usagestats.properties");
|
||||
prop.load(propertiesInputStream);
|
||||
// Properties prop = new Properties();
|
||||
// InputStream propertiesInputStream = UsageStatsExporter.class
|
||||
// .getClassLoader()
|
||||
// .getResourceAsStream("usagestats.properties");
|
||||
// prop.load(propertiesInputStream);
|
||||
|
||||
UsageStatsExporter usagestatsExport = new UsageStatsExporter(prop);
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
UsageStatsExporter.class
|
||||
.getResourceAsStream(
|
||||
// "/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json")));
|
||||
"/eu/dnetlib/dhp/oa/graph/usagestats/export/usagestats_parameters.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
matomoAuthToken = parser.get("matomoAuthToken");
|
||||
matomoBaseURL = parser.get("matomoBaseURL");
|
||||
repoLogPath = parser.get("repoLogPath");
|
||||
portalLogPath = parser.get("portalLogPath");
|
||||
portalMatomoID = parser.get("portalMatomoID");
|
||||
irusUKBaseURL = parser.get("irusUKBaseURL");
|
||||
irusUKReportPath = parser.get("irusUKReportPath");
|
||||
sarcsReportPathArray = parser.get("sarcsReportPathArray");
|
||||
sarcsReportPathNonArray = parser.get("sarcsReportPathNonArray");
|
||||
lareferenciaLogPath = parser.get("lareferenciaLogPath");
|
||||
lareferenciaBaseURL = parser.get("lareferenciaBaseURL");
|
||||
lareferenciaAuthToken = parser.get("lareferenciaAuthToken");
|
||||
|
||||
UsageStatsExporter usagestatsExport = new UsageStatsExporter(null);
|
||||
usagestatsExport.export();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -149,7 +149,7 @@ public class IrusStats {
|
|||
" >\n" +
|
||||
")\n" +
|
||||
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
|
||||
"LOCATION '" + UsageStatsExporter.irusUKReportPath + "'\n" +
|
||||
"LOCATION '" + ExecuteWorkflow.irusUKReportPath + "'\n" +
|
||||
"TBLPROPERTIES (\"transactional\"=\"false\")";
|
||||
stmt.executeUpdate(create_sushilogtmp_json);
|
||||
System.out.println("====> Created sushilogtmp_json table");
|
||||
|
|
|
@ -174,7 +174,7 @@ public class LaReferenciaStats {
|
|||
" >" +
|
||||
")\n" +
|
||||
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
|
||||
"LOCATION '" + UsageStatsExporter.lareferenciaLogPath + "'\n" +
|
||||
"LOCATION '" + ExecuteWorkflow.lareferenciaLogPath + "'\n" +
|
||||
"TBLPROPERTIES (\"transactional\"=\"false\")";
|
||||
stmt.executeUpdate(create_lareferencialogtmp_json);
|
||||
System.out.println("====> Created lareferencialogtmp_json");
|
||||
|
|
|
@ -262,7 +262,7 @@ public class PiwikStatsDB {
|
|||
" >\n" +
|
||||
")\n" +
|
||||
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
|
||||
"LOCATION '" + UsageStatsExporter.repoLogPath + "'\n" +
|
||||
"LOCATION '" + ExecuteWorkflow.repoLogPath + "'\n" +
|
||||
"TBLPROPERTIES (\"transactional\"=\"false\")";
|
||||
stmt.executeUpdate(create_piwiklogtmp_json);
|
||||
System.out.println("====> Created piwiklogtmp_json");
|
||||
|
@ -749,7 +749,7 @@ public class PiwikStatsDB {
|
|||
" >\n" +
|
||||
")\n" +
|
||||
"ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
|
||||
"LOCATION '" + UsageStatsExporter.repoLogPath + "'\n" +
|
||||
"LOCATION '" + ExecuteWorkflow.repoLogPath + "'\n" +
|
||||
"TBLPROPERTIES (\"transactional\"=\"false\")";
|
||||
stmt.executeUpdate(create_process_portal_log_tmp_json);
|
||||
System.out.println("====> Created process_portal_log_tmp_json");
|
||||
|
|
|
@ -6,27 +6,29 @@ import java.sql.SQLException;
|
|||
import java.sql.Statement;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class UsageStatsExporter {
|
||||
|
||||
private Logger log = Logger.getLogger(this.getClass());
|
||||
private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
|
||||
private Properties properties;
|
||||
|
||||
static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
|
||||
static String matomoBaseURL = "analytics.openaire.eu";
|
||||
static String repoLogPath = "/user/spyros/logs/usage_stats_logs6/Repologs";
|
||||
static String portalLogPath = "/user/spyros/logs/usage_stats_logs6/Portallogs/";
|
||||
static String portalMatomoID = "109";
|
||||
static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
|
||||
|
||||
static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs6/irusUKReports";
|
||||
static String sarcsReportPathArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/Array";
|
||||
static String sarcsReportPathNonArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/NonArray";
|
||||
|
||||
static String lareferenciaLogPath = "/user/spyros/logs/usage_stats_logs6/lareferencia";
|
||||
static String lareferenciaBaseURL = "http://matomo.lareferencia.info";
|
||||
static String lareferenciaAuthToken = "484874b3655d5a831eb8db33695790c4";
|
||||
// static String matomoAuthToken = "703bd17d845acdaf795e01bb1e0895b9";
|
||||
// static String matomoBaseURL = "analytics.openaire.eu";
|
||||
// static String repoLogPath = "/user/spyros/logs/usage_stats_logs6/Repologs";
|
||||
// static String portalLogPath = "/user/spyros/logs/usage_stats_logs6/Portallogs/";
|
||||
// static String portalMatomoID = "109";
|
||||
// static String irusUKBaseURL = "https://irus.jisc.ac.uk/api/sushilite/v1_7/";
|
||||
//
|
||||
// static String irusUKReportPath = "/user/spyros/logs/usage_stats_logs6/irusUKReports";
|
||||
// static String sarcsReportPathArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/Array";
|
||||
// static String sarcsReportPathNonArray = "/user/spyros/logs/usage_stats_logs6/sarcReports/NonArray";
|
||||
//
|
||||
// static String lareferenciaLogPath = "/user/spyros/logs/usage_stats_logs6/lareferencia";
|
||||
// static String lareferenciaBaseURL = "http://matomo.lareferencia.info";
|
||||
// static String lareferenciaAuthToken = "484874b3655d5a831eb8db33695790c4";
|
||||
|
||||
public UsageStatsExporter(Properties properties) {
|
||||
this.properties = properties;
|
||||
|
@ -57,9 +59,51 @@ public class UsageStatsExporter {
|
|||
stmt.close();
|
||||
}
|
||||
|
||||
// public static void main(String[] args) throws Exception {
|
||||
// final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
// IOUtils
|
||||
// .toString(
|
||||
// UsageStatsExporter.class
|
||||
// .getResourceAsStream(
|
||||
// "/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json")));
|
||||
// parser.parseArgument(args);
|
||||
//
|
||||
// new UsageStatsExporter(null).run(parser);
|
||||
// }
|
||||
|
||||
// private void run(ArgumentApplicationParser parser) throws Exception {
|
||||
//
|
||||
// final String isLookupUrl = parser.get("isLookupUrl");
|
||||
// final String sourceNN = parser.get("sourceNameNode");
|
||||
// final String targetNN = parser.get("targetNameNode");
|
||||
// final String workDir = parser.get("workingDirectory");
|
||||
// final Integer distcp_num_maps = Integer.parseInt(parser.get("distcp_num_maps"));
|
||||
//
|
||||
// final String distcp_memory_mb = parser.get("distcp_memory_mb");
|
||||
// final String distcp_task_timeout = parser.get("distcp_task_timeout");
|
||||
//
|
||||
// final String transform_only_s = parser.get("transform_only");
|
||||
// }
|
||||
|
||||
// public void export() throws Exception {
|
||||
public void export() throws Exception {
|
||||
|
||||
System.out.println(ExecuteWorkflow.matomoAuthToken);
|
||||
System.out.println(ExecuteWorkflow.matomoBaseURL);
|
||||
System.out.println(ExecuteWorkflow.repoLogPath);
|
||||
System.out.println(ExecuteWorkflow.portalLogPath);
|
||||
System.out.println(ExecuteWorkflow.irusUKBaseURL);
|
||||
System.out.println(ExecuteWorkflow.irusUKReportPath);
|
||||
System.out.println(ExecuteWorkflow.sarcsReportPathArray);
|
||||
System.out.println(ExecuteWorkflow.sarcsReportPathNonArray);
|
||||
System.out.println(ExecuteWorkflow.lareferenciaLogPath);
|
||||
System.out.println(ExecuteWorkflow.lareferenciaBaseURL);
|
||||
System.out.println(ExecuteWorkflow.lareferenciaAuthToken);
|
||||
|
||||
logger.info("=====> Test of the logger (info)");
|
||||
logger.debug("=====> Test of the logger (debug)");
|
||||
logger.error("=====> Test of the logger (error)");
|
||||
|
||||
// connect to DB
|
||||
System.out.println("====> Initialising DB properties");
|
||||
ConnectDB.init(properties);
|
||||
|
@ -69,12 +113,12 @@ public class UsageStatsExporter {
|
|||
|
||||
// Create DB tables - they are also needed to download the statistics too
|
||||
System.out.println("====> Creating database and tables");
|
||||
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
|
||||
PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
|
||||
//
|
||||
// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
|
||||
// // the moment
|
||||
System.out.println("====> Initializing the download logs module");
|
||||
PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
|
||||
PiwikDownloadLogs piwd = new PiwikDownloadLogs(ExecuteWorkflow.matomoBaseURL, ExecuteWorkflow.matomoAuthToken);
|
||||
System.out.println("====> Downloading piwik logs");
|
||||
// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
|
||||
System.out.println("====> Downloaded piwik logs");
|
||||
|
@ -85,14 +129,15 @@ public class UsageStatsExporter {
|
|||
piwikstatsdb.setCounterRobotsURL(cRobotsUrl);
|
||||
System.out.println("====> Processing logs");
|
||||
piwikstatsdb.processLogs();
|
||||
log.info("process logs done");
|
||||
// log.info("process logs done");
|
||||
|
||||
System.out.println("====> Creating LaReferencia tables");
|
||||
LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(lareferenciaBaseURL, lareferenciaAuthToken);
|
||||
LaReferenciaDownloadLogs lrf = new LaReferenciaDownloadLogs(ExecuteWorkflow.lareferenciaBaseURL,
|
||||
ExecuteWorkflow.lareferenciaAuthToken);
|
||||
System.out.println("====> Downloading LaReferencia logs");
|
||||
// lrf.GetLaReferenciaRepos(lareferenciaLogPath);
|
||||
System.out.println("====> Downloaded LaReferencia logs");
|
||||
LaReferenciaStats lastats = new LaReferenciaStats(lareferenciaLogPath);
|
||||
LaReferenciaStats lastats = new LaReferenciaStats(ExecuteWorkflow.lareferenciaLogPath);
|
||||
System.out.println("====> Processing LaReferencia logs");
|
||||
// lastats.processLogs();
|
||||
// log.info("LaReferencia logs done");
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
[
|
||||
{
|
||||
"paramName": "mat",
|
||||
"paramLongName": "matomoAuthToken",
|
||||
"paramDescription": "authentication token used to access the Matomo API",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "mbu",
|
||||
"paramLongName": "matomoBaseURL",
|
||||
"paramDescription": "base URL of the Matomo server",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "rlp",
|
||||
"paramLongName": "repoLogPath",
|
||||
"paramDescription": "path of the directory holding the repository logs",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "plp",
|
||||
"paramLongName": "portalLogPath",
|
||||
"paramDescription": "path of the directory holding the portal logs",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "pmi",
|
||||
"paramLongName": "portalMatomoID",
|
||||
"paramDescription": "Matomo ID of the portal",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "iukbuw",
|
||||
"paramLongName": "irusUKBaseURL",
|
||||
"paramDescription": "base URL of the IRUS-UK API",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "iukrp",
|
||||
"paramLongName": "irusUKReportPath",
|
||||
"paramDescription": "path of the directory holding the IRUS-UK reports",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "srpa",
|
||||
"paramLongName": "sarcsReportPathArray",
|
||||
"paramDescription": "path of the directory holding the SARC reports (Array)",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "srpna",
|
||||
"paramLongName": "sarcsReportPathNonArray",
|
||||
"paramDescription": "path of the directory holding the SARC reports (NonArray)",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "llp",
|
||||
"paramLongName": "lareferenciaLogPath",
|
||||
"paramDescription": "path of the directory holding the LaReferencia logs",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "lbu",
|
||||
"paramLongName": "lareferenciaBaseURL",
|
||||
"paramDescription": "base URL of the LaReferencia Matomo server",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "lat",
|
||||
"paramLongName": "lareferenciaAuthToken",
|
||||
"paramDescription": "authentication token used to access the LaReferencia Matomo server",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
|
@ -38,6 +38,19 @@
|
|||
<action name='Step1'>
|
||||
<java>
|
||||
<main-class>eu.dnetlib.oa.graph.usagestats.export.ExecuteWorkflow</main-class>
|
||||
<arg>--matomoAuthToken</arg><arg>${matomoAuthToken}</arg>
|
||||
<arg>--matomoBaseURL</arg><arg>${matomoBaseURL}</arg>
|
||||
<arg>--repoLogPath</arg><arg>${repoLogPath}</arg>
|
||||
<arg>--portalLogPath</arg><arg>${portalLogPath}</arg>
|
||||
<arg>--portalMatomoID</arg><arg>${portalMatomoID}</arg>
|
||||
<arg>--irusUKBaseURL</arg><arg>${irusUKBaseURL}</arg>
|
||||
<arg>--irusUKReportPath</arg><arg>${irusUKReportPath}</arg>
|
||||
<arg>--sarcsReportPathArray</arg><arg>${sarcsReportPathArray}</arg>
|
||||
<arg>--sarcsReportPathNonArray</arg><arg>${sarcsReportPathNonArray}</arg>
|
||||
<arg>--lareferenciaLogPath</arg><arg>${lareferenciaLogPath}</arg>
|
||||
<arg>--lareferenciaBaseURL</arg><arg>${lareferenciaBaseURL}</arg>
|
||||
<arg>--lareferenciaAuthToken</arg><arg>${lareferenciaAuthToken}</arg>
|
||||
<capture-output/>
|
||||
</java>
|
||||
<ok to="End" />
|
||||
<error to="Kill" />
|
||||
|
|
|
@ -1,24 +0,0 @@
|
|||
#logging.config=log4j.properties
|
||||
db.driverClassName=org.postgresql.Driver
|
||||
#spring.datasource.url=jdbc:postgresql://88.197.53.70:5432/stats
|
||||
#spring.datasource.url=jdbc:postgresql://statsdb-beta.openaire.eu:5432/stats
|
||||
db.Url=jdbc:postgresql://88.197.53.70:5432/stats
|
||||
#db.Url=jdbc:postgresql://statsdb-prod.openaire.eu/stats
|
||||
db.username=sqoop
|
||||
db.password=sqoop
|
||||
db.defaultschema=usagestats
|
||||
matomo.AuthToken=703bd17d845acdaf795e01bb1e0895b9
|
||||
matomo.BaseUrl=analytics.openaire.eu
|
||||
#repo.LogPath=/Users/dpie/Desktop/Repologs/
|
||||
repo.LogPath=/user/spyros/logs/usage_stats_logs/Repologs
|
||||
portal.LogPath=/user/spyros/logs/usage_stats_logs/Portallogs/
|
||||
portal.MatomoID=109
|
||||
COUNTER.robots.Url=https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json
|
||||
IRUS_UK.BaseUrl=https://irus.jisc.ac.uk/api/sushilite/v1_7/
|
||||
#server.compression.enabled=true
|
||||
#compression.max_number_of_records=1000
|
||||
#usagestats.redis.hostname=localhost
|
||||
#usagestats.redis.port=6379
|
||||
#spring.jackson.serialization.INDENT_OUTPUT=true
|
||||
#download.folder=/Users/dpie/DownloadSushiLite
|
||||
#sushi-lite.server=http://localhost:8080
|
Loading…
Reference in New Issue