Download directory automatic deletion & creation
commit e2748fea95 (parent 2e2e2b8b29)
@@ -31,6 +31,7 @@ public class ExecuteWorkflow {
 	static String dbImpalaUrl;
 	static String usageStatsDBSchema;
 	static String statsDBSchema;
+	static boolean downloadLogs;
 
 	public static void main(String args[]) throws Exception {
@@ -56,28 +57,16 @@ public class ExecuteWorkflow {
 		lareferenciaBaseURL = parser.get("lareferenciaBaseURL");
 		lareferenciaAuthToken = parser.get("lareferenciaAuthToken");
 
+		if (parser.get("downloadLogs").toLowerCase().equals("true"))
+			downloadLogs = true;
+		else
+			downloadLogs = false;
+
 		dbHiveUrl = parser.get("dbHiveUrl");
 		dbImpalaUrl = parser.get("dbImpalaUrl");
 		usageStatsDBSchema = parser.get("usageStatsDBSchema");
 		statsDBSchema = parser.get("statsDBSchema");
 
-		System.out.println("====> Printing parsed variables");
-		System.out.println(ExecuteWorkflow.matomoAuthToken);
-		System.out.println(ExecuteWorkflow.matomoBaseURL);
-		System.out.println(ExecuteWorkflow.repoLogPath);
-		System.out.println(ExecuteWorkflow.portalLogPath);
-		System.out.println(ExecuteWorkflow.irusUKBaseURL);
-		System.out.println(ExecuteWorkflow.irusUKReportPath);
-		System.out.println(ExecuteWorkflow.sarcsReportPathArray);
-		System.out.println(ExecuteWorkflow.sarcsReportPathNonArray);
-		System.out.println(ExecuteWorkflow.lareferenciaLogPath);
-		System.out.println(ExecuteWorkflow.lareferenciaBaseURL);
-		System.out.println(ExecuteWorkflow.lareferenciaAuthToken);
-		System.out.println(ExecuteWorkflow.dbHiveUrl);
-		System.out.println(ExecuteWorkflow.dbImpalaUrl);
-		System.out.println(ExecuteWorkflow.usageStatsDBSchema);
-		System.out.println(ExecuteWorkflow.statsDBSchema);
-
 		UsageStatsExporter usagestatsExport = new UsageStatsExporter();
 		usagestatsExport.export();
 	}
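Side note, not part of the commit: the if/else added above can be collapsed with Boolean.parseBoolean, which returns true only for a case-insensitive "true" and, unlike the toLowerCase() chain, does not throw a NullPointerException when the parameter is absent. A minimal sketch:

	// Equivalent one-liner; Boolean.parseBoolean(null) simply yields false.
	downloadLogs = Boolean.parseBoolean(parser.get("downloadLogs"));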
@@ -79,11 +79,18 @@ public class PiwikStatsDB {
 		try {
 			stmt = ConnectDB.getHiveConnection().createStatement();
 
-			logger.info("Dropping usagestats DB");
-			String dropDatabase = "DROP DATABASE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + "CASCADE;";
+			logger.info("Dropping usagestats DB: " + ConnectDB.getUsageStatsDBSchema());
+			String dropDatabase = "DROP DATABASE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + " CASCADE";
 			stmt.executeUpdate(dropDatabase);
+		} catch (Exception e) {
+			logger.error("Failed to drop database: " + e);
+			throw new Exception("Failed to drop database: " + e.toString(), e);
+		}
 
-			logger.info("Creating usagestats DB");
+		try {
+			stmt = ConnectDB.getHiveConnection().createStatement();
+
+			logger.info("Creating usagestats DB: " + ConnectDB.getUsageStatsDBSchema());
 			String createDatabase = "CREATE DATABASE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema();
 			stmt.executeUpdate(createDatabase);
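Why the two one-character fixes in the DROP statement matter (illustration only; "usagestats_db" is a hypothetical schema name): without the space, the schema name fused with the CASCADE keyword, and the trailing semicolon is typically rejected by HiveServer2 as extraneous input.

	String schema = "usagestats_db"; // hypothetical value for illustration
	String oldSql = "DROP DATABASE IF EXISTS " + schema + "CASCADE;";
	// -> DROP DATABASE IF EXISTS usagestats_dbCASCADE;  (wrong identifier + parse error)
	String newSql = "DROP DATABASE IF EXISTS " + schema + " CASCADE";
	// -> DROP DATABASE IF EXISTS usagestats_db CASCADE  (drops the schema and all its tables)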
@@ -1,9 +1,15 @@
 
 package eu.dnetlib.oa.graph.usagestats.export;
 
+import java.io.IOException;
 import java.sql.ResultSet;
 import java.sql.Statement;
 
+import javax.sound.midi.SysexMessage;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
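Note: of the new imports, only IOException, Configuration, FileSystem and Path are used by the code added below; javax.sound.midi.SysexMessage is never referenced and looks like an accidental IDE auto-import that could be dropped.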
@@ -41,26 +47,75 @@ public class UsageStatsExporter {
 		stmt.close();
 	}
 
+	private void reCreateLogDirs() throws IllegalArgumentException, IOException {
+		FileSystem dfs = FileSystem.get(new Configuration());
+
+		logger.info("Deleting log directories");
+
+		logger.info("Deleting repoLog directory: " + ExecuteWorkflow.repoLogPath);
+		dfs.delete(new Path(ExecuteWorkflow.repoLogPath), true);
+
+		logger.info("Deleting portalLog directory: " + ExecuteWorkflow.portalLogPath);
+		dfs.delete(new Path(ExecuteWorkflow.portalLogPath), true);
+
+		logger.info("Deleting irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
+		dfs.delete(new Path(ExecuteWorkflow.irusUKReportPath), true);
+
+		logger.info("Deleting sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
+		dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathArray), true);
+
+		logger.info("Deleting sarcsReport (NonArray) directory: " + ExecuteWorkflow.sarcsReportPathNonArray);
+		dfs.delete(new Path(ExecuteWorkflow.sarcsReportPathNonArray), true);
+
+		logger.info("Deleting lareferenciaLog directory: " + ExecuteWorkflow.lareferenciaLogPath);
+		dfs.delete(new Path(ExecuteWorkflow.lareferenciaLogPath), true);
+
+		logger.info("Creating log directories");
+
+		logger.info("Creating repoLog directory: " + ExecuteWorkflow.repoLogPath);
+		dfs.mkdirs(new Path(ExecuteWorkflow.repoLogPath));
+
+		logger.info("Creating portalLog directory: " + ExecuteWorkflow.portalLogPath);
+		dfs.mkdirs(new Path(ExecuteWorkflow.portalLogPath));
+
+		logger.info("Creating irusUKReport directory: " + ExecuteWorkflow.irusUKReportPath);
+		dfs.mkdirs(new Path(ExecuteWorkflow.irusUKReportPath));
+
+		logger.info("Creating sarcsReport (Array) directory: " + ExecuteWorkflow.sarcsReportPathArray);
+		dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathArray));
+
+		logger.info("Creating sarcsReport (NonArray) directory: " + ExecuteWorkflow.sarcsReportPathNonArray);
+		dfs.mkdirs(new Path(ExecuteWorkflow.sarcsReportPathNonArray));
+
+		logger.info("Creating lareferenciaLog directory: " + ExecuteWorkflow.lareferenciaLogPath);
+		dfs.mkdirs(new Path(ExecuteWorkflow.lareferenciaLogPath));
+	}
+
 	public void export() throws Exception {
 
 		logger.info("Initialising DB properties");
 		ConnectDB.init();
 
-		// System.exit(0);
-
 		// runImpalaQuery();
 
 		// Create DB tables - they are also needed to download the statistics too
 		logger.info("Creating database and tables");
 		PiwikStatsDB piwikstatsdb = new PiwikStatsDB(ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath);
 		//
+		reCreateLogDirs();
+
+		System.exit(0);
 
 		// // Download the statistics - The following 2 lines are not needed after the download - Commenting them out for
 		// // the moment
 		logger.info("Initializing the download logs module");
 		PiwikDownloadLogs piwd = new PiwikDownloadLogs(ExecuteWorkflow.matomoBaseURL, ExecuteWorkflow.matomoAuthToken);
 		logger.info("Downloading piwik logs");
-		// piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
+		if (ExecuteWorkflow.downloadLogs)
+			piwd
+				.GetOpenAIRELogs(
+					ExecuteWorkflow.repoLogPath,
+					ExecuteWorkflow.portalLogPath, ExecuteWorkflow.portalMatomoID);
 		logger.info("Downloaded piwik logs");
 
 		// Create DB tables, insert/update statistics
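Two observations on this hunk. First, the new System.exit(0) right after reCreateLogDirs() terminates the exporter before any log is downloaded, so it only makes sense as a temporary debugging stop. Second, the six delete/mkdirs pairs are identical up to the path; a loop-based variant (a sketch, not part of the commit) keeps the same behaviour in half the lines:

	private void reCreateLogDirs() throws IllegalArgumentException, IOException {
		FileSystem dfs = FileSystem.get(new Configuration());
		String[] logDirs = {
			ExecuteWorkflow.repoLogPath, ExecuteWorkflow.portalLogPath,
			ExecuteWorkflow.irusUKReportPath, ExecuteWorkflow.sarcsReportPathArray,
			ExecuteWorkflow.sarcsReportPathNonArray, ExecuteWorkflow.lareferenciaLogPath
		};
		for (String dir : logDirs) {
			logger.info("Re-creating directory: " + dir);
			dfs.delete(new Path(dir), true); // recursive delete; a no-op if the path is absent
			dfs.mkdirs(new Path(dir));
		}
	}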
@@ -94,5 +94,11 @@
 		"paramLongName": "statsDBSchema",
 		"paramDescription": "activate tranform-only mode. Only apply transformation step",
 		"paramRequired": true
+	},
+	{
+		"paramName": "dl",
+		"paramLongName": "downloadLogs",
+		"paramDescription": "download logs?",
+		"paramRequired": true
 	}
 ]
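Since the new parameter is declared with "paramRequired": true, every invocation must pass --downloadLogs explicitly; the workflow change in the next hunk wires it through from the ${downloadLogs} property.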
@@ -58,6 +58,7 @@
 			<arg>--dbImpalaUrl</arg><arg>${impalaJdbcUrl}</arg>
 			<arg>--usageStatsDBSchema</arg><arg>${usageStatsDBSchema}</arg>
 			<arg>--statsDBSchema</arg><arg>${statsDBSchema}</arg>
+			<arg>--downloadLogs</arg><arg>${downloadLogs}</arg>
 			<capture-output/>
 		</java>
 		<ok to="End" />