From df0ea62a5a35c68e4376319dba77fd0b52ffb583 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Thu, 19 Oct 2023 11:59:37 +0300 Subject: [PATCH] - Handle the case when the "webHDFSBaseUrl" does not use HTTPS. - Improve error-reporting when uploading a file to HDFS. --- .../openaire/urls_controller/util/ParquetFileUtils.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java index 1061752..0a69164 100644 --- a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java @@ -448,7 +448,9 @@ public class ParquetFileUtils { } //logger.trace("The target location is: " + location + "\nWill do a silent redirect to HTTPS."); // DEBUG! - location = StringUtils.replace(location, "http:", "https:", 1); + // In case the WebHDFS uses https, then perform the offline redirect to https here (to avoid the live redirection.) + if ( webHDFSBaseUrl.startsWith("https:") ) + location = StringUtils.replace(location, "http:", "https:", 1); // Unless we handle this here, we have to either complicate the process by handling the https-redirect or in any-way getting a hit in performance by having one more step each time ww want to upload a file. conn = (HttpURLConnection) (new URL(location)).openConnection(); // This already contains the "user.name" parameter. @@ -487,8 +489,8 @@ public class ParquetFileUtils { // Using the "load data inpath" command, he files are MOVED, not copied! So we don't have to delete them afterwards. // See: https://docs.cloudera.com/documentation/enterprise/latest/topics/impala_load_data.html } catch (Throwable e) { - String errorMsg = "Error while uploading parquet file \"" + parquetFileFullLocalPath + "\" to HDFS!\n" + e; - logger.error(errorMsg); + String errorMsg = "Error while uploading parquet file \"" + parquetFileFullLocalPath + "\" to HDFS!\n" + e.getMessage(); + logger.error(errorMsg, e); return errorMsg; }