From 992d4ffd5e91861ac1ce0e910213bf857ea0c823 Mon Sep 17 00:00:00 2001
From: LSmyrnaios
Date: Thu, 11 May 2023 03:10:53 +0300
Subject: [PATCH] - Add the time-zone in the logs.
 - Change some log-levels to "trace", although most of them are still disabled.

---
 .../services/UrlsServiceImpl.java       |  3 +-
 .../urls_controller/util/FileUtils.java |  7 ++--
 .../util/ParquetFileUtils.java          | 34 +++++++++----------
 src/main/resources/logback-spring.xml   |  4 +--
 4 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java
index 26887e6..d1a5027 100644
--- a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java
+++ b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java
@@ -55,6 +55,7 @@ public class UrlsServiceImpl implements UrlsService {
     private static String excludedDatasourceIDsStringList = null;
 
     public static final ExecutorService insertsExecutor = Executors.newFixedThreadPool(6);
+    // TODO - Unify this ExecutorService with the hash-matching executorService, since one will ALWAYS be called after the other. So why have two ExecutorServices to manage?
 
     public UrlsServiceImpl(@Value("${services.pdfaggregation.controller.maxAttemptsPerRecord}") int maxAttemptsPerRecord, BulkImport bulkImport)
@@ -127,7 +128,7 @@ public class UrlsServiceImpl implements UrlsService {
         // The "order by" at the end makes sure the older attempted records will be re-attempted after a long time.
 
-        //logger.debug("findAssignmentsQuery:\n" + findAssignmentsQuery); // DEBUG!
+        logger.trace("findAssignmentsQuery:\n" + findAssignmentsQuery); // DEBUG!
 
         final String getAssignmentsQuery = "select * from " + ImpalaConnector.databaseName + ".current_assignment";
 
diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java
index 60eec41..f197f23 100644
--- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java
+++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java
@@ -117,6 +117,7 @@ public class FileUtils {
     private static final int numOfFullTextsPerBatch = 70; // The HTTP-headers cannot be too large (It failed with 100 fileNames).
 
     public static final ExecutorService hashMatchingExecutor = Executors.newFixedThreadPool(6);
+    // TODO - Unify this ExecutorService with the inserts-executorService, since one will ALWAYS be called after the other. So why have two ExecutorServices to manage?
 
     public UploadFullTextsResponse getAndUploadFullTexts(List urlReports, HttpServletRequest request, long assignmentsBatchCounter, String workerId)
     {
@@ -173,7 +174,7 @@ public class FileUtils {
             if ( alreadyFoundFileLocation != null ) { // If the full-text of this record is already-found and uploaded.
                 payload.setLocation(alreadyFoundFileLocation); // Set the location to the older identical file, which was uploaded to S3. The other file-data is identical.
-                //logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + alreadyFoundFileLocation + "\"."); // DEBUG!
+                //logger.trace("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + alreadyFoundFileLocation + "\"."); // DEBUG!
                numFilesFoundFromPreviousAssignmentsBatches.incrementAndGet();
                numFullTextsFound.incrementAndGet();
                return null; // Do not request the file from the worker; it's already uploaded. Move on. The "location" will be filled by the "setFullTextForMultiplePayloads()" method, later.
@@ -332,9 +333,9 @@ public class FileUtils {
            conn.setRequestProperty("User-Agent", "UrlsController");
            conn.connect();
            int statusCode = conn.getResponseCode();
-           if ( statusCode == -1 ) {
+           if ( statusCode == -1 ) { // Invalid HTTP-Response.
                logger.warn("Problem when getting the \"status-code\" for url: " + requestUrl);
-               throw new RuntimeException();
+               throw new RuntimeException(); // Avoid any other batches.
            } else if ( statusCode != 200 ) {
                logger.warn("HTTP-" + statusCode + ": " + getMessageFromResponseBody(conn, true) + "\n\nProblem when requesting the ZstdFile of batch_" + batchNum + " from the Worker with ID \"" + workerId + "\" and requestUrl: " + requestUrl);
                if ( (statusCode >= 500) && (statusCode <= 599) )
diff --git a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
index 65ba5ca..4733fd5 100644
--- a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
+++ b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
@@ -110,7 +110,7 @@ public class ParquetFileUtils {
        else // At this point, all credential-checks have been made and there is no way the Controller can continue.
            throw new RuntimeException("No hdfs-credentials were given, in any form!");
 
-       //logger.debug("\"hdfsHttpAuthString\": " + this.hdfsHttpAuthString); // DEBUG!
+       //logger.trace("\"hdfsHttpAuthString\": " + this.hdfsHttpAuthString); // DEBUG!
 
        if ( ! parquetBaseDirectoryPath.endsWith(File.separator) )
            this.parquetBaseLocalDirectoryPath = parquetBaseDirectoryPath + File.separator;
@@ -241,11 +241,11 @@ public class ParquetFileUtils {
        }
 
        String fileName = UrlsServiceImpl.assignmentsBatchCounter.get() + "_attempts_" + attemptsIncNum + ".parquet";
-       //logger.debug("Going to write " + recordsSize + " attempt-records to the parquet file: " + fileName); // DEBUG!
+       //logger.trace("Going to write " + recordsSize + " attempt-records to the parquet file: " + fileName); // DEBUG!
 
        String fullFilePath = currentParquetPath + fileName;
        if ( writeToParquet(recordList, attemptsSchema, fullFilePath) ) {
-           //logger.debug("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
+           //logger.trace("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
 
            // Upload and insert the data to the "attempt" Impala table.
            String errorMsg = uploadParquetFileToHDFS(fullFilePath, fileName, parquetHDFSDirectoryPathAttempts);
@@ -301,11 +301,11 @@ public class ParquetFileUtils {
        }
 
        String fileName = UrlsServiceImpl.assignmentsBatchCounter.get() + "_payloads.parquet";
-       //logger.debug("Going to write " + recordsSize + " payload-records to the parquet file: " + fileName); // DEBUG!
+       //logger.trace("Going to write " + recordsSize + " payload-records to the parquet file: " + fileName); // DEBUG!
 
        String fullFilePath = currentParquetPath + fileName;
        if ( writeToParquet(recordList, payloadsSchema, fullFilePath) ) {
-           //logger.debug("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
+           //logger.trace("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
 
            // Upload and insert the data to the "payload" Impala table.
            String errorMsg = uploadParquetFileToHDFS(fullFilePath, fileName, parquetHDFSDirectoryPathPayloads);
@@ -321,7 +321,7 @@ public class ParquetFileUtils {
    public boolean writeToParquet(List recordList, Schema schema, String fullFilePath)
    {
        OutputFile outputFile;
-       try {
+       try { // TODO - Verify that this will create any directories which do not exist in the provided path. Currently we create the directories beforehand.
            outputFile = HadoopOutputFile.fromPath(new Path(fullFilePath), new Configuration());
            //logger.trace("Created the parquet " + outputFile); // DEBUG!
        } catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown. "Throwable" catches EVERYTHING!
@@ -335,9 +335,9 @@ public class ParquetFileUtils {
            // When the app runs inside a Docker Container, it is NOT guaranteed that all compression-types will work. For example, the "SNAPPY"-compression does NOT work, while the "GZIP" works.
            // Also, we would prefer ZSTD over GZIP, but the old version of the Impala-Database does not support it.
 
-           //logger.debug("Going to write to \"" + fullFilePath + "\" the record list: " + recordList); // DEBUG!
+           //logger.trace("Going to write to \"" + fullFilePath + "\" the record list: " + recordList); // DEBUG!
            for ( GenericRecord record : recordList ) {
-               //logger.debug("Writing to \"" + fullFilePath + "\" the record: " + record); // DEBUG!
+               //logger.trace("Writing to \"" + fullFilePath + "\" the record: " + record); // DEBUG!
                writer.write(record);
            }
        } catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown. "Throwable" catches EVERYTHING!
@@ -352,7 +352,7 @@ public class ParquetFileUtils {
            return false;
        }
 
-       //logger.debug("Done writing to \"" + fullFilePath + "\""); // DEBUG!
+       //logger.trace("Done writing to \"" + fullFilePath + "\""); // DEBUG!
        return true;
    }
@@ -401,7 +401,7 @@ public class ParquetFileUtils {
            logger.error(errorMsg + "\n\n" + conn.getHeaderFields());
            return errorMsg;
        }
-       //logger.debug("The target location is: " + location + "\nWill do a silent redirect to HTTPS."); // DEBUG!
+       //logger.trace("The target location is: " + location + "\nWill do a silent redirect to HTTPS."); // DEBUG!
        location = StringUtils.replace(location, "http:", "https:", 1); // Unless we handle this here, we have to either complicate the process by handling the https-redirect ourselves, or take a performance hit by having one more step each time we want to upload a file.
@@ -432,7 +432,7 @@ public class ParquetFileUtils {
            // Do not return here. The absence of the location is not critical. We can still create it on our own.
            location = parquetFileURI; // This location does not include the "user.name" parameter.
        }
-       //logger.debug("The file \"" + parquetFileName + "\" was successfully uploaded. Its location is: " + location); // DEBUG!
+       //logger.trace("The file \"" + parquetFileName + "\" was successfully uploaded. Its location is: " + location); // DEBUG!
 
        // Important note!
        // Using the "load data inpath" command, the files are MOVED, not copied! So we don't have to delete them afterwards.
@@ -459,7 +459,7 @@ public class ParquetFileUtils {
            ImpalaConnector.handleQueryException("loadParquetInTableQuery", loadParquetInTableQuery, e); // It's already logged.
            return false;
        }
-       //logger.debug("The data from \"" + remoteParquetDataDirectory + "\" was loaded into the " + tableName + " table."); // DEBUG!
+ //logger.trace("The data from \"" + remoteParquetDataDirectory + "\" was loaded into the " + tableName + " table."); // DEBUG! return true; } @@ -512,7 +512,7 @@ public class ParquetFileUtils { } // Else, if an error message exists inside the response, then we will be alerted when parsing the Json bellow. - //logger.debug("\"jsonResponse\":\n" + jsonResponse); // DEBUG! + //logger.trace("\"jsonResponse\":\n" + jsonResponse); // DEBUG! boolean foundAttemptsDir = false; boolean foundPayloadsAggregatedDir = false; @@ -521,18 +521,18 @@ public class ParquetFileUtils { try { // Parse the jsonData JSONObject jObj = new JSONObject(jsonResponse); // Construct a JSONObject from the retrieved jsonData. JSONObject entityObject = jObj.getJSONObject("FileStatuses"); - //logger.debug("EntityObject: " + entityObject.toString()); // DEBUG! + //logger.trace("EntityObject: " + entityObject.toString()); // DEBUG! JSONArray directoryStatuses = entityObject.getJSONArray("FileStatus"); - //logger.debug("directoryStatuses: " + directoryStatuses.toString()); // DEBUG! + //logger.trace("directoryStatuses: " + directoryStatuses.toString()); // DEBUG! // In case no fileStatuses are found, the follow for-loop will not run. for ( Object fileStatusObject : directoryStatuses ) { JSONObject fileStatusJsonObject = (JSONObject) fileStatusObject; - //logger.debug("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG! + //logger.trace("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG! String dirPath = fileStatusJsonObject.getString("pathSuffix"); - //logger.debug("DirPath: " + dirPath); // DEBUG! + //logger.trace("DirPath: " + dirPath); // DEBUG! if ( dirPath.equals("attempts") ) foundAttemptsDir = true; diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml index 86ba426..0769f58 100644 --- a/src/main/resources/logback-spring.xml +++ b/src/main/resources/logback-spring.xml @@ -15,14 +15,14 @@ UTF-8 - %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n + %d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n UTF-8 - %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n + %d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n