From 992d4ffd5e91861ac1ce0e910213bf857ea0c823 Mon Sep 17 00:00:00 2001
From: LSmyrnaios
Date: Thu, 11 May 2023 03:10:53 +0300
Subject: [PATCH] - Add the time-zone in the logs.
 - Change some log-levels to "trace", although most of them are still disabled.

---
 .../services/UrlsServiceImpl.java       |  3 +-
 .../urls_controller/util/FileUtils.java |  7 ++--
 .../util/ParquetFileUtils.java          | 34 +++++++++----------
 src/main/resources/logback-spring.xml   |  4 +--
 4 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java
index 26887e6..d1a5027 100644
--- a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java
+++ b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java
@@ -55,6 +55,7 @@ public class UrlsServiceImpl implements UrlsService {
     private static String excludedDatasourceIDsStringList = null;
 
     public static final ExecutorService insertsExecutor = Executors.newFixedThreadPool(6);
+    // TODO - Unify this ExecutorService with the hash-matching executorService, since one will ALWAYS be called after the other. So why have two ExecutorServices to manage?
 
     public UrlsServiceImpl(@Value("${services.pdfaggregation.controller.maxAttemptsPerRecord}") int maxAttemptsPerRecord, BulkImport bulkImport)
@@ -127,7 +128,7 @@ public class UrlsServiceImpl implements UrlsService {
         // The "order by" at the end makes sure the older attempted records will be re-attempted after a long time.
 
-        //logger.debug("findAssignmentsQuery:\n" + findAssignmentsQuery); // DEBUG!
+        logger.trace("findAssignmentsQuery:\n" + findAssignmentsQuery); // DEBUG!
 
         final String getAssignmentsQuery = "select * from " + ImpalaConnector.databaseName + ".current_assignment";
 
diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java
index 60eec41..f197f23 100644
--- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java
+++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java
@@ -117,6 +117,7 @@ public class FileUtils {
     private static final int numOfFullTextsPerBatch = 70; // The HTTP-headers cannot be too large (It failed with 100 fileNames).
 
     public static final ExecutorService hashMatchingExecutor = Executors.newFixedThreadPool(6);
+    // TODO - Unify this ExecutorService with the inserts-executorService, since one will ALWAYS be called after the other. So why have two ExecutorServices to manage?
 
     public UploadFullTextsResponse getAndUploadFullTexts(List urlReports, HttpServletRequest request, long assignmentsBatchCounter, String workerId)
     {
@@ -173,7 +174,7 @@ public class FileUtils {
             if ( alreadyFoundFileLocation != null ) { // If the full-text of this record is already-found and uploaded.
                 payload.setLocation(alreadyFoundFileLocation); // Set the location to the older identical file, which was uploaded to S3. The other file-data is identical.
-                //logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + alreadyFoundFileLocation + "\"."); // DEBUG!
+                //logger.trace("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + alreadyFoundFileLocation + "\"."); // DEBUG!
                numFilesFoundFromPreviousAssignmentsBatches.incrementAndGet();
                numFullTextsFound.incrementAndGet();
                return null; // Do not request the file from the worker; it's already uploaded. Move on. The "location" will be filled by the "setFullTextForMultiplePayloads()" method, later.
@@ -332,9 +333,9 @@ public class FileUtils {
            conn.setRequestProperty("User-Agent", "UrlsController");
            conn.connect();
            int statusCode = conn.getResponseCode();
-           if ( statusCode == -1 ) {
+           if ( statusCode == -1 ) { // Invalid HTTP-Response.
                logger.warn("Problem when getting the \"status-code\" for url: " + requestUrl);
-               throw new RuntimeException();
+               throw new RuntimeException(); // Avoid any other batches.
            } else if ( statusCode != 200 ) {
                logger.warn("HTTP-" + statusCode + ": " + getMessageFromResponseBody(conn, true) + "\n\nProblem when requesting the ZstdFile of batch_" + batchNum + " from the Worker with ID \"" + workerId + "\" and requestUrl: " + requestUrl);
                if ( (statusCode >= 500) && (statusCode <= 599) )
diff --git a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
index 65ba5ca..4733fd5 100644
--- a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
+++ b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
@@ -110,7 +110,7 @@ public class ParquetFileUtils {
        else // At this point, all credential-checks have been made and there is no way the Controller can continue.
            throw new RuntimeException("No hdfs-credentials were given, in any form!");
 
-       //logger.debug("\"hdfsHttpAuthString\": " + this.hdfsHttpAuthString); // DEBUG!
+       //logger.trace("\"hdfsHttpAuthString\": " + this.hdfsHttpAuthString); // DEBUG!
 
        if ( ! parquetBaseDirectoryPath.endsWith(File.separator) )
            this.parquetBaseLocalDirectoryPath = parquetBaseDirectoryPath + File.separator;
@@ -241,11 +241,11 @@ public class ParquetFileUtils {
        }
 
        String fileName = UrlsServiceImpl.assignmentsBatchCounter.get() + "_attempts_" + attemptsIncNum + ".parquet";
-       //logger.debug("Going to write " + recordsSize + " attempt-records to the parquet file: " + fileName); // DEBUG!
+       //logger.trace("Going to write " + recordsSize + " attempt-records to the parquet file: " + fileName); // DEBUG!
 
        String fullFilePath = currentParquetPath + fileName;
        if ( writeToParquet(recordList, attemptsSchema, fullFilePath) ) {
-           //logger.debug("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
+           //logger.trace("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
 
            // Upload and insert the data to the "attempt" Impala table.
            String errorMsg = uploadParquetFileToHDFS(fullFilePath, fileName, parquetHDFSDirectoryPathAttempts);
@@ -301,11 +301,11 @@ public class ParquetFileUtils {
        }
 
        String fileName = UrlsServiceImpl.assignmentsBatchCounter.get() + "_payloads.parquet";
-       //logger.debug("Going to write " + recordsSize + " payload-records to the parquet file: " + fileName); // DEBUG!
+       //logger.trace("Going to write " + recordsSize + " payload-records to the parquet file: " + fileName); // DEBUG!
 
        String fullFilePath = currentParquetPath + fileName;
        if ( writeToParquet(recordList, payloadsSchema, fullFilePath) ) {
-           //logger.debug("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
+           //logger.trace("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
 
            // Upload and insert the data to the "payload" Impala table.
            String errorMsg = uploadParquetFileToHDFS(fullFilePath, fileName, parquetHDFSDirectoryPathPayloads);
@@ -321,7 +321,7 @@ public class ParquetFileUtils {
    public boolean writeToParquet(List recordList, Schema schema, String fullFilePath)
    {
        OutputFile outputFile;
-       try {
+       try { // TODO - Verify that this will create any directories which do not exist in the provided path. Currently we create the directories beforehand.
            outputFile = HadoopOutputFile.fromPath(new Path(fullFilePath), new Configuration());
            //logger.trace("Created the parquet " + outputFile); // DEBUG!
        } catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown. "Throwable" catches EVERYTHING!
@@ -335,9 +335,9 @@ public class ParquetFileUtils {
            // When the app runs inside a Docker Container, it is NOT guaranteed that all compression-types will work. For example, the "SNAPPY"-compression does NOT work, while the "GZIP" works.
            // Also, we would prefer ZSTD over GZIP, but the old version of the Impala-Database does not support it.
 
-           //logger.debug("Going to write to \"" + fullFilePath + "\" the record list: " + recordList); // DEBUG!
+           //logger.trace("Going to write to \"" + fullFilePath + "\" the record list: " + recordList); // DEBUG!
            for ( GenericRecord record : recordList ) {
-               //logger.debug("Writing to \"" + fullFilePath + "\" the record: " + record); // DEBUG!
+               //logger.trace("Writing to \"" + fullFilePath + "\" the record: " + record); // DEBUG!
                writer.write(record);
            }
        } catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown. "Throwable" catches EVERYTHING!
@@ -352,7 +352,7 @@ public class ParquetFileUtils {
            return false;
        }
 
-       //logger.debug("Done writing to \"" + fullFilePath + "\""); // DEBUG!
+       //logger.trace("Done writing to \"" + fullFilePath + "\""); // DEBUG!
        return true;
    }
@@ -401,7 +401,7 @@ public class ParquetFileUtils {
            logger.error(errorMsg + "\n\n" + conn.getHeaderFields());
            return errorMsg;
        }
-       //logger.debug("The target location is: " + location + "\nWill do a silent redirect to HTTPS."); // DEBUG!
+       //logger.trace("The target location is: " + location + "\nWill do a silent redirect to HTTPS."); // DEBUG!
        location = StringUtils.replace(location, "http:", "https:", 1); // Unless we handle this here, we have to either complicate the process by handling the https-redirect ourselves, or take a performance hit by having one more step each time we want to upload a file.
@@ -432,7 +432,7 @@ public class ParquetFileUtils {
            // Do not return here. The absence of the location is not critical. We can still create it on our own.
            location = parquetFileURI; // This location does not include the "user.name" parameter.
        }
-       //logger.debug("The file \"" + parquetFileName + "\" was successfully uploaded. Its location is: " + location); // DEBUG!
+       //logger.trace("The file \"" + parquetFileName + "\" was successfully uploaded. Its location is: " + location); // DEBUG!
 
        // Important note!
        // Using the "load data inpath" command, the files are MOVED, not copied! So we don't have to delete them afterwards.
@@ -459,7 +459,7 @@ public class ParquetFileUtils {
            ImpalaConnector.handleQueryException("loadParquetInTableQuery", loadParquetInTableQuery, e); // It's already logged.
            return false;
        }
-       //logger.debug("The data from \"" + remoteParquetDataDirectory + "\" was loaded into the " + tableName + " table."); // DEBUG!
+ //logger.trace("The data from \"" + remoteParquetDataDirectory + "\" was loaded into the " + tableName + " table."); // DEBUG! return true; } @@ -512,7 +512,7 @@ public class ParquetFileUtils { } // Else, if an error message exists inside the response, then we will be alerted when parsing the Json bellow. - //logger.debug("\"jsonResponse\":\n" + jsonResponse); // DEBUG! + //logger.trace("\"jsonResponse\":\n" + jsonResponse); // DEBUG! boolean foundAttemptsDir = false; boolean foundPayloadsAggregatedDir = false; @@ -521,18 +521,18 @@ public class ParquetFileUtils { try { // Parse the jsonData JSONObject jObj = new JSONObject(jsonResponse); // Construct a JSONObject from the retrieved jsonData. JSONObject entityObject = jObj.getJSONObject("FileStatuses"); - //logger.debug("EntityObject: " + entityObject.toString()); // DEBUG! + //logger.trace("EntityObject: " + entityObject.toString()); // DEBUG! JSONArray directoryStatuses = entityObject.getJSONArray("FileStatus"); - //logger.debug("directoryStatuses: " + directoryStatuses.toString()); // DEBUG! + //logger.trace("directoryStatuses: " + directoryStatuses.toString()); // DEBUG! // In case no fileStatuses are found, the follow for-loop will not run. for ( Object fileStatusObject : directoryStatuses ) { JSONObject fileStatusJsonObject = (JSONObject) fileStatusObject; - //logger.debug("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG! + //logger.trace("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG! String dirPath = fileStatusJsonObject.getString("pathSuffix"); - //logger.debug("DirPath: " + dirPath); // DEBUG! + //logger.trace("DirPath: " + dirPath); // DEBUG! if ( dirPath.equals("attempts") ) foundAttemptsDir = true; diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml index 86ba426..0769f58 100644 --- a/src/main/resources/logback-spring.xml +++ b/src/main/resources/logback-spring.xml @@ -15,14 +15,14 @@ UTF-8 - %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n + %d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n UTF-8 - %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n + %d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n