- Add the time-zone to the logs.

- Change some log-levels to "trace", although most of those log-statements remain commented-out (disabled).
Lampros Smyrnaios 2023-05-11 03:10:53 +03:00
parent b6e8cd1889
commit 992d4ffd5e
4 changed files with 25 additions and 23 deletions

View File

@@ -55,6 +55,7 @@ public class UrlsServiceImpl implements UrlsService {
 private static String excludedDatasourceIDsStringList = null;
 public static final ExecutorService insertsExecutor = Executors.newFixedThreadPool(6);
+// TODO - Unify this ExecutorService with the hash-matching executorService. Since one will ALWAYS be called after the other. So why keep two ExecServices?
 public UrlsServiceImpl(@Value("${services.pdfaggregation.controller.maxAttemptsPerRecord}") int maxAttemptsPerRecord, BulkImport bulkImport)
@@ -127,7 +128,7 @@ public class UrlsServiceImpl implements UrlsService {
 // The "order by" in the end makes sure the older attempted records will be re-attempted after a long time.
-//logger.debug("findAssignmentsQuery:\n" + findAssignmentsQuery); // DEBUG!
+logger.trace("findAssignmentsQuery:\n" + findAssignmentsQuery); // DEBUG!
 final String getAssignmentsQuery = "select * from " + ImpalaConnector.databaseName + ".current_assignment";
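Note that the newly-activated trace call above only produces output when the logger's effective level is TRACE; at any higher level it stays silent. A minimal sketch (not part of this commit) that also skips the string concatenation when tracing is off:

    if ( logger.isTraceEnabled() ) // SLF4J guard: the query string is only built when TRACE output will actually be emitted.
        logger.trace("findAssignmentsQuery:\n" + findAssignmentsQuery);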

View File

@@ -117,6 +117,7 @@ public class FileUtils {
 private static final int numOfFullTextsPerBatch = 70; // The HTTP-headers cannot be too large (It failed with 100 fileNames).
 public static final ExecutorService hashMatchingExecutor = Executors.newFixedThreadPool(6);
+// TODO - Unify this ExecutorService with the hash-matching executorService. Since one will ALWAYS be called after the other. So why keep two ExecServices?
 public UploadFullTextsResponse getAndUploadFullTexts(List<UrlReport> urlReports, HttpServletRequest request, long assignmentsBatchCounter, String workerId) {
@@ -173,7 +174,7 @@ public class FileUtils {
 if ( alreadyFoundFileLocation != null ) { // If the full-text of this record is already-found and uploaded.
 payload.setLocation(alreadyFoundFileLocation); // Set the location to the older identical file, which was uploaded to S3. The other file-data is identical.
-//logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + alreadyFoundFileLocation + "\"."); // DEBUG!
+//logger.trace("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + alreadyFoundFileLocation + "\"."); // DEBUG!
 numFilesFoundFromPreviousAssignmentsBatches.incrementAndGet();
 numFullTextsFound.incrementAndGet();
 return null; // Do not request the file from the worker, it's already uploaded. Move on. The "location" will be filled by the "setFullTextForMultiplePayloads()" method, later.
@@ -332,9 +333,9 @@ public class FileUtils {
 conn.setRequestProperty("User-Agent", "UrlsController");
 conn.connect();
 int statusCode = conn.getResponseCode();
-if ( statusCode == -1 ) {
+if ( statusCode == -1 ) { // Invalid HTTP-Response.
 logger.warn("Problem when getting the \"status-code\" for url: " + requestUrl);
-throw new RuntimeException();
+throw new RuntimeException(); // Avoid any other batches.
 } else if ( statusCode != 200 ) {
 logger.warn("HTTP-" + statusCode + ": " + getMessageFromResponseBody(conn, true) + "\n\nProblem when requesting the ZstdFile of batch_" + batchNum + " from the Worker with ID \"" + workerId + "\" and requestUrl: " + requestUrl);
 if ( (statusCode >= 500) && (statusCode <= 599) )

View File

@@ -110,7 +110,7 @@ public class ParquetFileUtils {
 else // At this point, all credential-checks have been made and there is no way the Controller can continue.
 throw new RuntimeException("No hdfs-credentials were given, in any form!");
-//logger.debug("\"hdfsHttpAuthString\": " + this.hdfsHttpAuthString); // DEBUG!
+//logger.trace("\"hdfsHttpAuthString\": " + this.hdfsHttpAuthString); // DEBUG!
 if ( ! parquetBaseDirectoryPath.endsWith(File.separator) )
 this.parquetBaseLocalDirectoryPath = parquetBaseDirectoryPath + File.separator;
@@ -241,11 +241,11 @@ public class ParquetFileUtils {
 }
 String fileName = UrlsServiceImpl.assignmentsBatchCounter.get() + "_attempts_" + attemptsIncNum + ".parquet";
-//logger.debug("Going to write " + recordsSize + " attempt-records to the parquet file: " + fileName); // DEBUG!
+//logger.trace("Going to write " + recordsSize + " attempt-records to the parquet file: " + fileName); // DEBUG!
 String fullFilePath = currentParquetPath + fileName;
 if ( writeToParquet(recordList, attemptsSchema, fullFilePath) ) {
-//logger.debug("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
+//logger.trace("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
 // Upload and insert the data to the "attempt" Impala table.
 String errorMsg = uploadParquetFileToHDFS(fullFilePath, fileName, parquetHDFSDirectoryPathAttempts);
@@ -301,11 +301,11 @@ public class ParquetFileUtils {
 }
 String fileName = UrlsServiceImpl.assignmentsBatchCounter.get() + "_payloads.parquet";
-//logger.debug("Going to write " + recordsSize + " payload-records to the parquet file: " + fileName); // DEBUG!
+//logger.trace("Going to write " + recordsSize + " payload-records to the parquet file: " + fileName); // DEBUG!
 String fullFilePath = currentParquetPath + fileName;
 if ( writeToParquet(recordList, payloadsSchema, fullFilePath) ) {
-//logger.debug("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
+//logger.trace("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
 // Upload and insert the data to the "payload" Impala table.
 String errorMsg = uploadParquetFileToHDFS(fullFilePath, fileName, parquetHDFSDirectoryPathPayloads);
@@ -321,7 +321,7 @@ public class ParquetFileUtils {
 public boolean writeToParquet(List<GenericData.Record> recordList, Schema schema, String fullFilePath)
 {
 OutputFile outputFile;
-try {
+try { // TODO - Verify that this will create any directories which do not exist in the provided path. Currently we create the directories beforehand.
 outputFile = HadoopOutputFile.fromPath(new Path(fullFilePath), new Configuration());
 //logger.trace("Created the parquet " + outputFile); // DEBUG!
 } catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown. "Throwable" catches EVERYTHING!
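Regarding the TODO introduced above: one way to guarantee that the parent directories exist, instead of creating them beforehand, is to ask the Hadoop FileSystem for them right before opening the OutputFile. A rough sketch under that assumption (the helper name and wiring are hypothetical, not taken from this codebase):

    // Assumed imports: org.apache.hadoop.conf.Configuration, org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.Path
    private static void ensureParentDirsExist(String fullFilePath, Configuration conf) throws java.io.IOException {
        Path parquetPath = new Path(fullFilePath);
        // mkdirs() creates any missing parent directories and is a no-op when they already exist.
        FileSystem.get(parquetPath.toUri(), conf).mkdirs(parquetPath.getParent());
    }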
@@ -335,9 +335,9 @@ public class ParquetFileUtils {
 // When the app runs inside a Docker Container, it is NOT guaranteed that all compression-types will work. For example, the "SNAPPY"-compression does NOT work, while the "GZIP" works.
 // Also, we would prefer ZSTD over GZIP, but the old version of the Impala-Database does not support it.
-//logger.debug("Going to write to \"" + fullFilePath + "\" the record list: " + recordList); // DEBUG!
+//logger.trace("Going to write to \"" + fullFilePath + "\" the record list: " + recordList); // DEBUG!
 for ( GenericRecord record : recordList ) {
-//logger.debug("Writing to \"" + fullFilePath + "\" the record: " + record); // DEBUG!
+//logger.trace("Writing to \"" + fullFilePath + "\" the record: " + record); // DEBUG!
 writer.write(record);
 }
 } catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown. "Throwable" catches EVERYTHING!
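For context on the compression remark above: the codec is chosen when the ParquetWriter is built. The builder call is outside the lines shown here, so the exact wiring below is an assumption, sketched only to show where GZIP would be selected with parquet-avro:

    // Assumed imports: org.apache.parquet.avro.AvroParquetWriter, org.apache.parquet.hadoop.ParquetWriter,
    // org.apache.parquet.hadoop.metadata.CompressionCodecName, org.apache.avro.generic.GenericRecord
    ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(outputFile)
            .withSchema(schema)
            .withCompressionCodec(CompressionCodecName.GZIP) // SNAPPY failed inside the Docker container; the old Impala version lacks ZSTD support.
            .build();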
@@ -352,7 +352,7 @@ public class ParquetFileUtils {
 return false;
 }
-//logger.debug("Done writing to \"" + fullFilePath + "\""); // DEBUG!
+//logger.trace("Done writing to \"" + fullFilePath + "\""); // DEBUG!
 return true;
 }
@@ -401,7 +401,7 @@ public class ParquetFileUtils {
 logger.error(errorMsg + "\n\n" + conn.getHeaderFields());
 return errorMsg;
 }
-//logger.debug("The target location is: " + location + "\nWill do a silent redirect to HTTPS."); // DEBUG!
+//logger.trace("The target location is: " + location + "\nWill do a silent redirect to HTTPS."); // DEBUG!
 location = StringUtils.replace(location, "http:", "https:", 1);
 // Unless we handle this here, we have to either complicate the process by handling the https-redirect ourselves or take a performance hit by adding one more step each time we want to upload a file.
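For background on the redirect being rewritten above: a WebHDFS upload is a two-step operation, where the first PUT to the NameNode returns a Location header pointing at a DataNode, and the file bytes are then sent to that location. Roughly, with illustrative host names and default ports (not taken from this configuration):

    PUT http://namenode:9870/webhdfs/v1/<target-path>/file.parquet?op=CREATE&overwrite=true
        -> 307 Temporary Redirect, Location: http://datanode:9864/webhdfs/v1/...   (rewritten to https before being followed)
    PUT <rewritten Location>   with the parquet bytes as the request body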
@@ -432,7 +432,7 @@ public class ParquetFileUtils {
 // Do not return here. The absence of the location is not critical. We can still create it on our own.
 location = parquetFileURI; // This location does not include the "user.name" parameter.
 }
-//logger.debug("The file \"" + parquetFileName + "\" was successfully uploaded. Its location is: " + location); // DEBUG!
+//logger.trace("The file \"" + parquetFileName + "\" was successfully uploaded. Its location is: " + location); // DEBUG!
 // Important note!
 // Using the "load data inpath" command, the files are MOVED, not copied! So we don't have to delete them afterwards.
@@ -459,7 +459,7 @@ public class ParquetFileUtils {
 ImpalaConnector.handleQueryException("loadParquetInTableQuery", loadParquetInTableQuery, e); // It's already logged.
 return false;
 }
-//logger.debug("The data from \"" + remoteParquetDataDirectory + "\" was loaded into the " + tableName + " table."); // DEBUG!
+//logger.trace("The data from \"" + remoteParquetDataDirectory + "\" was loaded into the " + tableName + " table."); // DEBUG!
 return true;
 }
@@ -512,7 +512,7 @@ public class ParquetFileUtils {
 }
 // Else, if an error message exists inside the response, then we will be alerted when parsing the JSON below.
-//logger.debug("\"jsonResponse\":\n" + jsonResponse); // DEBUG!
+//logger.trace("\"jsonResponse\":\n" + jsonResponse); // DEBUG!
 boolean foundAttemptsDir = false;
 boolean foundPayloadsAggregatedDir = false;
@@ -521,18 +521,18 @@ public class ParquetFileUtils {
 try { // Parse the jsonData
 JSONObject jObj = new JSONObject(jsonResponse); // Construct a JSONObject from the retrieved jsonData.
 JSONObject entityObject = jObj.getJSONObject("FileStatuses");
-//logger.debug("EntityObject: " + entityObject.toString()); // DEBUG!
+//logger.trace("EntityObject: " + entityObject.toString()); // DEBUG!
 JSONArray directoryStatuses = entityObject.getJSONArray("FileStatus");
-//logger.debug("directoryStatuses: " + directoryStatuses.toString()); // DEBUG!
+//logger.trace("directoryStatuses: " + directoryStatuses.toString()); // DEBUG!
 // In case no fileStatuses are found, the following for-loop will not run.
 for ( Object fileStatusObject : directoryStatuses ) {
 JSONObject fileStatusJsonObject = (JSONObject) fileStatusObject;
-//logger.debug("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG!
+//logger.trace("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG!
 String dirPath = fileStatusJsonObject.getString("pathSuffix");
-//logger.debug("DirPath: " + dirPath); // DEBUG!
+//logger.trace("DirPath: " + dirPath); // DEBUG!
 if ( dirPath.equals("attempts") )
 foundAttemptsDir = true;
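The parsing above expects the standard WebHDFS LISTSTATUS response shape. An abridged illustration of what jsonResponse would contain (only the "attempts" entry is confirmed by the code shown here; the second entry and the omitted fields are placeholders):

    {
      "FileStatuses": {
        "FileStatus": [
          { "pathSuffix": "attempts", "type": "DIRECTORY", ... },
          { "pathSuffix": "<another-subdirectory>", "type": "DIRECTORY", ... }
        ]
      }
    }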

View File

@@ -15,14 +15,14 @@
 <encoder>
 <charset>UTF-8</charset>
-<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
+<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
 </encoder>
 </appender>
 <appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
 <encoder>
 <charset>UTF-8</charset>
-<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
+<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
 </encoder>
 </appender>
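With the added z pattern letter, the %d conversion appends the short name of the JVM's default time-zone to every timestamp, which is the point of this commit. A line from the console appender would then look roughly like this (logger name, method and message are made up; EEST corresponds to the commit's +03:00 zone):

    2023-05-11 03:10:53.123 EEST [main] DEBUG e.o.u.services.UrlsServiceImpl.getAssignments(@130) - Getting assignments for worker with id: ...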