- Add the time-zone to the logs.

- Change some log-levels to "trace", although most of those log-statements remain commented-out (disabled).
Lampros Smyrnaios 2023-05-11 03:10:53 +03:00
parent b6e8cd1889
commit 992d4ffd5e
4 changed files with 25 additions and 23 deletions

View File

@@ -55,6 +55,7 @@ public class UrlsServiceImpl implements UrlsService {
 private static String excludedDatasourceIDsStringList = null;
 public static final ExecutorService insertsExecutor = Executors.newFixedThreadPool(6);
+// TODO - Unify this ExecutorService with the hash-matching executorService. Since one will ALWAYS be called after the other. So why keep two ExecServices?
 public UrlsServiceImpl(@Value("${services.pdfaggregation.controller.maxAttemptsPerRecord}") int maxAttemptsPerRecord, BulkImport bulkImport)
@@ -127,7 +128,7 @@ public class UrlsServiceImpl implements UrlsService {
 // The "order by" in the end makes sure the older attempted records will be re-attempted after a long time.
-//logger.debug("findAssignmentsQuery:\n" + findAssignmentsQuery); // DEBUG!
+logger.trace("findAssignmentsQuery:\n" + findAssignmentsQuery); // DEBUG!
 final String getAssignmentsQuery = "select * from " + ImpalaConnector.databaseName + ".current_assignment";
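Note that the newly-activated trace call above only produces output when the logger's effective level is TRACE; at any higher level it stays silent. A minimal sketch (not part of this commit) that also skips the string concatenation when tracing is off:

    if ( logger.isTraceEnabled() ) // SLF4J guard: the query string is only built when TRACE output will actually be emitted.
        logger.trace("findAssignmentsQuery:\n" + findAssignmentsQuery);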

View File

@@ -117,6 +117,7 @@ public class FileUtils {
 private static final int numOfFullTextsPerBatch = 70; // The HTTP-headers cannot be too large (It failed with 100 fileNames).
 public static final ExecutorService hashMatchingExecutor = Executors.newFixedThreadPool(6);
+// TODO - Unify this ExecutorService with the hash-matching executorService. Since one will ALWAYS be called after the other. So why keep two ExecServices?
 public UploadFullTextsResponse getAndUploadFullTexts(List<UrlReport> urlReports, HttpServletRequest request, long assignmentsBatchCounter, String workerId) {
@@ -173,7 +174,7 @@ public class FileUtils {
 if ( alreadyFoundFileLocation != null ) { // If the full-text of this record is already-found and uploaded.
 payload.setLocation(alreadyFoundFileLocation); // Set the location to the older identical file, which was uploaded to S3. The other file-data is identical.
-//logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + alreadyFoundFileLocation + "\"."); // DEBUG!
+//logger.trace("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + alreadyFoundFileLocation + "\"."); // DEBUG!
 numFilesFoundFromPreviousAssignmentsBatches.incrementAndGet();
 numFullTextsFound.incrementAndGet();
 return null; // Do not request the file from the worker, it's already uploaded. Move on. The "location" will be filled by the "setFullTextForMultiplePayloads()" method, later.
@@ -332,9 +333,9 @@ public class FileUtils {
 conn.setRequestProperty("User-Agent", "UrlsController");
 conn.connect();
 int statusCode = conn.getResponseCode();
-if ( statusCode == -1 ) {
+if ( statusCode == -1 ) { // Invalid HTTP-Response.
 logger.warn("Problem when getting the \"status-code\" for url: " + requestUrl);
-throw new RuntimeException();
+throw new RuntimeException(); // Avoid any other batches.
 } else if ( statusCode != 200 ) {
 logger.warn("HTTP-" + statusCode + ": " + getMessageFromResponseBody(conn, true) + "\n\nProblem when requesting the ZstdFile of batch_" + batchNum + " from the Worker with ID \"" + workerId + "\" and requestUrl: " + requestUrl);
 if ( (statusCode >= 500) && (statusCode <= 599) )

View File

@@ -110,7 +110,7 @@ public class ParquetFileUtils {
 else // At this point, all credential-checks have been made and there is no way the Controller can continue.
 throw new RuntimeException("No hdfs-credentials were given, in any form!");
-//logger.debug("\"hdfsHttpAuthString\": " + this.hdfsHttpAuthString); // DEBUG!
+//logger.trace("\"hdfsHttpAuthString\": " + this.hdfsHttpAuthString); // DEBUG!
 if ( ! parquetBaseDirectoryPath.endsWith(File.separator) )
 this.parquetBaseLocalDirectoryPath = parquetBaseDirectoryPath + File.separator;
@@ -241,11 +241,11 @@ public class ParquetFileUtils {
 }
 String fileName = UrlsServiceImpl.assignmentsBatchCounter.get() + "_attempts_" + attemptsIncNum + ".parquet";
-//logger.debug("Going to write " + recordsSize + " attempt-records to the parquet file: " + fileName); // DEBUG!
+//logger.trace("Going to write " + recordsSize + " attempt-records to the parquet file: " + fileName); // DEBUG!
 String fullFilePath = currentParquetPath + fileName;
 if ( writeToParquet(recordList, attemptsSchema, fullFilePath) ) {
-//logger.debug("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
+//logger.trace("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
 // Upload and insert the data to the "attempt" Impala table.
 String errorMsg = uploadParquetFileToHDFS(fullFilePath, fileName, parquetHDFSDirectoryPathAttempts);
@@ -301,11 +301,11 @@ public class ParquetFileUtils {
 }
 String fileName = UrlsServiceImpl.assignmentsBatchCounter.get() + "_payloads.parquet";
-//logger.debug("Going to write " + recordsSize + " payload-records to the parquet file: " + fileName); // DEBUG!
+//logger.trace("Going to write " + recordsSize + " payload-records to the parquet file: " + fileName); // DEBUG!
 String fullFilePath = currentParquetPath + fileName;
 if ( writeToParquet(recordList, payloadsSchema, fullFilePath) ) {
-//logger.debug("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
+//logger.trace("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
 // Upload and insert the data to the "payload" Impala table.
 String errorMsg = uploadParquetFileToHDFS(fullFilePath, fileName, parquetHDFSDirectoryPathPayloads);
@@ -321,7 +321,7 @@ public class ParquetFileUtils {
 public boolean writeToParquet(List<GenericData.Record> recordList, Schema schema, String fullFilePath)
 {
 OutputFile outputFile;
-try {
+try { // TODO - Verify that this will create any directories which do not exist in the provided path. Currently we create the directories beforehand.
 outputFile = HadoopOutputFile.fromPath(new Path(fullFilePath), new Configuration());
 //logger.trace("Created the parquet " + outputFile); // DEBUG!
 } catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown. "Throwable" catches EVERYTHING!
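Regarding the TODO introduced above: one way to guarantee that the parent directories exist, instead of creating them beforehand, is to ask the Hadoop FileSystem for them right before opening the OutputFile. A rough sketch under that assumption (the helper name and wiring are hypothetical, not taken from this codebase):

    // Assumed imports: org.apache.hadoop.conf.Configuration, org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.Path
    private static void ensureParentDirsExist(String fullFilePath, Configuration conf) throws java.io.IOException {
        Path parquetPath = new Path(fullFilePath);
        // mkdirs() creates any missing parent directories and is a no-op when they already exist.
        FileSystem.get(parquetPath.toUri(), conf).mkdirs(parquetPath.getParent());
    }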
@@ -335,9 +335,9 @@ public class ParquetFileUtils {
 // When the app runs inside a Docker Container, it is NOT guaranteed that all compression-types will work. For example, the "SNAPPY"-compression does NOT work, while the "GZIP" works.
 // Also, we would prefer ZSTD over GZIP, but the old version of the Impala-Database does not support it.
-//logger.debug("Going to write to \"" + fullFilePath + "\" the record list: " + recordList); // DEBUG!
+//logger.trace("Going to write to \"" + fullFilePath + "\" the record list: " + recordList); // DEBUG!
 for ( GenericRecord record : recordList ) {
-//logger.debug("Writing to \"" + fullFilePath + "\" the record: " + record); // DEBUG!
+//logger.trace("Writing to \"" + fullFilePath + "\" the record: " + record); // DEBUG!
 writer.write(record);
 }
 } catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown. "Throwable" catches EVERYTHING!
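For context on the compression remark above: the codec is chosen when the ParquetWriter is built. The builder call is outside the lines shown here, so the exact wiring below is an assumption, sketched only to show where GZIP would be selected with parquet-avro:

    // Assumed imports: org.apache.parquet.avro.AvroParquetWriter, org.apache.parquet.hadoop.ParquetWriter,
    // org.apache.parquet.hadoop.metadata.CompressionCodecName, org.apache.avro.generic.GenericRecord
    ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(outputFile)
            .withSchema(schema)
            .withCompressionCodec(CompressionCodecName.GZIP) // SNAPPY failed inside the Docker container; the old Impala version lacks ZSTD support.
            .build();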
@@ -352,7 +352,7 @@ public class ParquetFileUtils {
 return false;
 }
-//logger.debug("Done writing to \"" + fullFilePath + "\""); // DEBUG!
+//logger.trace("Done writing to \"" + fullFilePath + "\""); // DEBUG!
 return true;
 }
@@ -401,7 +401,7 @@ public class ParquetFileUtils {
 logger.error(errorMsg + "\n\n" + conn.getHeaderFields());
 return errorMsg;
 }
-//logger.debug("The target location is: " + location + "\nWill do a silent redirect to HTTPS."); // DEBUG!
+//logger.trace("The target location is: " + location + "\nWill do a silent redirect to HTTPS."); // DEBUG!
 location = StringUtils.replace(location, "http:", "https:", 1);
 // Unless we handle this here, we have to either complicate the process by handling the https-redirect ourselves or take a performance hit by adding one more step each time we want to upload a file.
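For background on the redirect being rewritten above: a WebHDFS upload is a two-step operation, where the first PUT to the NameNode returns a Location header pointing at a DataNode, and the file bytes are then sent to that location. Roughly, with illustrative host names and default ports (not taken from this configuration):

    PUT http://namenode:9870/webhdfs/v1/<target-path>/file.parquet?op=CREATE&overwrite=true
        -> 307 Temporary Redirect, Location: http://datanode:9864/webhdfs/v1/...   (rewritten to https before being followed)
    PUT <rewritten Location>   with the parquet bytes as the request body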
@@ -432,7 +432,7 @@ public class ParquetFileUtils {
 // Do not return here. The absence of the location is not critical. We can still create it on our own.
 location = parquetFileURI; // This location does not include the "user.name" parameter.
 }
-//logger.debug("The file \"" + parquetFileName + "\" was successfully uploaded. Its location is: " + location); // DEBUG!
+//logger.trace("The file \"" + parquetFileName + "\" was successfully uploaded. Its location is: " + location); // DEBUG!
 // Important note!
 // Using the "load data inpath" command, the files are MOVED, not copied! So we don't have to delete them afterwards.
@@ -459,7 +459,7 @@ public class ParquetFileUtils {
 ImpalaConnector.handleQueryException("loadParquetInTableQuery", loadParquetInTableQuery, e); // It's already logged.
 return false;
 }
-//logger.debug("The data from \"" + remoteParquetDataDirectory + "\" was loaded into the " + tableName + " table."); // DEBUG!
+//logger.trace("The data from \"" + remoteParquetDataDirectory + "\" was loaded into the " + tableName + " table."); // DEBUG!
 return true;
 }
@@ -512,7 +512,7 @@ public class ParquetFileUtils {
 }
 // Else, if an error message exists inside the response, then we will be alerted when parsing the JSON below.
-//logger.debug("\"jsonResponse\":\n" + jsonResponse); // DEBUG!
+//logger.trace("\"jsonResponse\":\n" + jsonResponse); // DEBUG!
 boolean foundAttemptsDir = false;
 boolean foundPayloadsAggregatedDir = false;
@@ -521,18 +521,18 @@ public class ParquetFileUtils {
 try { // Parse the jsonData
 JSONObject jObj = new JSONObject(jsonResponse); // Construct a JSONObject from the retrieved jsonData.
 JSONObject entityObject = jObj.getJSONObject("FileStatuses");
-//logger.debug("EntityObject: " + entityObject.toString()); // DEBUG!
+//logger.trace("EntityObject: " + entityObject.toString()); // DEBUG!
 JSONArray directoryStatuses = entityObject.getJSONArray("FileStatus");
-//logger.debug("directoryStatuses: " + directoryStatuses.toString()); // DEBUG!
+//logger.trace("directoryStatuses: " + directoryStatuses.toString()); // DEBUG!
 // In case no fileStatuses are found, the following for-loop will not run.
 for ( Object fileStatusObject : directoryStatuses ) {
 JSONObject fileStatusJsonObject = (JSONObject) fileStatusObject;
-//logger.debug("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG!
+//logger.trace("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG!
 String dirPath = fileStatusJsonObject.getString("pathSuffix");
-//logger.debug("DirPath: " + dirPath); // DEBUG!
+//logger.trace("DirPath: " + dirPath); // DEBUG!
 if ( dirPath.equals("attempts") )
 foundAttemptsDir = true;
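The parsing above expects the standard WebHDFS LISTSTATUS response shape. An abridged illustration of what jsonResponse would contain (only the "attempts" entry is confirmed by the code shown here; the second entry and the omitted fields are placeholders):

    {
      "FileStatuses": {
        "FileStatus": [
          { "pathSuffix": "attempts", "type": "DIRECTORY", ... },
          { "pathSuffix": "<another-subdirectory>", "type": "DIRECTORY", ... }
        ]
      }
    }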

View File

@@ -15,14 +15,14 @@
 <encoder>
 <charset>UTF-8</charset>
-<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
+<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
 </encoder>
 </appender>
 <appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
 <encoder>
 <charset>UTF-8</charset>
-<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
+<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
 </encoder>
 </appender>
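With the added z pattern letter, the %d conversion appends the short name of the JVM's default time-zone to every timestamp, which is the point of this commit. A line from the console appender would then look roughly like this (logger name, method and message are made up; EEST corresponds to the commit's +03:00 zone):

    2023-05-11 03:10:53.123 EEST [main] DEBUG e.o.u.services.UrlsServiceImpl.getAssignments(@130) - Getting assignments for worker with id: ...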