forked from lsmyrnaios/UrlsController
- Add the time-zone in the logs.
- Change some log-levels to "trace", although most of them are still disabled.
parent b6e8cd1889
commit 992d4ffd5e
@@ -55,6 +55,7 @@ public class UrlsServiceImpl implements UrlsService {
 private static String excludedDatasourceIDsStringList = null;
 
 public static final ExecutorService insertsExecutor = Executors.newFixedThreadPool(6);
+// TODO - Unify this ExecutorService with the hash-matching executorService. Since one will ALWAYS be called after the other. So why have two ExecServices to handle?
 
 
 public UrlsServiceImpl(@Value("${services.pdfaggregation.controller.maxAttemptsPerRecord}") int maxAttemptsPerRecord, BulkImport bulkImport)
@@ -127,7 +128,7 @@ public class UrlsServiceImpl implements UrlsService {
 
 // The "order by" in the end makes sure the older attempted records will be re-attempted after a long time.
-//logger.debug("findAssignmentsQuery:\n" + findAssignmentsQuery); // DEBUG!
+logger.trace("findAssignmentsQuery:\n" + findAssignmentsQuery); // DEBUG!
 
 final String getAssignmentsQuery = "select * from " + ImpalaConnector.databaseName + ".current_assignment";
 
@@ -117,6 +117,7 @@ public class FileUtils {
 private static final int numOfFullTextsPerBatch = 70; // The HTTP-headers cannot be too large (it failed with 100 fileNames).
 
 public static final ExecutorService hashMatchingExecutor = Executors.newFixedThreadPool(6);
+// TODO - Unify this ExecutorService with the hash-matching executorService. Since one will ALWAYS be called after the other. So why have two ExecServices to handle?
 
 
 public UploadFullTextsResponse getAndUploadFullTexts(List<UrlReport> urlReports, HttpServletRequest request, long assignmentsBatchCounter, String workerId) {
@@ -173,7 +174,7 @@ public class FileUtils {
 if ( alreadyFoundFileLocation != null ) { // If the full-text of this record is already-found and uploaded.
 payload.setLocation(alreadyFoundFileLocation); // Set the location to the older identical file, which was uploaded to S3. The other file-data is identical.
-//logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + alreadyFoundFileLocation + "\"."); // DEBUG!
+//logger.trace("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + alreadyFoundFileLocation + "\"."); // DEBUG!
 numFilesFoundFromPreviousAssignmentsBatches.incrementAndGet();
 numFullTextsFound.incrementAndGet();
 return null; // Do not request the file from the worker, it's already uploaded. Move on. The "location" will be filled by the "setFullTextForMultiplePayloads()" method, later.
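(Not part of the diff: the "setFullTextForMultiplePayloads()" method mentioned above is not shown in this commit. A minimal sketch of what it presumably does, with an assumed signature, is that every payload whose full-text hashed to the same value gets the single S3 location that was uploaded once:)

    // Hypothetical sketch -- the real method and its signature are not visible in this commit.
    static void setFullTextForMultiplePayloads(Collection<Payload> payloadsWithSameHash, String s3Location) {
        for ( Payload payload : payloadsWithSameHash )
            payload.setLocation(s3Location);   // one upload, many records pointing to it
    }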
@@ -332,9 +333,9 @@ public class FileUtils {
 conn.setRequestProperty("User-Agent", "UrlsController");
 conn.connect();
 int statusCode = conn.getResponseCode();
-if ( statusCode == -1 ) {
+if ( statusCode == -1 ) { // Invalid HTTP-Response.
 logger.warn("Problem when getting the \"status-code\" for url: " + requestUrl);
-throw new RuntimeException();
+throw new RuntimeException(); // Avoid any other batches.
 } else if ( statusCode != 200 ) {
 logger.warn("HTTP-" + statusCode + ": " + getMessageFromResponseBody(conn, true) + "\n\nProblem when requesting the ZstdFile of batch_" + batchNum + " from the Worker with ID \"" + workerId + "\" and requestUrl: " + requestUrl);
 if ( (statusCode >= 500) && (statusCode <= 599) )
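(Not part of the diff: "getMessageFromResponseBody(conn, true)" is called above but not shown in this commit. A hypothetical sketch of such a helper, built only on the standard HttpURLConnection API, reads the error-stream for non-2xx responses:)

    // Hypothetical sketch -- the project's real helper may differ.
    static String getMessageFromResponseBody(HttpURLConnection conn, boolean isError) throws IOException {
        InputStream is = isError ? conn.getErrorStream() : conn.getInputStream();
        if ( is == null )
            return "";
        try ( BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)) ) {
            return br.lines().collect(Collectors.joining("\n"));   // java.util.stream.Collectors
        }
    }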
@@ -110,7 +110,7 @@ public class ParquetFileUtils {
 else // At this point, all credential-checks have been made and there is no way the Controller can continue.
 throw new RuntimeException("No hdfs-credentials were given, in any form!");
 
-//logger.debug("\"hdfsHttpAuthString\": " + this.hdfsHttpAuthString); // DEBUG!
+//logger.trace("\"hdfsHttpAuthString\": " + this.hdfsHttpAuthString); // DEBUG!
 
 if ( ! parquetBaseDirectoryPath.endsWith(File.separator) )
 this.parquetBaseLocalDirectoryPath = parquetBaseDirectoryPath + File.separator;
@@ -241,11 +241,11 @@ public class ParquetFileUtils {
 }
 
 String fileName = UrlsServiceImpl.assignmentsBatchCounter.get() + "_attempts_" + attemptsIncNum + ".parquet";
-//logger.debug("Going to write " + recordsSize + " attempt-records to the parquet file: " + fileName); // DEBUG!
+//logger.trace("Going to write " + recordsSize + " attempt-records to the parquet file: " + fileName); // DEBUG!
 
 String fullFilePath = currentParquetPath + fileName;
 if ( writeToParquet(recordList, attemptsSchema, fullFilePath) ) {
-//logger.debug("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
+//logger.trace("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
 
 // Upload and insert the data to the "attempt" Impala table.
 String errorMsg = uploadParquetFileToHDFS(fullFilePath, fileName, parquetHDFSDirectoryPathAttempts);
@@ -301,11 +301,11 @@ public class ParquetFileUtils {
 }
 
 String fileName = UrlsServiceImpl.assignmentsBatchCounter.get() + "_payloads.parquet";
-//logger.debug("Going to write " + recordsSize + " payload-records to the parquet file: " + fileName); // DEBUG!
+//logger.trace("Going to write " + recordsSize + " payload-records to the parquet file: " + fileName); // DEBUG!
 
 String fullFilePath = currentParquetPath + fileName;
 if ( writeToParquet(recordList, payloadsSchema, fullFilePath) ) {
-//logger.debug("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
+//logger.trace("Parquet file \"" + fileName + "\" was created and filled."); // DEBUG!
 
 // Upload and insert the data to the "payload" Impala table.
 String errorMsg = uploadParquetFileToHDFS(fullFilePath, fileName, parquetHDFSDirectoryPathPayloads);
@@ -321,7 +321,7 @@ public class ParquetFileUtils {
 public boolean writeToParquet(List<GenericData.Record> recordList, Schema schema, String fullFilePath)
 {
 OutputFile outputFile;
-try {
+try { // TODO - Verify that this will create any directories which do not exist in the provided path. Currently we create the directories beforehand.
 outputFile = HadoopOutputFile.fromPath(new Path(fullFilePath), new Configuration());
 //logger.trace("Created the parquet " + outputFile); // DEBUG!
 } catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown. "Throwable" catches EVERYTHING!
@@ -335,9 +335,9 @@ public class ParquetFileUtils {
 // When the app runs inside a Docker Container, it is NOT guaranteed that all compression-types will work. For example, the "SNAPPY"-compression does NOT work, while the "GZIP" works.
 // Also, we would prefer ZSTD over GZIP, but the old version of the Impala-Database does not support it..
 
-//logger.debug("Going to write to \"" + fullFilePath + "\" the record list: " + recordList); // DEBUG!
+//logger.trace("Going to write to \"" + fullFilePath + "\" the record list: " + recordList); // DEBUG!
 for ( GenericRecord record : recordList ) {
-//logger.debug("Writing to \"" + fullFilePath + "\" the record: " + record); // DEBUG!
+//logger.trace("Writing to \"" + fullFilePath + "\" the record: " + record); // DEBUG!
 writer.write(record);
 }
 } catch (Throwable e) { // The simple "Exception" may not be thrown here, but an "Error" may be thrown. "Throwable" catches EVERYTHING!
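(Not part of the diff: the "writer" used above is created outside this hunk. A minimal sketch of building it with parquet-avro, assuming the GZIP codec that the comments say works inside the container, would be:)

    // Minimal sketch -- the builder options actually used by the project are not shown in this commit.
    try ( ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(outputFile)
            .withSchema(schema)                                  // the Avro schema passed into writeToParquet()
            .withCompressionCodec(CompressionCodecName.GZIP)     // SNAPPY fails in the container; ZSTD is not supported by the old Impala
            .build() ) {
        for ( GenericRecord record : recordList )
            writer.write(record);
    }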
@@ -352,7 +352,7 @@ public class ParquetFileUtils {
 return false;
 }
 
-//logger.debug("Done writing to \"" + fullFilePath + "\""); // DEBUG!
+//logger.trace("Done writing to \"" + fullFilePath + "\""); // DEBUG!
 return true;
 }
 
@@ -401,7 +401,7 @@ public class ParquetFileUtils {
 logger.error(errorMsg + "\n\n" + conn.getHeaderFields());
 return errorMsg;
 }
-//logger.debug("The target location is: " + location + "\nWill do a silent redirect to HTTPS."); // DEBUG!
+//logger.trace("The target location is: " + location + "\nWill do a silent redirect to HTTPS."); // DEBUG!
 
 location = StringUtils.replace(location, "http:", "https:", 1);
 // Unless we handle this here, we would have to either complicate the process by handling the https-redirect, or take a performance hit by adding one more step each time we want to upload a file.
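(Not part of the diff: for context, WebHDFS file creation is a two-step protocol -- the first request answers with a redirect whose "Location" header points to a datanode, and the data is sent there with a second request. A rough sketch of the step this hunk optimizes; the URL, query parameters and buffering details are assumptions, not the project's exact code:)

    // Rough sketch -- endpoint and parameters are assumed; auth and error handling omitted.
    HttpURLConnection conn = (HttpURLConnection) new URL(parquetFileURI + "?op=CREATE&overwrite=true").openConnection();
    conn.setRequestMethod("PUT");
    conn.setInstanceFollowRedirects(false);                         // read the "Location" header ourselves
    String location = conn.getHeaderField("Location");              // points to a datanode, usually over plain http
    location = StringUtils.replace(location, "http:", "https:", 1); // silent switch, avoiding one extra redirect per file
    HttpURLConnection uploadConn = (HttpURLConnection) new URL(location).openConnection();
    uploadConn.setRequestMethod("PUT");
    uploadConn.setDoOutput(true);
    Files.copy(Paths.get(fullFilePath), uploadConn.getOutputStream());
    int status = uploadConn.getResponseCode();                       // 201 is expected on success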
@@ -432,7 +432,7 @@ public class ParquetFileUtils {
 // Do not return here. The absence of the location is not critical. We can still create it on our own.
 location = parquetFileURI; // This location does not include the "user.name" parameter.
 }
-//logger.debug("The file \"" + parquetFileName + "\" was successfully uploaded. Its location is: " + location); // DEBUG!
+//logger.trace("The file \"" + parquetFileName + "\" was successfully uploaded. Its location is: " + location); // DEBUG!
 
 // Important note!
 // Using the "load data inpath" command, the files are MOVED, not copied! So we don't have to delete them afterwards.
@@ -459,7 +459,7 @@ public class ParquetFileUtils {
 ImpalaConnector.handleQueryException("loadParquetInTableQuery", loadParquetInTableQuery, e); // It's already logged.
 return false;
 }
-//logger.debug("The data from \"" + remoteParquetDataDirectory + "\" was loaded into the " + tableName + " table."); // DEBUG!
+//logger.trace("The data from \"" + remoteParquetDataDirectory + "\" was loaded into the " + tableName + " table."); // DEBUG!
 return true;
 }
 
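(Not part of the diff: the "loadParquetInTableQuery" handled above is built elsewhere. A sketch of the kind of statement it presumably is -- the exact text is an assumption -- shows why no clean-up of the HDFS files is needed afterwards: Impala's "load data inpath" moves them into the table's directory rather than copying them.)

    // Assumed shape of the statement; the directory and table names come from the surrounding code.
    String loadParquetInTableQuery = "load data inpath '" + remoteParquetDataDirectory + "'"
            + " into table " + ImpalaConnector.databaseName + "." + tableName;
    jdbcTemplate.execute(loadParquetInTableQuery);   // assuming a Spring JdbcTemplate; the project may execute it differently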
@@ -512,7 +512,7 @@ public class ParquetFileUtils {
 }
 // Else, if an error message exists inside the response, then we will be alerted when parsing the Json below.
 
-//logger.debug("\"jsonResponse\":\n" + jsonResponse); // DEBUG!
+//logger.trace("\"jsonResponse\":\n" + jsonResponse); // DEBUG!
 
 boolean foundAttemptsDir = false;
 boolean foundPayloadsAggregatedDir = false;
@@ -521,18 +521,18 @@ public class ParquetFileUtils {
 try { // Parse the jsonData
 JSONObject jObj = new JSONObject(jsonResponse); // Construct a JSONObject from the retrieved jsonData.
 JSONObject entityObject = jObj.getJSONObject("FileStatuses");
-//logger.debug("EntityObject: " + entityObject.toString()); // DEBUG!
+//logger.trace("EntityObject: " + entityObject.toString()); // DEBUG!
 
 JSONArray directoryStatuses = entityObject.getJSONArray("FileStatus");
-//logger.debug("directoryStatuses: " + directoryStatuses.toString()); // DEBUG!
+//logger.trace("directoryStatuses: " + directoryStatuses.toString()); // DEBUG!
 
 // In case no fileStatuses are found, the following for-loop will not run.
 for ( Object fileStatusObject : directoryStatuses ) {
 JSONObject fileStatusJsonObject = (JSONObject) fileStatusObject;
-//logger.debug("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG!
+//logger.trace("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG!
 
 String dirPath = fileStatusJsonObject.getString("pathSuffix");
-//logger.debug("DirPath: " + dirPath); // DEBUG!
+//logger.trace("DirPath: " + dirPath); // DEBUG!
 
 if ( dirPath.equals("attempts") )
 foundAttemptsDir = true;
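(Not part of the diff: the "jsonResponse" parsed above is a WebHDFS LISTSTATUS payload, which is why the code looks up "FileStatuses", then the "FileStatus" array, then each entry's "pathSuffix". Trimmed to those fields, the documented response shape looks roughly like the example below; the second directory name is only assumed from the "foundPayloadsAggregatedDir" flag.)

    { "FileStatuses": { "FileStatus": [
        { "pathSuffix": "attempts", "type": "DIRECTORY" },
        { "pathSuffix": "payloads_aggregated", "type": "DIRECTORY" }
    ] } }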
@@ -15,14 +15,14 @@
 <encoder>
 <charset>UTF-8</charset>
-<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
+<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %-5level %logger{36}.%M\(@%line\) - %msg%n</pattern>
 </encoder>
 </appender>
 
 <appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
 <encoder>
 <charset>UTF-8</charset>
-<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
+<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS z} [%thread] %highlight(%-5level) %cyan(%logger{36}.%M\(@%line\)) - %msg%n</pattern>
 </encoder>
 </appender>
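(Not part of the diff: in the logback configuration above, the added "z" conversion makes every log line carry the name of the JVM's default time-zone, e.g. an illustrative "2023-05-10 14:32:01.123 EEST [main] INFO ...". If a fixed zone were ever preferred, logback also accepts it as a second argument to %d, e.g. %d{yyyy-MM-dd HH:mm:ss.SSS z, UTC}.)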