- Improve handling of the case where the full-texts were found but the Controller could not acquire them from the Worker.

- Add/improve logs and comments.
- Code cleanup.
This commit is contained in:
Lampros Smyrnaios 2022-09-28 22:34:33 +03:00
parent a22144bd51
commit b6340066a7
2 changed files with 15 additions and 10 deletions

View File

@ -257,8 +257,11 @@ public class UrlController {
} }
else if ( uploadFullTextsResponse == FileUtils.UploadFullTextsResponse.unsuccessful ) { else if ( uploadFullTextsResponse == FileUtils.UploadFullTextsResponse.unsuccessful ) {
logger.error("Failed to get and/or upload the fullTexts for batch-assignments_" + curReportAssignments); logger.error("Failed to get and/or upload the fullTexts for batch-assignments_" + curReportAssignments);
// The docUrls were still found! Just update ALL the fileLocations, sizes, hashes and mimetypes, to show that the files are not available and continue with writing the attempts and the payloads. // The docUrls were still found! Just update ALL the fileLocations, sizes, hashes and mimetypes, to show that the files are not available.
fileUtils.updateUrlReportsToHaveNoFullTextFiles(urlReports, false); fileUtils.updateUrlReportsToHaveNoFullTextFiles(urlReports, false);
// We write only the payloads which are connected with retrieved full-texts, uploaded to S3-Object-Store.
// We continue with writing the "attempts", as we want to avoid re-checking the failed-urls later.
// The urls which give full-text (no matter if we could not get it from the worker), are flagged as "couldRetry" anyway, so they will be picked-up to be checked again later.
} }
else else
logger.debug("Finished uploading the full-texts from batch-assignments_" + curReportAssignments); logger.debug("Finished uploading the full-texts from batch-assignments_" + curReportAssignments);
@ -270,7 +273,8 @@ public class UrlController {
final String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, date, status, error_class, error_message) VALUES (?, ?, ?, ?, ?, ?)"; final String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, date, status, error_class, error_message) VALUES (?, ?, ?, ?, ?, ?)";
final int[] attemptArgTypes = new int[] {Types.VARCHAR, Types.VARCHAR, Types.TIMESTAMP, Types.VARCHAR, Types.VARCHAR, Types.VARCHAR}; final int[] attemptArgTypes = new int[] {Types.VARCHAR, Types.VARCHAR, Types.TIMESTAMP, Types.VARCHAR, Types.VARCHAR, Types.VARCHAR};
final AtomicInteger failedCount = new AtomicInteger(0); final AtomicInteger failedQueriesCount = new AtomicInteger(0);
// Split the "UrlReports" into some sub-lists // Split the "UrlReports" into some sub-lists
int sizeOfEachSubList = (int)(sizeOUrlReports * 0.2); int sizeOfEachSubList = (int)(sizeOUrlReports * 0.2);
@ -316,7 +320,6 @@ public class UrlController {
} }
ImpalaConnector.databaseLock.lock(); ImpalaConnector.databaseLock.lock();
try { // Invoke all the tasks and wait for them to finish before moving to the next batch. try { // Invoke all the tasks and wait for them to finish before moving to the next batch.
insertsExecutor.invokeAll(callableTasks); insertsExecutor.invokeAll(callableTasks);
} catch (InterruptedException ie) { // In this case, any unfinished tasks are cancelled. } catch (InterruptedException ie) { // In this case, any unfinished tasks are cancelled.
@ -329,7 +332,7 @@ public class UrlController {
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
} }
int failedQueries = failedCount.get(); int failedQueries = failedQueriesCount.get();
String failedQueriesMsg = failedQueries + " out of " + (sizeOUrlReports *2) + " failed to be processed!"; String failedQueriesMsg = failedQueries + " out of " + (sizeOUrlReports *2) + " failed to be processed!";
logger.debug("Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables" + ((failedQueries > 0) ? (", although " + failedQueriesMsg) : ".") logger.debug("Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables" + ((failedQueries > 0) ? (", although " + failedQueriesMsg) : ".")
+ " Going to merge the parquet files for those tables."); + " Going to merge the parquet files for those tables.");
@ -354,11 +357,14 @@ public class UrlController {
ImpalaConnector.databaseLock.unlock(); ImpalaConnector.databaseLock.unlock();
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(mergeErrorMsg); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(mergeErrorMsg);
} }
ImpalaConnector.databaseLock.unlock(); ImpalaConnector.databaseLock.unlock();
logger.debug("Finished merging the database tables."); logger.debug("Finished merging the database tables.");
if ( uploadFullTextsResponse == FileUtils.UploadFullTextsResponse.unsuccessful )
return ResponseEntity.status(HttpStatus.MULTI_STATUS).body("The full-text files failed to be acquired from the worker!\n" + failedQueriesMsg);
else
return ResponseEntity.status(HttpStatus.OK).body(failedQueriesMsg); return ResponseEntity.status(HttpStatus.OK).body(failedQueriesMsg);
} }
@ -391,7 +397,6 @@ public class UrlController {
try { // We use a "PreparedStatement" to do insertions, for security and valid SQL syntax reasons. try { // We use a "PreparedStatement" to do insertions, for security and valid SQL syntax reasons.
Object[] args = new Object[] {payload.getId(), payload.getOriginal_url(), payload.getTimestamp_acquired(), Object[] args = new Object[] {payload.getId(), payload.getOriginal_url(), payload.getTimestamp_acquired(),
urlReport.getStatus().toString(), String.valueOf(error.getType()), error.getMessage()}; urlReport.getStatus().toString(), String.valueOf(error.getType()), error.getMessage()};
jdbcTemplate.update(insertIntoAttemptBaseQuery, args, attemptArgTypes); jdbcTemplate.update(insertIntoAttemptBaseQuery, args, attemptArgTypes);
} catch (Exception e) { } catch (Exception e) {
logger.error("Problem when executing the \"insertIntoAttemptBaseQuery\": " + e.getMessage()); logger.error("Problem when executing the \"insertIntoAttemptBaseQuery\": " + e.getMessage());

View File

@ -178,7 +178,7 @@ public class FileUtils {
ArrayList<String> allFileNames = new ArrayList<>(allFileNamesWithPayloads.keySet()); ArrayList<String> allFileNames = new ArrayList<>(allFileNamesWithPayloads.keySet());
int numAllFullTexts = allFileNames.size(); int numAllFullTexts = allFileNames.size();
if ( numAllFullTexts == 0 ) { if ( numAllFullTexts == 0 ) {
logger.warn("The retrieved files where < 0 > for assignments_" + assignmentsBatchCounter + " | from worker: " + workerId); logger.warn("No full-text files were retrieved for assignments_" + assignmentsBatchCounter + " | from worker: " + workerId);
return UploadFullTextsResponse.successful; // It was handled, no error. return UploadFullTextsResponse.successful; // It was handled, no error.
} }
@ -207,7 +207,7 @@ public class FileUtils {
} }
} catch (RuntimeException re) { } catch (RuntimeException re) {
// The "cause" was logged inside "getConnection()". // The "cause" was logged inside "getConnection()".
failedBatches += (1 + (numOfBatches - batchCounter)); // Add this and the rest of the failed batches. failedBatches += (1 + (numOfBatches - batchCounter)); // The "failedBatches" will have the previously failedBatches + this one + the remaining batches which will likely fail too, thus, they will not be tested.
break; break;
} }
@ -368,7 +368,7 @@ public class FileUtils {
} }
public static String getMessageFromResponseBody(HttpURLConnection conn, boolean isError) { public String getMessageFromResponseBody(HttpURLConnection conn, boolean isError) {
StringBuilder msgStrB = new StringBuilder(500); StringBuilder msgStrB = new StringBuilder(500);
try ( BufferedReader br = new BufferedReader(new InputStreamReader((isError ? conn.getErrorStream() : conn.getInputStream()))) ) { // Try-with-resources try ( BufferedReader br = new BufferedReader(new InputStreamReader((isError ? conn.getErrorStream() : conn.getInputStream()))) ) { // Try-with-resources
String inputLine; String inputLine;