forked from lsmyrnaios/UrlsController
- Improve handling of the case where the full-texts were found, but the Controller could not acquire them from the Worker.
- Add/improve logs and comments.
- Code cleanup.
This commit is contained in:
parent
a22144bd51
commit
b6340066a7
|
@ -257,8 +257,11 @@ public class UrlController {
|
|||
}
|
||||
else if ( uploadFullTextsResponse == FileUtils.UploadFullTextsResponse.unsuccessful ) {
|
||||
logger.error("Failed to get and/or upload the fullTexts for batch-assignments_" + curReportAssignments);
|
||||
// The docUrls were still found! Just update ALL the fileLocations, sizes, hashes and mimetypes, to show that the files are not available and continue with writing the attempts and the payloads.
|
||||
// The docUrls were still found! Just update ALL the fileLocations, sizes, hashes and mimetypes, to show that the files are not available.
|
||||
fileUtils.updateUrlReportsToHaveNoFullTextFiles(urlReports, false);
|
||||
// We write only the payloads which are connected with retrieved full-texts, uploaded to S3-Object-Store.
|
||||
// We continue with writing the "attempts", as we want to avoid re-checking the failed-urls later.
|
||||
// The urls which give full-text (no matter if we could not get it from the worker), are flagged as "couldRetry" anyway, so they will be picked-up to be checked again later.
|
||||
}
|
||||
else
|
||||
logger.debug("Finished uploading the full-texts from batch-assignments_" + curReportAssignments);
|
||||
|
@ -270,7 +273,8 @@ public class UrlController {
|
|||
final String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, date, status, error_class, error_message) VALUES (?, ?, ?, ?, ?, ?)";
|
||||
final int[] attemptArgTypes = new int[] {Types.VARCHAR, Types.VARCHAR, Types.TIMESTAMP, Types.VARCHAR, Types.VARCHAR, Types.VARCHAR};
|
||||
|
||||
final AtomicInteger failedCount = new AtomicInteger(0);
|
||||
final AtomicInteger failedQueriesCount = new AtomicInteger(0);
|
||||
|
||||
|
||||
// Split the "UrlReports" into some sub-lists
|
||||
int sizeOfEachSubList = (int)(sizeOUrlReports * 0.2);
|
||||
|
@ -316,7 +320,6 @@ public class UrlController {
|
|||
}
|
||||
|
||||
ImpalaConnector.databaseLock.lock();
|
||||
|
||||
try { // Invoke all the tasks and wait for them to finish before moving to the next batch.
|
||||
insertsExecutor.invokeAll(callableTasks);
|
||||
} catch (InterruptedException ie) { // In this case, any unfinished tasks are cancelled.
|
||||
|
@ -329,7 +332,7 @@ public class UrlController {
|
|||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
|
||||
}
|
||||
|
||||
int failedQueries = failedCount.get();
|
||||
int failedQueries = failedQueriesCount.get();
|
||||
String failedQueriesMsg = failedQueries + " out of " + (sizeOUrlReports *2) + " failed to be processed!";
|
||||
logger.debug("Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables" + ((failedQueries > 0) ? (", although " + failedQueriesMsg) : ".")
|
||||
+ " Going to merge the parquet files for those tables.");
|
||||
|
@ -354,11 +357,14 @@ public class UrlController {
|
|||
ImpalaConnector.databaseLock.unlock();
|
||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(mergeErrorMsg);
|
||||
}
|
||||
|
||||
ImpalaConnector.databaseLock.unlock();
|
||||
|
||||
logger.debug("Finished merging the database tables.");
|
||||
if ( uploadFullTextsResponse == FileUtils.UploadFullTextsResponse.unsuccessful )
|
||||
return ResponseEntity.status(HttpStatus.MULTI_STATUS).body("The full-text files failed to be acquired from the worker!\n" + failedQueriesMsg);
|
||||
else
|
||||
return ResponseEntity.status(HttpStatus.OK).body(failedQueriesMsg);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -391,7 +397,6 @@ public class UrlController {
|
|||
try { // We use a "PreparedStatement" to do insertions, for security and valid SQL syntax reasons.
|
||||
Object[] args = new Object[] {payload.getId(), payload.getOriginal_url(), payload.getTimestamp_acquired(),
|
||||
urlReport.getStatus().toString(), String.valueOf(error.getType()), error.getMessage()};
|
||||
|
||||
jdbcTemplate.update(insertIntoAttemptBaseQuery, args, attemptArgTypes);
|
||||
} catch (Exception e) {
|
||||
logger.error("Problem when executing the \"insertIntoAttemptBaseQuery\": " + e.getMessage());
|
||||
|
|
|
@ -178,7 +178,7 @@ public class FileUtils {
|
|||
ArrayList<String> allFileNames = new ArrayList<>(allFileNamesWithPayloads.keySet());
|
||||
int numAllFullTexts = allFileNames.size();
|
||||
if ( numAllFullTexts == 0 ) {
|
||||
logger.warn("The retrieved files where < 0 > for assignments_" + assignmentsBatchCounter + " | from worker: " + workerId);
|
||||
logger.warn("No full-text files were retrieved for assignments_" + assignmentsBatchCounter + " | from worker: " + workerId);
|
||||
return UploadFullTextsResponse.successful; // It was handled, no error.
|
||||
}
|
||||
|
||||
|
@ -207,7 +207,7 @@ public class FileUtils {
|
|||
}
|
||||
} catch (RuntimeException re) {
|
||||
// The "cause" was logged inside "getConnection()".
|
||||
failedBatches += (1 + (numOfBatches - batchCounter)); // Add this and the rest of the failed batches.
|
||||
failedBatches += (1 + (numOfBatches - batchCounter)); // The "failedBatches" will have the previously failedBatches + this one + the remaining batches which will likely fail too, thus, they will not be tested.
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -368,7 +368,7 @@ public class FileUtils {
|
|||
}
|
||||
|
||||
|
||||
public static String getMessageFromResponseBody(HttpURLConnection conn, boolean isError) {
|
||||
public String getMessageFromResponseBody(HttpURLConnection conn, boolean isError) {
|
||||
StringBuilder msgStrB = new StringBuilder(500);
|
||||
try ( BufferedReader br = new BufferedReader(new InputStreamReader((isError ? conn.getErrorStream() : conn.getInputStream()))) ) { // Try-with-resources
|
||||
String inputLine;
|
||||
|
|
Loading…
Reference in New Issue