- Fix the missing handling of the case where the info about the worker named in the WorkerReport does not exist inside the "workersInfoMap", because that worker is not participating in the Service. (This case may appear in future code.)

- Code polishing.
This commit is contained in:
Lampros Smyrnaios 2023-08-30 17:07:51 +03:00
parent c32dfa882e
commit 601776e81c
2 changed files with 10 additions and 7 deletions

View File

@ -287,8 +287,11 @@ public class UrlsServiceImpl implements UrlsService {
logger.info("Initializing the addition of the worker's (" + curWorkerId + ") report for assignments_" + curReportAssignmentsCounter);
// Before continuing with inserts, take and upload the fullTexts from the Worker. Also, update the file-"location".
FileUtils.UploadFullTextsResponse uploadFullTextsResponse = fileUtils.getAndUploadFullTexts(urlReports, curReportAssignmentsCounter, curWorkerId);
if ( uploadFullTextsResponse == FileUtils.UploadFullTextsResponse.databaseError ) {
FileUtils.UploadFullTextsResponse uploadFullTextsResponse = fileUtils.getAndUploadFullTexts(urlReports, sizeOfUrlReports, curReportAssignmentsCounter, curWorkerId);
if ( uploadFullTextsResponse == null ) {
// Nothing to post to the Worker, since we do not have the worker's info.
return false;
} else if ( uploadFullTextsResponse == FileUtils.UploadFullTextsResponse.databaseError ) {
postReportResultToWorker(curWorkerId, curReportAssignmentsCounter, "Problem with the Impala-database!");
return false;
}

View File

@ -193,9 +193,10 @@ public class FileUtils {
// TODO - Unify this ExecutorService with the hash-matching executorService. Since one will ALWAYS be called after the other. So why having two ExecServices to handle?
public UploadFullTextsResponse getAndUploadFullTexts(List<UrlReport> urlReports, long assignmentsBatchCounter, String workerId) {
public UploadFullTextsResponse getAndUploadFullTexts(List<UrlReport> urlReports, int sizeOfUrlReports, long assignmentsBatchCounter, String workerId) throws RuntimeException
{
// The Controller have to request the files from the Worker, in order to upload them to the S3.
// We will have to UPDATE the "location" of each of those files in the UrlReports and then insert them all into the database.
// We UPDATE the "location" of each of those files in the UrlReports and then insert them all into the database.
String workerIp = null;
WorkerInfo workerInfo = UrlsController.workersInfoMap.get(workerId);
@ -208,9 +209,8 @@ public class FileUtils {
// Get the file-locations.
AtomicInteger numFullTextsFound = new AtomicInteger();
AtomicInteger numFilesFoundFromPreviousAssignmentsBatches = new AtomicInteger();
int urlReportsSize = urlReports.size();
SetMultimap<String, Payload> allFileNamesWithPayloads = Multimaps.synchronizedSetMultimap(HashMultimap.create((urlReportsSize / 5), 3)); // Holds multiple values for any key, if a fileName(key) has many IDs(values) associated with it.
SetMultimap<String, Payload> allFileNamesWithPayloads = Multimaps.synchronizedSetMultimap(HashMultimap.create((sizeOfUrlReports / 5), 3)); // Holds multiple values for any key, if a fileName(key) has many IDs(values) associated with it.
final String getFileLocationForHashQuery = "select `location` from " + DatabaseConnector.databaseName + ".payload" + (isTestEnvironment ? "_aggregated" : "") + " where `hash` = ? limit 1";
final int[] hashArgType = new int[] {Types.VARCHAR};
@ -295,7 +295,7 @@ public class FileUtils {
DatabaseConnector.databaseLock.unlock(); // The remaining work of this function does not use the database.
}
logger.info("NumFullTextsFound by assignments_" + assignmentsBatchCounter + " = " + numFullTextsFound.get() + " (out of " + urlReportsSize + " | about " + df.format(numFullTextsFound.get() * 100.0 / urlReportsSize) + "%).");
logger.info("NumFullTextsFound by assignments_" + assignmentsBatchCounter + " = " + numFullTextsFound.get() + " (out of " + sizeOfUrlReports + " | about " + df.format(numFullTextsFound.get() * 100.0 / sizeOfUrlReports) + "%).");
logger.debug("NumFilesFoundFromPreviousAssignmentsBatches = " + numFilesFoundFromPreviousAssignmentsBatches.get());
ArrayList<String> allFileNames = new ArrayList<>(allFileNamesWithPayloads.keySet());