The Controller's Application receives requests coming from the [**Workers**](https://code-repo.d4science.org/lsmyrnaios/UrlsWorker) (deployed on the cloud), constructs an assignments-list with data received from a database and returns the list to the workers.<br>
Then, it receives the "WorkerReports", requests the full-texts from the workers in batches, and uploads them to the S3 Object-Store. Finally, it writes the related reports, along with the updated file-locations, into the database.<br>
logger.error(initMsg+" The request came from another IP: " +remoteAddr+" | while this worker was registered with this IP: " +workerInfo.getWorkerIP());
logger.error(initMsg+" The request came from an IP different from the one this worker was registered with: " +workerInfo.getWorkerIP());
// The report-file will be overwritten every now and then, instead of appended, since we want to add an updated JSON report-object each time.
// Also, we do not want to write the object in the end (in its final form), since we want the user to have the ability to request the report at any time,
// after submitting the bulk-import request, to see its progress (since the number of files may be very large and the processing may take many hours).
@ -240,6 +240,7 @@ public class BulkImportServiceImpl implements BulkImportService {
Stringmsg="Going to import "+numOfFilesInSegment+" files, for segment-"+segmentCounter;
StringerrorMsg="Exception when uploading the files of segment_"+segmentCounter+" to the S3 Object Store. Will avoid uploading any file for this segment..";
StringerrorMsg="Exception when uploading the files of segment_"+segmentCounter+" to the S3 Object Store. Will avoid uploading the rest of the files for this segment..";
logger.error(errorMsg+additionalLoggingMsg);
bulkImportReport.addEvent(errorMsg);
for(intj=i;j<numOfFilesInSegment;++j)
@ -269,14 +270,14 @@ public class BulkImportServiceImpl implements BulkImportService {
failedFiles.add(fileLocation);
}
if(((++counter)%150)==0){// Every 150 files, report the status for this segment.
if(((++counter)%150)==0){// Every 150 files, report the status for this segment and write it to the file.
msg="Progress for segment-"+segmentCounter+" : "+payloadRecords.size()+" files have been imported and "+failedFiles.size()+" have failed, out of "+numOfFilesInSegment+" files.";
StringactualUrl=(bulkImportSource.getPdfUrlPrefix()+fileNameID);// This string-concatenation works with urls of arXiv. A different construction may be needed for other datasources.
StringoriginalUrl=actualUrl;// We have the full-text files from bulk-import, so let's assume the original-url is also the full-text-link.
@ -403,10 +408,6 @@ public class BulkImportServiceImpl implements BulkImportService {
// We write only the payloads which are connected with retrieved full-texts, uploaded to S3-Object-Store.
// We continue with writing the "attempts", as we want to avoid re-checking the failed-urls later.
// The urls which give full-text (no matter if we could not get it from the worker), are flagged as "couldRetry" anyway, so they will be picked-up to be checked again later.
logger.error("None of the "+numOfBatches+" batches could be handled for assignments_"+assignmentsBatchCounter+", for worker: "+workerId);
updateUrlReportsToHaveNoFullTextFiles(urlReports,true);// Make sure all records without an S3-Url have < null > file-data (some batches or uploads might have failed).
removeUnretrievedFullTextsFromUrlReports(urlReports,true);// Make sure all records without an S3-Url have < null > file-data (some batches or uploads might have failed).