- Handle the case where an instance of a urlReport record (having the same id and sourceUrl) may have failed to give a docUrl due to an error, even if another instance gives the docUrl and the docFile. The absence of that handling could lead to a record-instance being assigned a "fileLocation" which was actually an error-message (comment); as a result, the real "fileLocation" would never have been reached and assigned, so the payload would be lost.

- Improve exception handling.
This commit is contained in:
Lampros Smyrnaios 2023-02-21 15:22:49 +02:00
parent 9888349bef
commit 84a37bd4b7
3 changed files with 22 additions and 4 deletions

View File

@ -48,7 +48,12 @@ public class UrlsWorkerApplication {
/**
 * Shuts the application down: asks Spring to exit the application context and then terminates the JVM.
 * <p>
 * If {@code SpringApplication.exit} throws an {@link IllegalArgumentException}, the error is logged and
 * the JVM is still terminated, using the default exit code {@code 0}.
 */
public static void gentleAppShutdown()
{
	int exitCode = 0;	// Default exit code, used when Spring cannot provide one (see the catch-block below).
	try {
		exitCode = SpringApplication.exit(context, () -> 0);	// The "PreDestroy" method will be called. (the "context" will be closed automatically (I checked it))
	} catch (IllegalArgumentException iae) {
		// Do not let this exception abort the shutdown; log it and proceed to terminate the JVM.
		logger.error(iae.getMessage());	// This will say "Context must not be null", in case the "gentleAppShutdown()" was called too early in the app's lifetime. But it's ok.
	}
	System.exit(exitCode);
}

View File

@ -92,6 +92,9 @@ public class AssignmentsHandler {
logger.error("Could not retrieve the assignments!\n" + rce.getMessage()); // It shows the response body (from Spring v.2.5.6 onwards).
hadConnectionErrorOnRequest = true;
return null;
} catch (IllegalArgumentException iae) {
logger.error("Could not retrieve the assignments, as the provided Controller's url was malformed!\n" + iae.getMessage());
UrlsWorkerApplication.gentleAppShutdown();
}
//logger.debug(assignmentRequest.toString()); // DEBUG!
@ -138,7 +141,7 @@ public class AssignmentsHandler {
// TODO - Decide which tasks run with what plugin (depending on their datasource).
// First run -in parallel- the tasks which require some specific plugin.
// Then run the remaining tasks in the generic plugin (which handles parallelism itself).
// Then, after the above plugins are finished, run the remaining tasks in the generic plugin (which handles parallelism itself).
// For now, let's just run all tasks in the generic plugin.
try {

View File

@ -196,7 +196,7 @@ public class PublicationsRetrieverPlugin {
if ( "true".equals(data.getWasDocumentOrDatasetAccessible()) ) // The reversed order defends against a potential NPE.
{
status = UrlReport.StatusType.accessible;
if ( comment.startsWith(ConnSupportUtils.alreadyDownloadedFromIDMessage, 0) ) {
if ( comment.startsWith(ConnSupportUtils.alreadyDownloadedFromIDMessage, 0) ) { // If this is not the initially-found docUrl record, go search for the initial.
// The file of this docUrl was already downloaded by another docUrl.
int indexOfAlreadyDownloadedFromSourceUrlMessage = comment.indexOf(ConnSupportUtils.alreadyDownloadedFromSourceUrlContinuedMessage);
int indexOfAlreadyDownloadedFromSourceUrl = indexOfAlreadyDownloadedFromSourceUrlMessage + lengthOfAlreadyDownloadedFromSourceUrlContinuedMessage;
@ -211,15 +211,25 @@ public class PublicationsRetrieverPlugin {
if ( ! (data_2.getUrlId().equals(initialId) && (data_2.getSourceUrl().equals(initialSourceUrl))) )
continue;
// At this point we have found a record which has the same id and sourceUrl as the inspected record.
foundIDUrlInWorkerReport = true;
if ( "false".equals(data_2.getWasDocumentOrDatasetAccessible()) )
continue;
// At this point we have excluded any non-docUrl record, even if it has the same id and sourceUrl.
// It is possible that the same sourceUrl gives the docUrl at one time and fails to give it at another, due to some kind of error.
// So, we do not want to accept a record-instance which does not lead to any file, even if another instance of the same record did lead to a file.
String tempFileLocation = data_2.getComment();
if ( tempFileLocation.startsWith(ConnSupportUtils.alreadyDownloadedFromIDMessage, 0) || tempFileLocation.startsWith(HttpConnUtils.docFileNotRetrievedMessage, 0) )
continue;
// At this point we have found that another instance of the same record gives the docFile itself, not a reference to it.
fileLocation = tempFileLocation;
size = data_2.getSize();
hash = data_2.getHash();
mimeType = "application/pdf"; // TODO - If support is added for other doc-formats other than "pdf", then make sure the "mime_type" is assigned correctly.
mimeType = "application/pdf"; // TODO - If support is added for other doc-formats other than "pdf", then make sure the "mime_type" is assigned to the value provided by the plugin (it has to be added in the future).
foundAlreadyDownloadedFullText = true;
break;
}