From 84a37bd4b79ecf9db3673bf7f3126e26937533b6 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Tue, 21 Feb 2023 15:22:49 +0200 Subject: [PATCH] - Handle the case, where an instance of a urlReport record (having the same id and sourceUrl), may have failed to give a docUrl, due to an error, even if another instance gives the docUrl and the docFile. The absence of that handling could lead to a record-instance, being assigned a "fileLocation" which was actually an error-message (comment), and as a result the real "fileLocation" would have never been reached to be assigned, so the payload would be lost. - Improve exceptions-handling. --- .../urls_worker/UrlsWorkerApplication.java | 7 ++++++- .../urls_worker/components/AssignmentsHandler.java | 5 ++++- .../plugins/PublicationsRetrieverPlugin.java | 14 ++++++++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java b/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java index 7d9b430..5855734 100644 --- a/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java +++ b/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java @@ -48,7 +48,12 @@ public class UrlsWorkerApplication { public static void gentleAppShutdown() { - int exitCode = SpringApplication.exit(context, () -> 0); // The "PreDestroy" method will be called. (the "context" will be closed automatically (I checked it)) + int exitCode = 0; + try { + exitCode = SpringApplication.exit(context, () -> 0); // The "PreDestroy" method will be called. (the "context" will be closed automatically (I checked it)) + } catch (IllegalArgumentException iae) { + logger.error(iae.getMessage()); // This will say "Context must not be null", in case the "gentleAppShutdown()" was called too early in the app's lifetime. But it's ok. 
+ } System.exit(exitCode); } diff --git a/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java b/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java index cff4786..c0781c8 100644 --- a/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java +++ b/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java @@ -92,6 +92,9 @@ public class AssignmentsHandler { logger.error("Could not retrieve the assignments!\n" + rce.getMessage()); // It shows the response body (from Spring v.2.5.6 onwards). hadConnectionErrorOnRequest = true; return null; + } catch (IllegalArgumentException iae) { + logger.error("Could not retrieve the assignments, as the provided Controller's url was malformed!\n" + iae.getMessage()); + UrlsWorkerApplication.gentleAppShutdown(); } //logger.debug(assignmentRequest.toString()); // DEBUG! @@ -138,7 +141,7 @@ public class AssignmentsHandler { // TODO - Decide which tasks run with what plugin (depending on their datasource). // First run -in parallel- the tasks which require some specific plugin. - // Then run the remaining tasks in the generic plugin (which handles parallelism itself). + // Then, after the above plugins are finished, run the remaining tasks in the generic plugin (which handles parallelism itself). // For now, let's just run all tasks in the generic plugin. try { diff --git a/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java index 08ae497..b89981a 100644 --- a/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java @@ -196,7 +196,7 @@ public class PublicationsRetrieverPlugin { if ( "true".equals(data.getWasDocumentOrDatasetAccessible()) ) // The reversed order defends against a potential NPE. 
{ status = UrlReport.StatusType.accessible; - if ( comment.startsWith(ConnSupportUtils.alreadyDownloadedFromIDMessage, 0) ) { + if ( comment.startsWith(ConnSupportUtils.alreadyDownloadedFromIDMessage, 0) ) { // If this is not the initially-found docUrl record, go search for the initial. // The file of this docUrl was already downloaded by another docUrl. int indexOfAlreadyDownloadedFromSourceUrlMessage = comment.indexOf(ConnSupportUtils.alreadyDownloadedFromSourceUrlContinuedMessage); int indexOfAlreadyDownloadedFromSourceUrl = indexOfAlreadyDownloadedFromSourceUrlMessage + lengthOfAlreadyDownloadedFromSourceUrlContinuedMessage; @@ -211,15 +211,25 @@ public class PublicationsRetrieverPlugin { if ( ! (data_2.getUrlId().equals(initialId) && (data_2.getSourceUrl().equals(initialSourceUrl))) ) continue; + // At this point we have found a record which has the same id and sourceUrl as the inspected record. foundIDUrlInWorkerReport = true; + + if ( "false".equals(data_2.getWasDocumentOrDatasetAccessible()) ) + continue; + + // At this point we have excluded any non-docUrl record, even if it has the same id and sourceUrl. + // It is possible, that the same sourceUrl at one time it gives the docUrl and at another it does not, due to some kind of error. + // So, we do not want to accept a record-instance which does not lead to any file, even if another instance of the same record did lead to a file. + String tempFileLocation = data_2.getComment(); if ( tempFileLocation.startsWith(ConnSupportUtils.alreadyDownloadedFromIDMessage, 0) || tempFileLocation.startsWith(HttpConnUtils.docFileNotRetrievedMessage, 0) ) continue; + // At this point we have found that another instance of the same record gives the docFile itself, not a reference to it. fileLocation = tempFileLocation; size = data_2.getSize(); hash = data_2.getHash(); - mimeType = "application/pdf"; // TODO - If support is added for other doc-formats other than "pdf", then make sure the "mime_type" is assigned correctly. 
+ mimeType = "application/pdf"; // TODO - If support is added for other doc-formats other than "pdf", then make sure the "mime_type" is assigned to the value provided by the plugin (it has to be added in the future). foundAlreadyDownloadedFullText = true; break; }