diff --git a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java index 12a5d8c..b492eeb 100644 --- a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java @@ -166,6 +166,7 @@ public class PublicationsRetrieverPlugin { String initialSourceUrl = comment.substring(indexOfAlreadyDownloadedFromSourceUrl); //logger.debug("initialId: " + initialId + " | sourceUrl: " + initialSourceUrl); // DEBUG! // Search that ID and sourceUrl inside the list, if that instance is the first-found one, then get the file-data (there might be duplicate ID-sourceUrl instances, but only one of them has the file-data). + boolean foundAlreadyDownloadedFullText = false; for ( DataToBeLogged data_2 : FileUtils.dataToBeLoggedList ) { if ( data_2.getUrlId().equals(initialId) && (data_2.getSourceUrl().equals(initialSourceUrl)) && ! data_2.getComment().startsWith(UrlUtils.alreadyDownloadedFromIDMessage) ) { @@ -173,11 +174,13 @@ public class PublicationsRetrieverPlugin { size = data_2.getSize(); hash = data_2.getHash(); mimeType = "application/pdf"; // TODO - If support is added for other doc-formats other than "pdf", then make sure the "mime_type" is correctly specified. + foundAlreadyDownloadedFullText = true; break; } } // In case the "alreadyDownloaded" full-text is not found, we have an error. - error = new Error(Error.ErrorType.couldRetry, comment + " | That ID-sourceUrl was not found inside the WorkerReport!"); // We can still try to download it from the found docUrl, in the future. + if ( !foundAlreadyDownloadedFullText ) + error = new Error(Error.ErrorType.couldRetry, comment + " | That ID-sourceUrl was not found inside the WorkerReport!"); // We can still try to download it from the found docUrl, in the future. } else if ( ! comment.equals(HttpConnUtils.docFileNotRetrievedMessage) ) { // If it was downloaded without an error. fileLocation = comment; // This is the full-file-path. diff --git a/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java b/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java index 138ca71..9eb374f 100644 --- a/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java +++ b/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java @@ -106,19 +106,18 @@ public class AssignmentsHandler { //countDatasourcesAndRecords(assignmentsSize); // Only for DEBUG! Keep it commented in normal run. // TODO - Decide which tasks run with what plugin (depending on their datasource). - // First run -in parallel- the tasks which require some specific plugin. - // Then run the remaining tasks in the generic plugin (which handles parallelism itself). // For now, let's just run all tasks in the generic plugin. - try { PublicationsRetrieverPlugin.processAssignments(assignmentRequestCounter, assignmentsForPlugins.values()); } catch (Exception e) { logger.error("Exception when processing the assignments_" + assignmentRequestCounter, e); } // In this case, we will either have an empty WorkerReport or a half-filled one. Either way, we want to report back to the Controller. + // TODO - If we have more than one plugin running at the same time, then make the "AssignmentsHandler.urlReports"-list thread-safe. + if ( askForTest ) { logger.debug("UrlReports:"); // DEBUG! for ( UrlReport urlReport : urlReports )