diff --git a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java index 32e81c3..3b8d8ec 100644 --- a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java @@ -130,6 +130,9 @@ public class PublicationsRetrieverPlugin { logger.warn(numFailedTasks + " tasks failed, from assignments_" + assignmentRequestCounter); addUrlReportsToWorkerReport(); callableTasks.clear(); // Reset the thread-tasks-list for the next batch. + + UrlUtils.docOrDatasetUrlsWithIDs.clear(); // This HashTable is useful only for a single assignments-batch. + // In the next batch, the previously stored files might have been already uploaded by the Controller and deleted by the worker. Also, they will be stored in a different directory anyway. } @@ -151,11 +154,11 @@ public class PublicationsRetrieverPlugin { status = UrlReport.StatusType.accessible; if ( comment.contains(UrlUtils.alreadyDownloadedByIDMessage) ) { // The file of this docUrl was already downloaded by another docUrl. - String previousId = comment.substring(UrlUtils.alreadyDownloadedByIDMessage.length() +1); - //logger.debug("previousId: " + previousId); // DEBUG! + String initialId = comment.substring(UrlUtils.alreadyDownloadedByIDMessage.length()); // The fileName starts right after the "message". + //logger.debug("initialId: " + initialId); // DEBUG! // Search that ID inside the list and if that instance gave the docUrl (there might be multiple ID instances) then get the file-location. for ( DataToBeLogged data_2 : FileUtils.dataToBeLoggedList ) { - if ( data_2.getUrlId().equals(previousId) && data_2.getWasDocumentOrDatasetAccessible().equals("true") ) { + if ( data_2.getUrlId().equals(initialId) && data_2.getWasDocumentOrDatasetAccessible().equals("true") ) { fileLocation = data_2.getComment(); size = data_2.getSize(); hash = data_2.getHash();