Browse Source

- Reduce memory consumption and fix a potential issue where many "already-retrieved" full-texts would have already been deleted (or would be in different directories, for a short time), because they belonged to a previous assignments-batch (this case is now possible, after the following fix).

- Fix a bug that caused a missing character in the "alreadyDownloaded" full-text fileName, which in turn caused the file-data of that record to not get updated with the file-data of the record for which the same file was initially downloaded.
master
Lampros Smyrnaios 5 months ago
parent
commit
0db35a83e7
  1. 9
      src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java

9
src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java

@ -130,6 +130,9 @@ public class PublicationsRetrieverPlugin {
logger.warn(numFailedTasks + " tasks failed, from assignments_" + assignmentRequestCounter);
addUrlReportsToWorkerReport();
callableTasks.clear(); // Reset the thread-tasks-list for the next batch.
UrlUtils.docOrDatasetUrlsWithIDs.clear(); // This HashTable is useful only for a single assignments-batch.
// In the next batch, the previously stored files might have been already uploaded by the Controller and deleted by the worker. Also, they will be stored in a different directory anyway.
}
@ -151,11 +154,11 @@ public class PublicationsRetrieverPlugin {
status = UrlReport.StatusType.accessible;
if ( comment.contains(UrlUtils.alreadyDownloadedByIDMessage) ) {
// The file of this docUrl was already downloaded by another docUrl.
String previousId = comment.substring(UrlUtils.alreadyDownloadedByIDMessage.length() +1);
//logger.debug("previousId: " + previousId); // DEBUG!
String initialId = comment.substring(UrlUtils.alreadyDownloadedByIDMessage.length()); // The fileName starts right after the "message".
//logger.debug("initialId: " + initialId); // DEBUG!
// Search that ID inside the list and if that instance gave the docUrl (there might be multiple ID instances) then get the file-location.
for ( DataToBeLogged data_2 : FileUtils.dataToBeLoggedList ) {
if ( data_2.getUrlId().equals(previousId) && data_2.getWasDocumentOrDatasetAccessible().equals("true") ) {
if ( data_2.getUrlId().equals(initialId) && data_2.getWasDocumentOrDatasetAccessible().equals("true") ) {
fileLocation = data_2.getComment();
size = data_2.getSize();
hash = data_2.getHash();

Loading…
Cancel
Save