forked from lsmyrnaios/UrlsWorker
- Reduce memory consumption and fix a potential issue, where many "already-retrieved" full-texts would have already been deleted (or would be in different directories, for a short time), as they belonged to a previous assignments-batch (this case is now possible, after the following fix).
- Fix a bug which caused a character to be missing from the "alreadyDownloaded" full-text fileName, which in turn caused the file-data of that record to not get updated with the file-data of the record for which the same file was initially downloaded.
This commit is contained in:
parent
859f850f56
commit
0db35a83e7
|
@ -130,6 +130,9 @@ public class PublicationsRetrieverPlugin {
|
|||
logger.warn(numFailedTasks + " tasks failed, from assignments_" + assignmentRequestCounter);
|
||||
addUrlReportsToWorkerReport();
|
||||
callableTasks.clear(); // Reset the thread-tasks-list for the next batch.
|
||||
|
||||
UrlUtils.docOrDatasetUrlsWithIDs.clear(); // This HashTable is useful only for a single assignments-batch.
|
||||
// In the next batch, the previously stored files might have been already uploaded by the Controller and deleted by the worker. Also, they will be stored in a different directory anyway.
|
||||
}
|
||||
|
||||
|
||||
|
@ -151,11 +154,11 @@ public class PublicationsRetrieverPlugin {
|
|||
status = UrlReport.StatusType.accessible;
|
||||
if ( comment.contains(UrlUtils.alreadyDownloadedByIDMessage) ) {
|
||||
// The file of this docUrl was already downloaded by another docUrl.
|
||||
String previousId = comment.substring(UrlUtils.alreadyDownloadedByIDMessage.length() +1);
|
||||
//logger.debug("previousId: " + previousId); // DEBUG!
|
||||
String initialId = comment.substring(UrlUtils.alreadyDownloadedByIDMessage.length()); // The fileName starts right after the "message".
|
||||
//logger.debug("initialId: " + initialId); // DEBUG!
|
||||
// Search that ID inside the list and if that instance gave the docUrl (there might be multiple ID instances) then get the file-location.
|
||||
for ( DataToBeLogged data_2 : FileUtils.dataToBeLoggedList ) {
|
||||
if ( data_2.getUrlId().equals(previousId) && data_2.getWasDocumentOrDatasetAccessible().equals("true") ) {
|
||||
if ( data_2.getUrlId().equals(initialId) && data_2.getWasDocumentOrDatasetAccessible().equals("true") ) {
|
||||
fileLocation = data_2.getComment();
|
||||
size = data_2.getSize();
|
||||
hash = data_2.getHash();
|
||||
|
|
Loading…
Reference in New Issue