From dc97b323c9c4f5b145afb55c084e5352131e176c Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Fri, 4 Aug 2023 15:33:48 +0300 Subject: [PATCH] - Show a warning, if the "numOfUnretrievedFiles" is over 50. - Delete gradle .zip file after installation. - Code polishing. --- installAndRun.sh | 1 + .../plugins/PublicationsRetrieverPlugin.java | 10 ++++++++-- .../urls_worker/controllers/FullTextsController.java | 7 ++++--- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/installAndRun.sh b/installAndRun.sh index c3a0aea..e648c7b 100755 --- a/installAndRun.sh +++ b/installAndRun.sh @@ -66,6 +66,7 @@ if [[ justRun -eq 0 ]]; then echo -e "\nAsking for sudo, in order to install 'gradle'..\n" sudo mkdir /opt/gradle sudo apt install -y unzip && sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip + sudo rm -rf gradle-${gradleVersion}-bin.zip #ls /opt/gradle/gradle-${gradleVersion} # For debugging installation fi diff --git a/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java index ea62442..901fa3f 100644 --- a/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java @@ -185,6 +185,7 @@ public class PublicationsRetrieverPlugin { for ( Assignment assignment : assignments ) urlIdsWithDatasourceIds.put(assignment.getId(), assignment.getDatasource().getId()); + int numOfUnretrievedFiles = 0; Timestamp timestamp = new Timestamp(System.currentTimeMillis()); // Store it here, in order to have the same for all current records. for ( DataToBeLogged data : FileUtils.dataToBeLoggedList ) @@ -247,11 +248,13 @@ public class PublicationsRetrieverPlugin { else if ( ! comment.startsWith(HttpConnUtils.docFileNotRetrievedMessage, 0) ) { // If it was downloaded without an error. fileLocation = comment; // This is the full-file-path. mimeType = "application/pdf"; - } else // Else the file was not retrieved, so all file-related data are kept "null". + } else { // Else the file was not retrieved, so all file-related data are kept "null". + numOfUnretrievedFiles ++; error = new Error(Error.ErrorType.couldRetry, comment); // We can still try to download it from the found docUrl, in the future. + } if ( error == null ) // If the file was retrieved, in any time. - error = new Error(Error.ErrorType.couldRetry, null); // We do not want to send a "null" Error-object, since it just adds more complicated handling in the controller.. + error = new Error(Error.ErrorType.couldRetry, null); // We do not want to send a "null" Error-object, since it just adds more complicated handling in the Controller.. } else { status = UrlReport.StatusType.non_accessible; @@ -281,6 +284,9 @@ public class PublicationsRetrieverPlugin { AssignmentsHandler.urlReports.add(new UrlReport(status, payload, error)); }// end-for FileUtils.dataToBeLoggedList.clear(); // Empty the list, to be re-populated by the next batch / assignment. + + if ( numOfUnretrievedFiles > 50 ) + logger.warn("The number of non-retrieved files is: " + numOfUnretrievedFiles); } } diff --git a/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java b/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java index 2b21bab..737c4df 100644 --- a/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java +++ b/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java @@ -38,7 +38,7 @@ public class FullTextsController { public Object getFullTexts(@PathVariable long assignmentsCounter, @PathVariable int totalBatches, @PathVariable int batchCounter, @PathVariable List fileNamesWithExtensions) { int fileNamesListNum = fileNamesWithExtensions.size(); - if ( (fileNamesListNum == 1) && (fileNamesWithExtensions.get(0).length() == 0) ) { // In case the last "/" in the url was given (without any files following), then this list will not be empty, but have one empty item instead. + if ( (fileNamesListNum == 1) && (fileNamesWithExtensions.get(0).isEmpty()) ) { // In case the last "/" in the url was given (without any files following), then this list will not be empty, but have one empty item instead. // In case the url does not end in "/", then Spring will automatically return an "HTTP-BadRequest". String errorMsg = "An empty \"fileNamesWithExtensions\" list was given from assignments_" + assignmentsCounter + ", for batch_" + batchCounter; logger.error(errorMsg); @@ -91,8 +91,9 @@ public class FullTextsController { } finally { // The ".tar.zstd" file of this batch, for which we pass a steam to the Controller, will be deleted by the next batch or in the end of these assignments. // Now we will delete the zstd file of the previous assignments. - if ( batchCounter >= 2 ) - deleteFile(currentAssignmentsBaseFullTextsPath + "assignments_" + assignmentsCounter + "_full-texts_" + (batchCounter -1) + ".tar.zstd"); + int previousBatchCounter = (batchCounter -1); + if ( previousBatchCounter >= 1 ) + deleteFile(currentAssignmentsBaseFullTextsPath + "assignments_" + assignmentsCounter + "_full-texts_" + previousBatchCounter + ".tar.zstd"); } }