From 484cf5cefca132b4dcf30e7fef9d2fe3658fb3fe Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Wed, 29 Mar 2023 17:12:37 +0300 Subject: [PATCH] - Avoid requesting the remaining full-text batches in case the Worker returns a 5XX error in one of the batches. - Add nullability-checks for "datasourceId" and "hash" before constructing the new filename and uploading the full-text to S3. - Improve a log-message. --- .../urls_controller/util/FileUtils.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java index f7196bb..7e2eba1 100644 --- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java @@ -290,7 +290,7 @@ public class FileUtils { continue; // To the next batch. } else if ( (extractedFileNames.length -2) != fileNamesForCurBatch.size() ) { - logger.warn("The number of extracted files (" + (extractedFileNames.length -2) + ") was not equal to the number of the current-batch's files (" + fileNamesForCurBatch.size() + ")."); + logger.warn("The number of extracted files (" + (extractedFileNames.length -2) + ") was not equal to the number of the current-batch's (" + batchCounter + ") files (" + fileNamesForCurBatch.size() + ")."); // We do NOT have to find and cross-reference the missing files with the urlReports, in order to set their locations to <null>, // since, in the end of each assignments-batch, an iteration will be made and for all the non-retrieved and non-uploaded full-texts, the app will set them to null. 
} @@ -330,9 +330,13 @@ public class FileUtils { conn.connect(); int statusCode = conn.getResponseCode(); if ( statusCode != 200 ) { - logger.warn("HTTP-" + statusCode + ": " + getMessageFromResponseBody(conn, true) + "\nProblem when requesting the ZstdFile of batch_" + batchNum + " from the Worker with ID \"" + workerId + "\" and requestUrl: " + requestUrl); + logger.warn("HTTP-" + statusCode + ": " + getMessageFromResponseBody(conn, true) + "\n\nProblem when requesting the ZstdFile of batch_" + batchNum + " from the Worker with ID \"" + workerId + "\" and requestUrl: " + requestUrl); + if ( (statusCode >= 500) && (statusCode <= 599) ) + throw new RuntimeException(); // Throw an exception to indicate that the Worker has problems and all remaining batches will fail as well. return null; } + } catch (RuntimeException re) { + throw re; } catch (Exception e) { String exMessage = e.getMessage(); logger.warn("Problem when requesting the ZstdFile of batch_" + batchNum + " of assignments_" + assignmentsBatchCounter + " from the Worker with ID \"" + workerId + "\" and requestUrl: " + requestUrl + "\n" + exMessage); @@ -413,6 +417,16 @@ public class FileUtils { continue; } + if ( datasourceId == null ) { + logger.error("The retrieved \"datasourceId\" was \"null\" for file: " + fileName); + continue; + } + + if ( hash == null ) { + logger.error("The retrieved \"hash\" was \"null\" for file: " + fileName); + continue; + } + String fileFullPath = targetDirectory + fileName; // The fullPath to the local file. // Use the "fileNameID" and not the "filenameWithoutExtension", as we want to avoid keeping the possible "parenthesis" with the increasing number (about the duplication of ID-fileName).