From 7f789b8ad026a41d9a1d981ad0da52c680a5f05e Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Wed, 22 Nov 2023 15:29:18 +0200 Subject: [PATCH] - If we receive an "UnknownHostException" when uploading to the S3ObjectStore, then skip the current full-texts' batch to leave some time for the network to get unstuck. - Code polishing. --- installAndRun.sh | 16 ++++++++-------- .../components/ScheduledTasks.java | 18 +++++++++++------- .../services/BulkImportServiceImpl.java | 7 ++++--- .../urls_controller/util/FileUtils.java | 12 ++++++++---- 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/installAndRun.sh b/installAndRun.sh index 20f5456..1ceec35 100755 --- a/installAndRun.sh +++ b/installAndRun.sh @@ -9,26 +9,26 @@ handle_error () { # Change the working directory to the script's directory, when running from another location. cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1 -justInstall=0 +justRun=0 shouldRunInDocker=0 if [[ $# -eq 1 ]]; then - justInstall=$1 + justRun=$1 elif [[ $# -eq 2 ]]; then - justInstall=$1 + justRun=$1 shouldRunInDocker=$2 elif [[ $# -gt 2 ]]; then - echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh "; exit 2 + echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh "; exit 2 fi -if [[ justInstall -eq 1 && shouldRunInDocker -eq 1 ]]; then - echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justInstall\" to < 0 >" - justInstall=0 +if [[ justRun -eq 1 && shouldRunInDocker -eq 1 ]]; then + echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justRun\" to < 0 >" + justRun=0 fi gradleVersion="8.4" -if [[ justInstall -eq 0 ]]; then +if [[ justRun -eq 0 ]]; then if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip diff --git a/src/main/java/eu/openaire/urls_controller/components/ScheduledTasks.java b/src/main/java/eu/openaire/urls_controller/components/ScheduledTasks.java index 171d4a2..8dc5760 100644 --- a/src/main/java/eu/openaire/urls_controller/components/ScheduledTasks.java +++ b/src/main/java/eu/openaire/urls_controller/components/ScheduledTasks.java @@ -24,6 +24,7 @@ import org.springframework.stereotype.Component; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; +import java.text.DecimalFormat; import java.util.*; import java.util.concurrent.CancellationException; import java.util.concurrent.ExecutionException; @@ -54,6 +55,8 @@ public class ScheduledTasks { @Value("${services.pdfaggregation.controller.assignmentLimit}") private int assignmentsLimit; + public static final DecimalFormat df = new DecimalFormat("0.00"); + private final String workerReportsDirPath; public static final AtomicInteger numOfAllPayloads = new AtomicInteger(0); @@ -131,8 +134,8 @@ public class ScheduledTasks { if ( numFailedTasks > 0 ) logger.warn(numFailedTasks + " out of " + sizeOfFutures + " background tasks have failed!"); - else - logger.debug("All of the " + sizeOfFutures + " background tasks have succeeded."); + else if ( logger.isTraceEnabled() ) + logger.trace("All of the " + sizeOfFutures + " background tasks have succeeded."); } @@ -228,6 +231,8 @@ public class ScheduledTasks { inspectWorkerReportsAndTakeAction(ActionForWorkerReports.delete_old); } + private static final double daysToWaitBeforeDeletion = 7.0; + @Scheduled(initialDelay = 604_800_000, fixedDelay = 604_800_000) // Run every 7 days. //@Scheduled(initialDelay = 1_200_000, fixedDelay = 1_200_000) // Just for testing (every 1200 secs). @@ -240,10 +245,10 @@ public class ScheduledTasks { // The assignments just remain in the table, and the urls cannot be rechecked. Calendar calendar = Calendar.getInstance(); - calendar.add(Calendar.DAY_OF_MONTH, -7); // Subtract 7 days from current. + calendar.add(Calendar.DAY_OF_MONTH, - (int) daysToWaitBeforeDeletion); // Subtract from current Date. DatabaseConnector.databaseLock.lock(); - urlsService.deleteAssignmentsWithOlderDate(calendar.getTimeInMillis()); // Any error-log is written inside. + urlsService.deleteAssignmentsWithOlderDate(calendar.getTimeInMillis()); // Any error-log is written inside. DatabaseConnector.databaseLock.unlock(); } @@ -295,7 +300,6 @@ public class ScheduledTasks { enum ActionForWorkerReports {process_previous_failed, process_current_failed, delete_old} // TODO - Maybe make these numbers configurable from the "application.yml" file. - private static final double daysToWaitBeforeDeletion = 7.0; private static final double daysToWaitBeforeProcessing = 0.5; // 12 hours @@ -334,7 +338,7 @@ public class ScheduledTasks { for ( File workerReportSubDir : workerReportSubDirs ) { File[] workerReportFiles = workerReportSubDir.listFiles(File::isFile); - if (workerReportFiles == null) { + if ( workerReportFiles == null ) { logger.error("There was an error when getting the workerReports of \"workerReportSubDir\": " + workerReportSubDir); return; } else if (workerReportFiles.length == 0) { @@ -370,7 +374,7 @@ public class ScheduledTasks { } else { // Deletion.. if ( elapsedDays > daysToWaitBeforeDeletion ) { // Enough time has passed, the directory should be deleted immediately. - logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + elapsedDays + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted."); + logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + df.format(elapsedDays) + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted."); numWorkerReportsToBeHandled ++; if ( fileUtils.deleteFile(workerReportFile.getAbsolutePath()) // Either successful or failed. && !workerReportName.contains("successful") // If this has failed or its state is unknown (it was never renamed), then delete the assignment-records. For the successful, they have already been deleted. diff --git a/src/main/java/eu/openaire/urls_controller/services/BulkImportServiceImpl.java b/src/main/java/eu/openaire/urls_controller/services/BulkImportServiceImpl.java index 5ea6873..1bcbe64 100644 --- a/src/main/java/eu/openaire/urls_controller/services/BulkImportServiceImpl.java +++ b/src/main/java/eu/openaire/urls_controller/services/BulkImportServiceImpl.java @@ -22,6 +22,7 @@ import org.springframework.stereotype.Service; import javax.xml.bind.DatatypeConverter; import java.io.File; import java.net.ConnectException; +import java.net.UnknownHostException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -249,8 +250,8 @@ public class BulkImportServiceImpl implements BulkImportService { GenericData.Record record = null; try { record = processBulkImportedFile(fileLocation, provenance, bulkImportSource, timeMillis, additionalLoggingMsg); - } catch (ConnectException ce) { - String errorMsg = "ConnectException when uploading the files of segment_" + segmentCounter + " to the S3 Object Store. Will avoid uploading any file for this segment.."; + } catch (Exception e) { + String errorMsg = "Exception when uploading the files of segment_" + segmentCounter + " to the S3 Object Store. Will avoid uploading any file for this segment.."; logger.error(errorMsg + additionalLoggingMsg); bulkImportReport.addEvent(errorMsg); for ( int j=i; j < numOfFilesInSegment; ++j ) @@ -356,7 +357,7 @@ public class BulkImportServiceImpl implements BulkImportService { private GenericData.Record processBulkImportedFile(String fileLocation, String provenance, BulkImport.BulkImportSource bulkImportSource, long timeMillis, String additionalLoggingMsg) - throws ConnectException + throws ConnectException, UnknownHostException { File fullTextFile = new File(fileLocation); DocFileData docFileData = new DocFileData(fullTextFile, null, null, null); diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java index a8d2534..b1ad81d 100644 --- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java @@ -23,6 +23,7 @@ import java.io.*; import java.net.ConnectException; import java.net.HttpURLConnection; import java.net.URL; +import java.net.UnknownHostException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; @@ -568,12 +569,12 @@ public class FileUtils { try { String s3Url = constructS3FilenameAndUploadToS3(targetDirectory, fileName, fileNameID, dotFileExtension, datasourceId, hash); - if (s3Url != null) { + if ( s3Url != null ) { setFullTextForMultiplePayloads(fileRelatedPayloads, s3Url); //numUploadedFiles ++; } - } catch (ConnectException ce) { - logger.error("Avoid uploading the rest of the files of this batch.."); + } catch (Exception e) { + logger.error("Avoid uploading the rest of the files of batch.."); break; } // Else, the record will have its file-data set to "null", in the end of the caller method (as it will not have an s3Url as its location). @@ -585,7 +586,7 @@ public class FileUtils { public String constructS3FilenameAndUploadToS3(String targetDirectory, String fileName, String fileNameID, - String dotFileExtension, String datasourceId, String hash) throws ConnectException + String dotFileExtension, String datasourceId, String hash) throws ConnectException, UnknownHostException { String filenameForS3 = constructS3FileName(fileName, fileNameID, dotFileExtension, datasourceId, hash); // This name is for the uploaded file, in the S3 Object Store. if ( filenameForS3 == null ) // The error is logged inside. @@ -598,6 +599,9 @@ public class FileUtils { } catch (ConnectException ce) { logger.error("Could not connect with the S3 Object Store! " + ce.getMessage()); throw ce; + } catch (UnknownHostException uhe) { + logger.error("The S3 Object Store could not be found! " + uhe.getMessage()); + throw uhe; } catch (Exception e) { logger.error("Could not upload the local-file \"" + fileFullPath + "\" to the S3 ObjectStore, with S3-filename: \"" + filenameForS3 + "\"!", e); return null;