- If an "UnknownHostException" is received while uploading to the S3 Object Store, skip the rest of the current batch of full-texts, in order to give the network some time to recover.

- Code polishing.
This commit is contained in:
Lampros Smyrnaios 2023-11-22 15:29:18 +02:00
parent 9b1f2c4931
commit 7f789b8ad0
4 changed files with 31 additions and 22 deletions

View File

@ -9,26 +9,26 @@ handle_error () {
# Change the working directory to the script's directory, when running from another location.
cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1
justInstall=0
justRun=0
shouldRunInDocker=0
if [[ $# -eq 1 ]]; then
justInstall=$1
justRun=$1
elif [[ $# -eq 2 ]]; then
justInstall=$1
justRun=$1
shouldRunInDocker=$2
elif [[ $# -gt 2 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <justInstall: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <justRun: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2
fi
if [[ justInstall -eq 1 && shouldRunInDocker -eq 1 ]]; then
echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justInstall\" to < 0 >"
justInstall=0
if [[ justRun -eq 1 && shouldRunInDocker -eq 1 ]]; then
echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justRun\" to < 0 >"
justRun=0
fi
gradleVersion="8.4"
if [[ justInstall -eq 0 ]]; then
if [[ justRun -eq 0 ]]; then
if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then
wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip

View File

@ -24,6 +24,7 @@ import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.text.DecimalFormat;
import java.util.*;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
@ -54,6 +55,8 @@ public class ScheduledTasks {
@Value("${services.pdfaggregation.controller.assignmentLimit}")
private int assignmentsLimit;
public static final DecimalFormat df = new DecimalFormat("0.00");
private final String workerReportsDirPath;
public static final AtomicInteger numOfAllPayloads = new AtomicInteger(0);
@ -131,8 +134,8 @@ public class ScheduledTasks {
if ( numFailedTasks > 0 )
logger.warn(numFailedTasks + " out of " + sizeOfFutures + " background tasks have failed!");
else
logger.debug("All of the " + sizeOfFutures + " background tasks have succeeded.");
else if ( logger.isTraceEnabled() )
logger.trace("All of the " + sizeOfFutures + " background tasks have succeeded.");
}
@ -228,6 +231,8 @@ public class ScheduledTasks {
inspectWorkerReportsAndTakeAction(ActionForWorkerReports.delete_old);
}
private static final double daysToWaitBeforeDeletion = 7.0;
@Scheduled(initialDelay = 604_800_000, fixedDelay = 604_800_000) // Run every 7 days.
//@Scheduled(initialDelay = 1_200_000, fixedDelay = 1_200_000) // Just for testing (every 1200 secs).
@ -240,10 +245,10 @@ public class ScheduledTasks {
// The assignments just remain in the table, and the urls cannot be rechecked.
Calendar calendar = Calendar.getInstance();
calendar.add(Calendar.DAY_OF_MONTH, -7); // Subtract 7 days from current.
calendar.add(Calendar.DAY_OF_MONTH, - (int) daysToWaitBeforeDeletion); // Subtract <daysToWaitBeforeDeletion> from current Date.
DatabaseConnector.databaseLock.lock();
urlsService.deleteAssignmentsWithOlderDate(calendar.getTimeInMillis()); // Any error-log is written inside.
urlsService.deleteAssignmentsWithOlderDate(calendar.getTimeInMillis()); // Any error-log is written inside.
DatabaseConnector.databaseLock.unlock();
}
@ -295,7 +300,6 @@ public class ScheduledTasks {
enum ActionForWorkerReports {process_previous_failed, process_current_failed, delete_old}
// TODO - Maybe make these numbers configurable from the "application.yml" file.
private static final double daysToWaitBeforeDeletion = 7.0;
private static final double daysToWaitBeforeProcessing = 0.5; // 12 hours
@ -334,7 +338,7 @@ public class ScheduledTasks {
for ( File workerReportSubDir : workerReportSubDirs )
{
File[] workerReportFiles = workerReportSubDir.listFiles(File::isFile);
if (workerReportFiles == null) {
if ( workerReportFiles == null ) {
logger.error("There was an error when getting the workerReports of \"workerReportSubDir\": " + workerReportSubDir);
return;
} else if (workerReportFiles.length == 0) {
@ -370,7 +374,7 @@ public class ScheduledTasks {
} else { // Deletion..
if ( elapsedDays > daysToWaitBeforeDeletion ) {
// Enough time has passed, the directory should be deleted immediately.
logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + elapsedDays + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted.");
logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + df.format(elapsedDays) + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted.");
numWorkerReportsToBeHandled ++;
if ( fileUtils.deleteFile(workerReportFile.getAbsolutePath()) // Either successful or failed.
&& !workerReportName.contains("successful") // If this has failed or its state is unknown (it was never renamed), then delete the assignment-records. For the successful, they have already been deleted.

View File

@ -22,6 +22,7 @@ import org.springframework.stereotype.Service;
import javax.xml.bind.DatatypeConverter;
import java.io.File;
import java.net.ConnectException;
import java.net.UnknownHostException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
@ -249,8 +250,8 @@ public class BulkImportServiceImpl implements BulkImportService {
GenericData.Record record = null;
try {
record = processBulkImportedFile(fileLocation, provenance, bulkImportSource, timeMillis, additionalLoggingMsg);
} catch (ConnectException ce) {
String errorMsg = "ConnectException when uploading the files of segment_" + segmentCounter + " to the S3 Object Store. Will avoid uploading any file for this segment..";
} catch (Exception e) {
String errorMsg = "Exception when uploading the files of segment_" + segmentCounter + " to the S3 Object Store. Will avoid uploading any file for this segment..";
logger.error(errorMsg + additionalLoggingMsg);
bulkImportReport.addEvent(errorMsg);
for ( int j=i; j < numOfFilesInSegment; ++j )
@ -356,7 +357,7 @@ public class BulkImportServiceImpl implements BulkImportService {
private GenericData.Record processBulkImportedFile(String fileLocation, String provenance, BulkImport.BulkImportSource bulkImportSource, long timeMillis, String additionalLoggingMsg)
throws ConnectException
throws ConnectException, UnknownHostException
{
File fullTextFile = new File(fileLocation);
DocFileData docFileData = new DocFileData(fullTextFile, null, null, null);

View File

@ -23,6 +23,7 @@ import java.io.*;
import java.net.ConnectException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
@ -568,12 +569,12 @@ public class FileUtils {
try {
String s3Url = constructS3FilenameAndUploadToS3(targetDirectory, fileName, fileNameID, dotFileExtension, datasourceId, hash);
if (s3Url != null) {
if ( s3Url != null ) {
setFullTextForMultiplePayloads(fileRelatedPayloads, s3Url);
//numUploadedFiles ++;
}
} catch (ConnectException ce) {
logger.error("Avoid uploading the rest of the files of this batch..");
} catch (Exception e) {
logger.error("Avoid uploading the rest of the files of batch..");
break;
}
// Else, the record will have its file-data set to "null", in the end of the caller method (as it will not have an s3Url as its location).
@ -585,7 +586,7 @@ public class FileUtils {
public String constructS3FilenameAndUploadToS3(String targetDirectory, String fileName, String fileNameID,
String dotFileExtension, String datasourceId, String hash) throws ConnectException
String dotFileExtension, String datasourceId, String hash) throws ConnectException, UnknownHostException
{
String filenameForS3 = constructS3FileName(fileName, fileNameID, dotFileExtension, datasourceId, hash); // This name is for the uploaded file, in the S3 Object Store.
if ( filenameForS3 == null ) // The error is logged inside.
@ -598,6 +599,9 @@ public class FileUtils {
} catch (ConnectException ce) {
logger.error("Could not connect with the S3 Object Store! " + ce.getMessage());
throw ce;
} catch (UnknownHostException uhe) {
logger.error("The S3 Object Store could not be found! " + uhe.getMessage());
throw uhe;
} catch (Exception e) {
logger.error("Could not upload the local-file \"" + fileFullPath + "\" to the S3 ObjectStore, with S3-filename: \"" + filenameForS3 + "\"!", e);
return null;