- If we receive an "UnknownHostException" when uploading to the S3ObjectStore, then skip the rest of the current full-texts batch, to give the network some time to recover.

- Code polishing.
This commit is contained in:
Lampros Smyrnaios 2023-11-22 15:29:18 +02:00
parent 9b1f2c4931
commit 7f789b8ad0
4 changed files with 31 additions and 22 deletions

View File

@ -9,26 +9,26 @@ handle_error () {
# Change the working directory to the script's directory, when running from another location. # Change the working directory to the script's directory, when running from another location.
cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1 cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1
justInstall=0 justRun=0
shouldRunInDocker=0 shouldRunInDocker=0
if [[ $# -eq 1 ]]; then if [[ $# -eq 1 ]]; then
justInstall=$1 justRun=$1
elif [[ $# -eq 2 ]]; then elif [[ $# -eq 2 ]]; then
justInstall=$1 justRun=$1
shouldRunInDocker=$2 shouldRunInDocker=$2
elif [[ $# -gt 2 ]]; then elif [[ $# -gt 2 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <justInstall: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2 echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <justRun: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2
fi fi
if [[ justInstall -eq 1 && shouldRunInDocker -eq 1 ]]; then if [[ justRun -eq 1 && shouldRunInDocker -eq 1 ]]; then
echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justInstall\" to < 0 >" echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justRun\" to < 0 >"
justInstall=0 justRun=0
fi fi
gradleVersion="8.4" gradleVersion="8.4"
if [[ justInstall -eq 0 ]]; then if [[ justRun -eq 0 ]]; then
if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then
wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip

View File

@ -24,6 +24,7 @@ import org.springframework.stereotype.Component;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.File; import java.io.File;
import java.io.FileReader; import java.io.FileReader;
import java.text.DecimalFormat;
import java.util.*; import java.util.*;
import java.util.concurrent.CancellationException; import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
@ -54,6 +55,8 @@ public class ScheduledTasks {
@Value("${services.pdfaggregation.controller.assignmentLimit}") @Value("${services.pdfaggregation.controller.assignmentLimit}")
private int assignmentsLimit; private int assignmentsLimit;
public static final DecimalFormat df = new DecimalFormat("0.00");
private final String workerReportsDirPath; private final String workerReportsDirPath;
public static final AtomicInteger numOfAllPayloads = new AtomicInteger(0); public static final AtomicInteger numOfAllPayloads = new AtomicInteger(0);
@ -131,8 +134,8 @@ public class ScheduledTasks {
if ( numFailedTasks > 0 ) if ( numFailedTasks > 0 )
logger.warn(numFailedTasks + " out of " + sizeOfFutures + " background tasks have failed!"); logger.warn(numFailedTasks + " out of " + sizeOfFutures + " background tasks have failed!");
else else if ( logger.isTraceEnabled() )
logger.debug("All of the " + sizeOfFutures + " background tasks have succeeded."); logger.trace("All of the " + sizeOfFutures + " background tasks have succeeded.");
} }
@ -228,6 +231,8 @@ public class ScheduledTasks {
inspectWorkerReportsAndTakeAction(ActionForWorkerReports.delete_old); inspectWorkerReportsAndTakeAction(ActionForWorkerReports.delete_old);
} }
private static final double daysToWaitBeforeDeletion = 7.0;
@Scheduled(initialDelay = 604_800_000, fixedDelay = 604_800_000) // Run every 7 days. @Scheduled(initialDelay = 604_800_000, fixedDelay = 604_800_000) // Run every 7 days.
//@Scheduled(initialDelay = 1_200_000, fixedDelay = 1_200_000) // Just for testing (every 1200 secs). //@Scheduled(initialDelay = 1_200_000, fixedDelay = 1_200_000) // Just for testing (every 1200 secs).
@ -240,10 +245,10 @@ public class ScheduledTasks {
// The assignments just remain in the table, and the urls cannot be rechecked. // The assignments just remain in the table, and the urls cannot be rechecked.
Calendar calendar = Calendar.getInstance(); Calendar calendar = Calendar.getInstance();
calendar.add(Calendar.DAY_OF_MONTH, -7); // Subtract 7 days from current. calendar.add(Calendar.DAY_OF_MONTH, - (int) daysToWaitBeforeDeletion); // Subtract <daysToWaitBeforeDeletion> from current Date.
DatabaseConnector.databaseLock.lock(); DatabaseConnector.databaseLock.lock();
urlsService.deleteAssignmentsWithOlderDate(calendar.getTimeInMillis()); // Any error-log is written inside. urlsService.deleteAssignmentsWithOlderDate(calendar.getTimeInMillis()); // Any error-log is written inside.
DatabaseConnector.databaseLock.unlock(); DatabaseConnector.databaseLock.unlock();
} }
@ -295,7 +300,6 @@ public class ScheduledTasks {
enum ActionForWorkerReports {process_previous_failed, process_current_failed, delete_old} enum ActionForWorkerReports {process_previous_failed, process_current_failed, delete_old}
// TODO - Maybe make these numbers configurable from the "application.yml" file. // TODO - Maybe make these numbers configurable from the "application.yml" file.
private static final double daysToWaitBeforeDeletion = 7.0;
private static final double daysToWaitBeforeProcessing = 0.5; // 12 hours private static final double daysToWaitBeforeProcessing = 0.5; // 12 hours
@ -334,7 +338,7 @@ public class ScheduledTasks {
for ( File workerReportSubDir : workerReportSubDirs ) for ( File workerReportSubDir : workerReportSubDirs )
{ {
File[] workerReportFiles = workerReportSubDir.listFiles(File::isFile); File[] workerReportFiles = workerReportSubDir.listFiles(File::isFile);
if (workerReportFiles == null) { if ( workerReportFiles == null ) {
logger.error("There was an error when getting the workerReports of \"workerReportSubDir\": " + workerReportSubDir); logger.error("There was an error when getting the workerReports of \"workerReportSubDir\": " + workerReportSubDir);
return; return;
} else if (workerReportFiles.length == 0) { } else if (workerReportFiles.length == 0) {
@ -370,7 +374,7 @@ public class ScheduledTasks {
} else { // Deletion.. } else { // Deletion..
if ( elapsedDays > daysToWaitBeforeDeletion ) { if ( elapsedDays > daysToWaitBeforeDeletion ) {
// Enough time has passed, the directory should be deleted immediately. // Enough time has passed, the directory should be deleted immediately.
logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + elapsedDays + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted."); logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + df.format(elapsedDays) + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted.");
numWorkerReportsToBeHandled ++; numWorkerReportsToBeHandled ++;
if ( fileUtils.deleteFile(workerReportFile.getAbsolutePath()) // Either successful or failed. if ( fileUtils.deleteFile(workerReportFile.getAbsolutePath()) // Either successful or failed.
&& !workerReportName.contains("successful") // If this has failed or its state is unknown (it was never renamed), then delete the assignment-records. For the successful, they have already been deleted. && !workerReportName.contains("successful") // If this has failed or its state is unknown (it was never renamed), then delete the assignment-records. For the successful, they have already been deleted.

View File

@ -22,6 +22,7 @@ import org.springframework.stereotype.Service;
import javax.xml.bind.DatatypeConverter; import javax.xml.bind.DatatypeConverter;
import java.io.File; import java.io.File;
import java.net.ConnectException; import java.net.ConnectException;
import java.net.UnknownHostException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
@ -249,8 +250,8 @@ public class BulkImportServiceImpl implements BulkImportService {
GenericData.Record record = null; GenericData.Record record = null;
try { try {
record = processBulkImportedFile(fileLocation, provenance, bulkImportSource, timeMillis, additionalLoggingMsg); record = processBulkImportedFile(fileLocation, provenance, bulkImportSource, timeMillis, additionalLoggingMsg);
} catch (ConnectException ce) { } catch (Exception e) {
String errorMsg = "ConnectException when uploading the files of segment_" + segmentCounter + " to the S3 Object Store. Will avoid uploading any file for this segment.."; String errorMsg = "Exception when uploading the files of segment_" + segmentCounter + " to the S3 Object Store. Will avoid uploading any file for this segment..";
logger.error(errorMsg + additionalLoggingMsg); logger.error(errorMsg + additionalLoggingMsg);
bulkImportReport.addEvent(errorMsg); bulkImportReport.addEvent(errorMsg);
for ( int j=i; j < numOfFilesInSegment; ++j ) for ( int j=i; j < numOfFilesInSegment; ++j )
@ -356,7 +357,7 @@ public class BulkImportServiceImpl implements BulkImportService {
private GenericData.Record processBulkImportedFile(String fileLocation, String provenance, BulkImport.BulkImportSource bulkImportSource, long timeMillis, String additionalLoggingMsg) private GenericData.Record processBulkImportedFile(String fileLocation, String provenance, BulkImport.BulkImportSource bulkImportSource, long timeMillis, String additionalLoggingMsg)
throws ConnectException throws ConnectException, UnknownHostException
{ {
File fullTextFile = new File(fileLocation); File fullTextFile = new File(fileLocation);
DocFileData docFileData = new DocFileData(fullTextFile, null, null, null); DocFileData docFileData = new DocFileData(fullTextFile, null, null, null);

View File

@ -23,6 +23,7 @@ import java.io.*;
import java.net.ConnectException; import java.net.ConnectException;
import java.net.HttpURLConnection; import java.net.HttpURLConnection;
import java.net.URL; import java.net.URL;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
@ -568,12 +569,12 @@ public class FileUtils {
try { try {
String s3Url = constructS3FilenameAndUploadToS3(targetDirectory, fileName, fileNameID, dotFileExtension, datasourceId, hash); String s3Url = constructS3FilenameAndUploadToS3(targetDirectory, fileName, fileNameID, dotFileExtension, datasourceId, hash);
if (s3Url != null) { if ( s3Url != null ) {
setFullTextForMultiplePayloads(fileRelatedPayloads, s3Url); setFullTextForMultiplePayloads(fileRelatedPayloads, s3Url);
//numUploadedFiles ++; //numUploadedFiles ++;
} }
} catch (ConnectException ce) { } catch (Exception e) {
logger.error("Avoid uploading the rest of the files of this batch.."); logger.error("Avoid uploading the rest of the files of batch..");
break; break;
} }
// Else, the record will have its file-data set to "null", in the end of the caller method (as it will not have an s3Url as its location). // Else, the record will have its file-data set to "null", in the end of the caller method (as it will not have an s3Url as its location).
@ -585,7 +586,7 @@ public class FileUtils {
public String constructS3FilenameAndUploadToS3(String targetDirectory, String fileName, String fileNameID, public String constructS3FilenameAndUploadToS3(String targetDirectory, String fileName, String fileNameID,
String dotFileExtension, String datasourceId, String hash) throws ConnectException String dotFileExtension, String datasourceId, String hash) throws ConnectException, UnknownHostException
{ {
String filenameForS3 = constructS3FileName(fileName, fileNameID, dotFileExtension, datasourceId, hash); // This name is for the uploaded file, in the S3 Object Store. String filenameForS3 = constructS3FileName(fileName, fileNameID, dotFileExtension, datasourceId, hash); // This name is for the uploaded file, in the S3 Object Store.
if ( filenameForS3 == null ) // The error is logged inside. if ( filenameForS3 == null ) // The error is logged inside.
@ -598,6 +599,9 @@ public class FileUtils {
} catch (ConnectException ce) { } catch (ConnectException ce) {
logger.error("Could not connect with the S3 Object Store! " + ce.getMessage()); logger.error("Could not connect with the S3 Object Store! " + ce.getMessage());
throw ce; throw ce;
} catch (UnknownHostException uhe) {
logger.error("The S3 Object Store could not be found! " + uhe.getMessage());
throw uhe;
} catch (Exception e) { } catch (Exception e) {
logger.error("Could not upload the local-file \"" + fileFullPath + "\" to the S3 ObjectStore, with S3-filename: \"" + filenameForS3 + "\"!", e); logger.error("Could not upload the local-file \"" + fileFullPath + "\" to the S3 ObjectStore, with S3-filename: \"" + filenameForS3 + "\"!", e);
return null; return null;