forked from lsmyrnaios/UrlsController
- If we receive an "UnknownHostException" when uploading to the S3 Object Store, then skip the rest of the current batch of full-texts, to give the network some time to recover.
- Code polishing.
This commit is contained in:
parent
9b1f2c4931
commit
7f789b8ad0
|
@ -9,26 +9,26 @@ handle_error () {
|
|||
# Change the working directory to the script's directory, when running from another location.
|
||||
cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1
|
||||
|
||||
justInstall=0
|
||||
justRun=0
|
||||
shouldRunInDocker=0
|
||||
|
||||
if [[ $# -eq 1 ]]; then
|
||||
justInstall=$1
|
||||
justRun=$1
|
||||
elif [[ $# -eq 2 ]]; then
|
||||
justInstall=$1
|
||||
justRun=$1
|
||||
shouldRunInDocker=$2
|
||||
elif [[ $# -gt 2 ]]; then
|
||||
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <justInstall: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2
|
||||
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <justRun: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2
|
||||
fi
|
||||
|
||||
if [[ justInstall -eq 1 && shouldRunInDocker -eq 1 ]]; then
|
||||
echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justInstall\" to < 0 >"
|
||||
justInstall=0
|
||||
if [[ justRun -eq 1 && shouldRunInDocker -eq 1 ]]; then
|
||||
echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justRun\" to < 0 >"
|
||||
justRun=0
|
||||
fi
|
||||
|
||||
gradleVersion="8.4"
|
||||
|
||||
if [[ justInstall -eq 0 ]]; then
|
||||
if [[ justRun -eq 0 ]]; then
|
||||
|
||||
if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then
|
||||
wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.springframework.stereotype.Component;
|
|||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.text.DecimalFormat;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.CancellationException;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
@ -54,6 +55,8 @@ public class ScheduledTasks {
|
|||
@Value("${services.pdfaggregation.controller.assignmentLimit}")
|
||||
private int assignmentsLimit;
|
||||
|
||||
public static final DecimalFormat df = new DecimalFormat("0.00");
|
||||
|
||||
private final String workerReportsDirPath;
|
||||
|
||||
public static final AtomicInteger numOfAllPayloads = new AtomicInteger(0);
|
||||
|
@ -131,8 +134,8 @@ public class ScheduledTasks {
|
|||
|
||||
if ( numFailedTasks > 0 )
|
||||
logger.warn(numFailedTasks + " out of " + sizeOfFutures + " background tasks have failed!");
|
||||
else
|
||||
logger.debug("All of the " + sizeOfFutures + " background tasks have succeeded.");
|
||||
else if ( logger.isTraceEnabled() )
|
||||
logger.trace("All of the " + sizeOfFutures + " background tasks have succeeded.");
|
||||
}
|
||||
|
||||
|
||||
|
@ -228,6 +231,8 @@ public class ScheduledTasks {
|
|||
inspectWorkerReportsAndTakeAction(ActionForWorkerReports.delete_old);
|
||||
}
|
||||
|
||||
private static final double daysToWaitBeforeDeletion = 7.0;
|
||||
|
||||
|
||||
@Scheduled(initialDelay = 604_800_000, fixedDelay = 604_800_000) // Run every 7 days.
|
||||
//@Scheduled(initialDelay = 1_200_000, fixedDelay = 1_200_000) // Just for testing (every 1200 secs).
|
||||
|
@ -240,10 +245,10 @@ public class ScheduledTasks {
|
|||
// The assignments just remain in the table, and the urls cannot be rechecked.
|
||||
|
||||
Calendar calendar = Calendar.getInstance();
|
||||
calendar.add(Calendar.DAY_OF_MONTH, -7); // Subtract 7 days from current.
|
||||
calendar.add(Calendar.DAY_OF_MONTH, - (int) daysToWaitBeforeDeletion); // Subtract <daysToWaitBeforeDeletion> from current Date.
|
||||
|
||||
DatabaseConnector.databaseLock.lock();
|
||||
urlsService.deleteAssignmentsWithOlderDate(calendar.getTimeInMillis()); // Any error-log is written inside.
|
||||
urlsService.deleteAssignmentsWithOlderDate(calendar.getTimeInMillis()); // Any error-log is written inside.
|
||||
DatabaseConnector.databaseLock.unlock();
|
||||
}
|
||||
|
||||
|
@ -295,7 +300,6 @@ public class ScheduledTasks {
|
|||
enum ActionForWorkerReports {process_previous_failed, process_current_failed, delete_old}
|
||||
|
||||
// TODO - Maybe make these numbers configurable from the "application.yml" file.
|
||||
private static final double daysToWaitBeforeDeletion = 7.0;
|
||||
|
||||
private static final double daysToWaitBeforeProcessing = 0.5; // 12 hours
|
||||
|
||||
|
@ -334,7 +338,7 @@ public class ScheduledTasks {
|
|||
for ( File workerReportSubDir : workerReportSubDirs )
|
||||
{
|
||||
File[] workerReportFiles = workerReportSubDir.listFiles(File::isFile);
|
||||
if (workerReportFiles == null) {
|
||||
if ( workerReportFiles == null ) {
|
||||
logger.error("There was an error when getting the workerReports of \"workerReportSubDir\": " + workerReportSubDir);
|
||||
return;
|
||||
} else if (workerReportFiles.length == 0) {
|
||||
|
@ -370,7 +374,7 @@ public class ScheduledTasks {
|
|||
} else { // Deletion..
|
||||
if ( elapsedDays > daysToWaitBeforeDeletion ) {
|
||||
// Enough time has passed, the directory should be deleted immediately.
|
||||
logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + elapsedDays + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted.");
|
||||
logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + df.format(elapsedDays) + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted.");
|
||||
numWorkerReportsToBeHandled ++;
|
||||
if ( fileUtils.deleteFile(workerReportFile.getAbsolutePath()) // Either successful or failed.
|
||||
&& !workerReportName.contains("successful") // If this has failed or its state is unknown (it was never renamed), then delete the assignment-records. For the successful, they have already been deleted.
|
||||
|
|
|
@ -22,6 +22,7 @@ import org.springframework.stereotype.Service;
|
|||
import javax.xml.bind.DatatypeConverter;
|
||||
import java.io.File;
|
||||
import java.net.ConnectException;
|
||||
import java.net.UnknownHostException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
@ -249,8 +250,8 @@ public class BulkImportServiceImpl implements BulkImportService {
|
|||
GenericData.Record record = null;
|
||||
try {
|
||||
record = processBulkImportedFile(fileLocation, provenance, bulkImportSource, timeMillis, additionalLoggingMsg);
|
||||
} catch (ConnectException ce) {
|
||||
String errorMsg = "ConnectException when uploading the files of segment_" + segmentCounter + " to the S3 Object Store. Will avoid uploading any file for this segment..";
|
||||
} catch (Exception e) {
|
||||
String errorMsg = "Exception when uploading the files of segment_" + segmentCounter + " to the S3 Object Store. Will avoid uploading any file for this segment..";
|
||||
logger.error(errorMsg + additionalLoggingMsg);
|
||||
bulkImportReport.addEvent(errorMsg);
|
||||
for ( int j=i; j < numOfFilesInSegment; ++j )
|
||||
|
@ -356,7 +357,7 @@ public class BulkImportServiceImpl implements BulkImportService {
|
|||
|
||||
|
||||
private GenericData.Record processBulkImportedFile(String fileLocation, String provenance, BulkImport.BulkImportSource bulkImportSource, long timeMillis, String additionalLoggingMsg)
|
||||
throws ConnectException
|
||||
throws ConnectException, UnknownHostException
|
||||
{
|
||||
File fullTextFile = new File(fileLocation);
|
||||
DocFileData docFileData = new DocFileData(fullTextFile, null, null, null);
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.io.*;
|
|||
import java.net.ConnectException;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.net.UnknownHostException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
@ -568,12 +569,12 @@ public class FileUtils {
|
|||
|
||||
try {
|
||||
String s3Url = constructS3FilenameAndUploadToS3(targetDirectory, fileName, fileNameID, dotFileExtension, datasourceId, hash);
|
||||
if (s3Url != null) {
|
||||
if ( s3Url != null ) {
|
||||
setFullTextForMultiplePayloads(fileRelatedPayloads, s3Url);
|
||||
//numUploadedFiles ++;
|
||||
}
|
||||
} catch (ConnectException ce) {
|
||||
logger.error("Avoid uploading the rest of the files of this batch..");
|
||||
} catch (Exception e) {
|
||||
logger.error("Avoid uploading the rest of the files of batch..");
|
||||
break;
|
||||
}
|
||||
// Else, the record will have its file-data set to "null", in the end of the caller method (as it will not have an s3Url as its location).
|
||||
|
@ -585,7 +586,7 @@ public class FileUtils {
|
|||
|
||||
|
||||
public String constructS3FilenameAndUploadToS3(String targetDirectory, String fileName, String fileNameID,
|
||||
String dotFileExtension, String datasourceId, String hash) throws ConnectException
|
||||
String dotFileExtension, String datasourceId, String hash) throws ConnectException, UnknownHostException
|
||||
{
|
||||
String filenameForS3 = constructS3FileName(fileName, fileNameID, dotFileExtension, datasourceId, hash); // This name is for the uploaded file, in the S3 Object Store.
|
||||
if ( filenameForS3 == null ) // The error is logged inside.
|
||||
|
@ -598,6 +599,9 @@ public class FileUtils {
|
|||
} catch (ConnectException ce) {
|
||||
logger.error("Could not connect with the S3 Object Store! " + ce.getMessage());
|
||||
throw ce;
|
||||
} catch (UnknownHostException uhe) {
|
||||
logger.error("The S3 Object Store could not be found! " + uhe.getMessage());
|
||||
throw uhe;
|
||||
} catch (Exception e) {
|
||||
logger.error("Could not upload the local-file \"" + fileFullPath + "\" to the S3 ObjectStore, with S3-filename: \"" + filenameForS3 + "\"!", e);
|
||||
return null;
|
||||
|
|
Loading…
Reference in New Issue