- If we receive an "UnknownHostException" when uploading to the S3 Object Store, skip the rest of the current full-texts batch, to give the network some time to recover.
- Code polishing.
This commit is contained in:
parent
9b1f2c4931
commit
7f789b8ad0
|
@ -9,26 +9,26 @@ handle_error () {
|
||||||
# Change the working directory to the script's directory, when running from another location.
|
# Change the working directory to the script's directory, when running from another location.
|
||||||
cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1
|
cd "${0%/*}" || handle_error "Could not change-dir to this script's dir!" 1
|
||||||
|
|
||||||
justInstall=0
|
justRun=0
|
||||||
shouldRunInDocker=0
|
shouldRunInDocker=0
|
||||||
|
|
||||||
if [[ $# -eq 1 ]]; then
|
if [[ $# -eq 1 ]]; then
|
||||||
justInstall=$1
|
justRun=$1
|
||||||
elif [[ $# -eq 2 ]]; then
|
elif [[ $# -eq 2 ]]; then
|
||||||
justInstall=$1
|
justRun=$1
|
||||||
shouldRunInDocker=$2
|
shouldRunInDocker=$2
|
||||||
elif [[ $# -gt 2 ]]; then
|
elif [[ $# -gt 2 ]]; then
|
||||||
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <justInstall: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2
|
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <justRun: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ justInstall -eq 1 && shouldRunInDocker -eq 1 ]]; then
|
if [[ justRun -eq 1 && shouldRunInDocker -eq 1 ]]; then
|
||||||
echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justInstall\" to < 0 >"
|
echo -e "Cannot run in docker without re-building the project (just to be safe). Setting \"justRun\" to < 0 >"
|
||||||
justInstall=0
|
justRun=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
gradleVersion="8.4"
|
gradleVersion="8.4"
|
||||||
|
|
||||||
if [[ justInstall -eq 0 ]]; then
|
if [[ justRun -eq 0 ]]; then
|
||||||
|
|
||||||
if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then
|
if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then
|
||||||
wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip
|
wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.springframework.stereotype.Component;
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileReader;
|
import java.io.FileReader;
|
||||||
|
import java.text.DecimalFormat;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.CancellationException;
|
import java.util.concurrent.CancellationException;
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
|
@ -54,6 +55,8 @@ public class ScheduledTasks {
|
||||||
@Value("${services.pdfaggregation.controller.assignmentLimit}")
|
@Value("${services.pdfaggregation.controller.assignmentLimit}")
|
||||||
private int assignmentsLimit;
|
private int assignmentsLimit;
|
||||||
|
|
||||||
|
public static final DecimalFormat df = new DecimalFormat("0.00");
|
||||||
|
|
||||||
private final String workerReportsDirPath;
|
private final String workerReportsDirPath;
|
||||||
|
|
||||||
public static final AtomicInteger numOfAllPayloads = new AtomicInteger(0);
|
public static final AtomicInteger numOfAllPayloads = new AtomicInteger(0);
|
||||||
|
@ -131,8 +134,8 @@ public class ScheduledTasks {
|
||||||
|
|
||||||
if ( numFailedTasks > 0 )
|
if ( numFailedTasks > 0 )
|
||||||
logger.warn(numFailedTasks + " out of " + sizeOfFutures + " background tasks have failed!");
|
logger.warn(numFailedTasks + " out of " + sizeOfFutures + " background tasks have failed!");
|
||||||
else
|
else if ( logger.isTraceEnabled() )
|
||||||
logger.debug("All of the " + sizeOfFutures + " background tasks have succeeded.");
|
logger.trace("All of the " + sizeOfFutures + " background tasks have succeeded.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -228,6 +231,8 @@ public class ScheduledTasks {
|
||||||
inspectWorkerReportsAndTakeAction(ActionForWorkerReports.delete_old);
|
inspectWorkerReportsAndTakeAction(ActionForWorkerReports.delete_old);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final double daysToWaitBeforeDeletion = 7.0;
|
||||||
|
|
||||||
|
|
||||||
@Scheduled(initialDelay = 604_800_000, fixedDelay = 604_800_000) // Run every 7 days.
|
@Scheduled(initialDelay = 604_800_000, fixedDelay = 604_800_000) // Run every 7 days.
|
||||||
//@Scheduled(initialDelay = 1_200_000, fixedDelay = 1_200_000) // Just for testing (every 1200 secs).
|
//@Scheduled(initialDelay = 1_200_000, fixedDelay = 1_200_000) // Just for testing (every 1200 secs).
|
||||||
|
@ -240,10 +245,10 @@ public class ScheduledTasks {
|
||||||
// The assignments just remain in the table, and the urls cannot be rechecked.
|
// The assignments just remain in the table, and the urls cannot be rechecked.
|
||||||
|
|
||||||
Calendar calendar = Calendar.getInstance();
|
Calendar calendar = Calendar.getInstance();
|
||||||
calendar.add(Calendar.DAY_OF_MONTH, -7); // Subtract 7 days from current.
|
calendar.add(Calendar.DAY_OF_MONTH, - (int) daysToWaitBeforeDeletion); // Subtract <daysToWaitBeforeDeletion> from current Date.
|
||||||
|
|
||||||
DatabaseConnector.databaseLock.lock();
|
DatabaseConnector.databaseLock.lock();
|
||||||
urlsService.deleteAssignmentsWithOlderDate(calendar.getTimeInMillis()); // Any error-log is written inside.
|
urlsService.deleteAssignmentsWithOlderDate(calendar.getTimeInMillis()); // Any error-log is written inside.
|
||||||
DatabaseConnector.databaseLock.unlock();
|
DatabaseConnector.databaseLock.unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -295,7 +300,6 @@ public class ScheduledTasks {
|
||||||
enum ActionForWorkerReports {process_previous_failed, process_current_failed, delete_old}
|
enum ActionForWorkerReports {process_previous_failed, process_current_failed, delete_old}
|
||||||
|
|
||||||
// TODO - Maybe make these numbers configurable from the "application.yml" file.
|
// TODO - Maybe make these numbers configurable from the "application.yml" file.
|
||||||
private static final double daysToWaitBeforeDeletion = 7.0;
|
|
||||||
|
|
||||||
private static final double daysToWaitBeforeProcessing = 0.5; // 12 hours
|
private static final double daysToWaitBeforeProcessing = 0.5; // 12 hours
|
||||||
|
|
||||||
|
@ -334,7 +338,7 @@ public class ScheduledTasks {
|
||||||
for ( File workerReportSubDir : workerReportSubDirs )
|
for ( File workerReportSubDir : workerReportSubDirs )
|
||||||
{
|
{
|
||||||
File[] workerReportFiles = workerReportSubDir.listFiles(File::isFile);
|
File[] workerReportFiles = workerReportSubDir.listFiles(File::isFile);
|
||||||
if (workerReportFiles == null) {
|
if ( workerReportFiles == null ) {
|
||||||
logger.error("There was an error when getting the workerReports of \"workerReportSubDir\": " + workerReportSubDir);
|
logger.error("There was an error when getting the workerReports of \"workerReportSubDir\": " + workerReportSubDir);
|
||||||
return;
|
return;
|
||||||
} else if (workerReportFiles.length == 0) {
|
} else if (workerReportFiles.length == 0) {
|
||||||
|
@ -370,7 +374,7 @@ public class ScheduledTasks {
|
||||||
} else { // Deletion..
|
} else { // Deletion..
|
||||||
if ( elapsedDays > daysToWaitBeforeDeletion ) {
|
if ( elapsedDays > daysToWaitBeforeDeletion ) {
|
||||||
// Enough time has passed, the directory should be deleted immediately.
|
// Enough time has passed, the directory should be deleted immediately.
|
||||||
logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + elapsedDays + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted.");
|
logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + df.format(elapsedDays) + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted.");
|
||||||
numWorkerReportsToBeHandled ++;
|
numWorkerReportsToBeHandled ++;
|
||||||
if ( fileUtils.deleteFile(workerReportFile.getAbsolutePath()) // Either successful or failed.
|
if ( fileUtils.deleteFile(workerReportFile.getAbsolutePath()) // Either successful or failed.
|
||||||
&& !workerReportName.contains("successful") // If this has failed or its state is unknown (it was never renamed), then delete the assignment-records. For the successful, they have already been deleted.
|
&& !workerReportName.contains("successful") // If this has failed or its state is unknown (it was never renamed), then delete the assignment-records. For the successful, they have already been deleted.
|
||||||
|
|
|
@ -22,6 +22,7 @@ import org.springframework.stereotype.Service;
|
||||||
import javax.xml.bind.DatatypeConverter;
|
import javax.xml.bind.DatatypeConverter;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.net.ConnectException;
|
import java.net.ConnectException;
|
||||||
|
import java.net.UnknownHostException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
|
@ -249,8 +250,8 @@ public class BulkImportServiceImpl implements BulkImportService {
|
||||||
GenericData.Record record = null;
|
GenericData.Record record = null;
|
||||||
try {
|
try {
|
||||||
record = processBulkImportedFile(fileLocation, provenance, bulkImportSource, timeMillis, additionalLoggingMsg);
|
record = processBulkImportedFile(fileLocation, provenance, bulkImportSource, timeMillis, additionalLoggingMsg);
|
||||||
} catch (ConnectException ce) {
|
} catch (Exception e) {
|
||||||
String errorMsg = "ConnectException when uploading the files of segment_" + segmentCounter + " to the S3 Object Store. Will avoid uploading any file for this segment..";
|
String errorMsg = "Exception when uploading the files of segment_" + segmentCounter + " to the S3 Object Store. Will avoid uploading any file for this segment..";
|
||||||
logger.error(errorMsg + additionalLoggingMsg);
|
logger.error(errorMsg + additionalLoggingMsg);
|
||||||
bulkImportReport.addEvent(errorMsg);
|
bulkImportReport.addEvent(errorMsg);
|
||||||
for ( int j=i; j < numOfFilesInSegment; ++j )
|
for ( int j=i; j < numOfFilesInSegment; ++j )
|
||||||
|
@ -356,7 +357,7 @@ public class BulkImportServiceImpl implements BulkImportService {
|
||||||
|
|
||||||
|
|
||||||
private GenericData.Record processBulkImportedFile(String fileLocation, String provenance, BulkImport.BulkImportSource bulkImportSource, long timeMillis, String additionalLoggingMsg)
|
private GenericData.Record processBulkImportedFile(String fileLocation, String provenance, BulkImport.BulkImportSource bulkImportSource, long timeMillis, String additionalLoggingMsg)
|
||||||
throws ConnectException
|
throws ConnectException, UnknownHostException
|
||||||
{
|
{
|
||||||
File fullTextFile = new File(fileLocation);
|
File fullTextFile = new File(fileLocation);
|
||||||
DocFileData docFileData = new DocFileData(fullTextFile, null, null, null);
|
DocFileData docFileData = new DocFileData(fullTextFile, null, null, null);
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.io.*;
|
||||||
import java.net.ConnectException;
|
import java.net.ConnectException;
|
||||||
import java.net.HttpURLConnection;
|
import java.net.HttpURLConnection;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.net.UnknownHostException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
@ -568,12 +569,12 @@ public class FileUtils {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
String s3Url = constructS3FilenameAndUploadToS3(targetDirectory, fileName, fileNameID, dotFileExtension, datasourceId, hash);
|
String s3Url = constructS3FilenameAndUploadToS3(targetDirectory, fileName, fileNameID, dotFileExtension, datasourceId, hash);
|
||||||
if (s3Url != null) {
|
if ( s3Url != null ) {
|
||||||
setFullTextForMultiplePayloads(fileRelatedPayloads, s3Url);
|
setFullTextForMultiplePayloads(fileRelatedPayloads, s3Url);
|
||||||
//numUploadedFiles ++;
|
//numUploadedFiles ++;
|
||||||
}
|
}
|
||||||
} catch (ConnectException ce) {
|
} catch (Exception e) {
|
||||||
logger.error("Avoid uploading the rest of the files of this batch..");
|
logger.error("Avoid uploading the rest of the files of batch..");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Else, the record will have its file-data set to "null", in the end of the caller method (as it will not have an s3Url as its location).
|
// Else, the record will have its file-data set to "null", in the end of the caller method (as it will not have an s3Url as its location).
|
||||||
|
@ -585,7 +586,7 @@ public class FileUtils {
|
||||||
|
|
||||||
|
|
||||||
public String constructS3FilenameAndUploadToS3(String targetDirectory, String fileName, String fileNameID,
|
public String constructS3FilenameAndUploadToS3(String targetDirectory, String fileName, String fileNameID,
|
||||||
String dotFileExtension, String datasourceId, String hash) throws ConnectException
|
String dotFileExtension, String datasourceId, String hash) throws ConnectException, UnknownHostException
|
||||||
{
|
{
|
||||||
String filenameForS3 = constructS3FileName(fileName, fileNameID, dotFileExtension, datasourceId, hash); // This name is for the uploaded file, in the S3 Object Store.
|
String filenameForS3 = constructS3FileName(fileName, fileNameID, dotFileExtension, datasourceId, hash); // This name is for the uploaded file, in the S3 Object Store.
|
||||||
if ( filenameForS3 == null ) // The error is logged inside.
|
if ( filenameForS3 == null ) // The error is logged inside.
|
||||||
|
@ -598,6 +599,9 @@ public class FileUtils {
|
||||||
} catch (ConnectException ce) {
|
} catch (ConnectException ce) {
|
||||||
logger.error("Could not connect with the S3 Object Store! " + ce.getMessage());
|
logger.error("Could not connect with the S3 Object Store! " + ce.getMessage());
|
||||||
throw ce;
|
throw ce;
|
||||||
|
} catch (UnknownHostException uhe) {
|
||||||
|
logger.error("The S3 Object Store could not be found! " + uhe.getMessage());
|
||||||
|
throw uhe;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Could not upload the local-file \"" + fileFullPath + "\" to the S3 ObjectStore, with S3-filename: \"" + filenameForS3 + "\"!", e);
|
logger.error("Could not upload the local-file \"" + fileFullPath + "\" to the S3 ObjectStore, with S3-filename: \"" + filenameForS3 + "\"!", e);
|
||||||
return null;
|
return null;
|
||||||
|
|
Loading…
Reference in New Issue