222 lines
12 KiB
Java
222 lines
12 KiB
Java
package eu.openaire.urls_controller.components;
|
|
|
|
import eu.openaire.urls_controller.Application;
|
|
import eu.openaire.urls_controller.controllers.ShutdownController;
|
|
import eu.openaire.urls_controller.controllers.StatsController;
|
|
import eu.openaire.urls_controller.controllers.UrlsController;
|
|
import eu.openaire.urls_controller.util.FileUtils;
|
|
import eu.openaire.urls_controller.util.GenericUtils;
|
|
import io.micrometer.core.instrument.MeterRegistry;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
import org.springframework.http.ResponseEntity;
|
|
import org.springframework.scheduling.annotation.Scheduled;
|
|
import org.springframework.stereotype.Component;
|
|
|
|
import java.io.File;
|
|
import java.util.ArrayList;
|
|
import java.util.Date;
|
|
import java.util.List;
|
|
import java.util.Set;
|
|
import java.util.concurrent.Callable;
|
|
import java.util.concurrent.CancellationException;
|
|
import java.util.concurrent.ExecutionException;
|
|
import java.util.concurrent.Future;
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
|
|
|
|
@Component
|
|
public class ScheduledTasks {
|
|
|
|
private static final Logger logger = LoggerFactory.getLogger(ScheduledTasks.class);
|
|
|
|
@Autowired
|
|
FileUtils fileUtils;
|
|
|
|
private StatsController statsController;
|
|
|
|
|
|
private final String workerReportsDirPath;
|
|
|
|
public static final AtomicInteger numOfAllPayloads = new AtomicInteger(0);
|
|
public static final AtomicInteger numOfPayloadsAggregatedByServiceThroughCrawling = new AtomicInteger(0);
|
|
public static final AtomicInteger numOfPayloadsAggregatedByServiceThroughBulkImport = new AtomicInteger(0);
|
|
public static final AtomicInteger numOfPayloadsAggregatedByService = new AtomicInteger(0);
|
|
public static final AtomicInteger numOfLegacyPayloads = new AtomicInteger(0);
|
|
|
|
public static final AtomicInteger numOfRecordsInspectedByServiceThroughCrawling = new AtomicInteger(0);
|
|
|
|
|
|
public ScheduledTasks(@Value("${services.pdfaggregation.controller.workerReportsDirPath}") String workerReportsDirPath, StatsController statsController, MeterRegistry registry)
|
|
{
|
|
if ( !workerReportsDirPath.endsWith("/") )
|
|
workerReportsDirPath += "/";
|
|
|
|
this.workerReportsDirPath = workerReportsDirPath; // This dir will be created later.
|
|
this.statsController = statsController;
|
|
|
|
registry.gauge("numOfAllPayloads", numOfAllPayloads);
|
|
registry.gauge("numOfPayloadsAggregatedByServiceThroughCrawling", numOfPayloadsAggregatedByServiceThroughCrawling);
|
|
registry.gauge("numOfPayloadsAggregatedByServiceThroughBulkImport", numOfPayloadsAggregatedByServiceThroughBulkImport);
|
|
registry.gauge("numOfPayloadsAggregatedByService", numOfPayloadsAggregatedByService);
|
|
registry.gauge("numOfLegacyPayloads", numOfLegacyPayloads);
|
|
registry.gauge("numOfRecordsInspectedByServiceThroughCrawling", numOfRecordsInspectedByServiceThroughCrawling);
|
|
}
|
|
|
|
|
|
@Scheduled(initialDelay = 1_800_000, fixedDelay = 1_800_000) // Execute this method 30 mins from the start and 30 mins after each last execution, in order for some tasks to have been gathered.
|
|
//@Scheduled(initialDelay = 20_000, fixedDelay = 20_000) // Just for testing (every 20 secs).
|
|
public void executeBackgroundTasks()
|
|
{
|
|
List<Callable<Boolean>> tempList = new ArrayList<>(UrlsController.backgroundCallableTasks); // Copy the list in order to know what was executed.
|
|
// So the items added while this execution happens, will be remain in the global-list, while the other will have already be deleted.
|
|
int numOfTasks = tempList.size(); // Since the temp-list is a deep-copy and not a reference, new tasks that are added will not be executed.
|
|
if ( numOfTasks == 0 )
|
|
return;
|
|
|
|
// Immediately delete the selected tasks form the global list, so that if these tasks are not finished before the scheduler runs again, they will not be re-executed.
|
|
for ( Callable<Boolean> selectedTask : tempList )
|
|
UrlsController.backgroundCallableTasks.remove(selectedTask);
|
|
|
|
logger.debug(numOfTasks + " background tasks were found inside the \"backgroundCallableTasks\" list and are about to be executed.");
|
|
// Execute the tasks and wait for them to finish.
|
|
try {
|
|
List<Future<Boolean>> futures = UrlsController.backgroundExecutor.invokeAll(tempList);
|
|
int sizeOfFutures = futures.size();
|
|
for ( int i = 0; i < sizeOfFutures; ++i ) {
|
|
try {
|
|
Boolean value = futures.get(i).get(); // Get and see if an exception is thrown..
|
|
// Add check for the value, if wanted.. (we don't care at the moment)
|
|
} catch (ExecutionException ee) {
|
|
String stackTraceMessage = GenericUtils.getSelectiveStackTrace(ee, null, 15); // These can be serious errors like an "out of memory exception" (Java HEAP).
|
|
logger.error("Task_" + (i+1) + " failed with: " + ee.getMessage() + "\n" + stackTraceMessage);
|
|
} catch (CancellationException ce) {
|
|
logger.error("Task_" + (i+1) + " was cancelled: " + ce.getMessage());
|
|
} catch (IndexOutOfBoundsException ioobe) {
|
|
logger.error("IOOBE for task_" + i + " in the futures-list! " + ioobe.getMessage());
|
|
}
|
|
}
|
|
} catch (Exception e) {
|
|
logger.error("", e);
|
|
}
|
|
}
|
|
|
|
|
|
@Scheduled(initialDelay = 600_000, fixedDelay = 7_200_000) // Check every 2 hours. The initial delay is 10 minutes, to allow to shut down quickly in case of problem when starting, but also account for the initial communication with the Workers, where a problem may appear.
|
|
//@Scheduled(initialDelay = 60_000, fixedDelay = 20_000) // Just for testing (every 20 secs).
|
|
public void checkIfServiceIsReadyForShutdown()
|
|
{
|
|
if ( ! ShutdownController.shouldShutdownService )
|
|
return; // Either the service was never instructed to shut down, or the user canceled the request.
|
|
|
|
// If the workers have shutdown on their own, without been instructed to by the Controller, then the Controller will keep running.
|
|
|
|
Set<String> workerIds = UrlsController.workersInfoMap.keySet();
|
|
if ( workerIds.size() > 0 ) {
|
|
for ( String workerId : workerIds )
|
|
if ( ! UrlsController.workersInfoMap.get(workerId).getHasShutdown() ) // The workerId is certainly inside the map and has a workerInfo value.
|
|
return; // If at least 1 worker is still active, then do not shut down the Controller.
|
|
logger.info("All workers have already shutdown. Shutting down the Controller..");
|
|
} else
|
|
logger.info("No workers have participated in the service yet, so the Controller will shut-down immediately.");
|
|
|
|
Application.gentleAppShutdown();
|
|
}
|
|
|
|
|
|
private static final double daysToWaitBeforeDeletion = 7.0;
|
|
|
|
|
|
@Scheduled(initialDelay = 604_800_000, fixedDelay = 604_800_000) // Run every 7 days.
|
|
//@Scheduled(initialDelay = 1_200_000, fixedDelay = 1_200_000) // Just for testing (every 1200 secs).
|
|
public void checkAndDeleteUnsuccessfulWorkerReports()
|
|
{
|
|
logger.debug("Going to check and remove any unsuccessful workerReports, which are more than 7 days old.");
|
|
try {
|
|
File workerReportsDir = new File(workerReportsDirPath);
|
|
if ( !workerReportsDir.isDirectory() ) {
|
|
logger.error("The \"workerReportsDir\" (" + workerReportsDirPath + ") does not exist!"); // This base dir should always exist!
|
|
return;
|
|
}
|
|
|
|
File[] workerReports = workerReportsDir.listFiles(File::isFile);
|
|
if ( workerReports == null ) {
|
|
logger.error("There was an error when getting the subDirs of \"workerReportsDir\": " + workerReportsDir);
|
|
return;
|
|
}
|
|
else if ( workerReports.length == 0 ) {
|
|
logger.debug("The \"workerReportsDir\" is empty, so there is nothing to delete.");
|
|
return;
|
|
}
|
|
|
|
long currentTime = System.currentTimeMillis();
|
|
|
|
// Loop through the array and print only the directories
|
|
for ( File workerReport : workerReports ) {
|
|
long lastModified = workerReport.lastModified();
|
|
|
|
if ( logger.isTraceEnabled() )
|
|
logger.trace("The workerReport \"" + workerReport.getName() + "\" was last accessed in: " + new Date(lastModified));
|
|
|
|
// Get the difference in hours. /1000 to get seconds, /60 to get minutes, /60 to get hours and /24 to get days.
|
|
double elapsedDays = (double) (currentTime - lastModified) / (1000 * 60 * 60 * 24);
|
|
if ( elapsedDays > daysToWaitBeforeDeletion ) {
|
|
// Enough time has passed, the directory should be deleted immediately.
|
|
String workerReportName = workerReport.getName();
|
|
logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + elapsedDays + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted.");
|
|
fileUtils.deleteFile(workerReport.getAbsolutePath());
|
|
}
|
|
}
|
|
} catch (Exception e) {
|
|
logger.error("", e);
|
|
}
|
|
}
|
|
|
|
|
|
// Scheduled Metrics for Prometheus.
|
|
// Prometheus scrapes for metrics usually every 15 seconds, but that is an extremely short time-period for DB-statistics.
|
|
|
|
@Scheduled(fixedDelay = 21_600_000) // Every 6 hours run thw following queries to the database and register the metric.
|
|
@Scheduled(initialDelay = 60_000, fixedDelay = 1_200_000) // For general testing only.
|
|
//@Scheduled(initialDelay = 60_000, fixedDelay = 120_000) // For debug testing only.
|
|
public void updatePrometheusMetrics()
|
|
{
|
|
ResponseEntity<?> responseEntity = statsController.getNumberOfAllPayloads(true);
|
|
if ( responseEntity.getStatusCode().value() == 200 ) {
|
|
numOfAllPayloads.set(Integer.valueOf(responseEntity.getBody().toString())); // (any other cast method fails)
|
|
} // Any error is already logged.
|
|
|
|
responseEntity = statsController.getNumberOfPayloadsAggregatedByServiceThroughCrawling(true);
|
|
if ( responseEntity.getStatusCode().value() == 200 ) {
|
|
numOfPayloadsAggregatedByServiceThroughCrawling.set(Integer.valueOf(responseEntity.getBody().toString())); // (any other cast method fails)
|
|
} // Any error is already logged.
|
|
|
|
responseEntity = statsController.getNumberOfPayloadsAggregatedByServiceThroughBulkImport(true);
|
|
if ( responseEntity.getStatusCode().value() == 200 ) {
|
|
numOfPayloadsAggregatedByServiceThroughBulkImport.set(Integer.valueOf(responseEntity.getBody().toString())); // (any other cast method fails)
|
|
} // Any error is already logged.
|
|
|
|
responseEntity = statsController.getNumberOfPayloadsAggregatedByService(true);
|
|
if ( responseEntity.getStatusCode().value() == 200 ) {
|
|
numOfPayloadsAggregatedByService.set(Integer.valueOf(responseEntity.getBody().toString())); // (any other cast method fails)
|
|
} // Any error is already logged.
|
|
|
|
responseEntity = statsController.getNumberOfLegacyPayloads(true);
|
|
if ( responseEntity.getStatusCode().value() == 200 ) {
|
|
numOfLegacyPayloads.set(Integer.valueOf(responseEntity.getBody().toString())); // (any other cast method fails)
|
|
} // Any error is already logged.
|
|
|
|
responseEntity = statsController.getNumberOfRecordsInspectedByServiceThroughCrawling(true);
|
|
if ( responseEntity.getStatusCode().value() == 200 ) {
|
|
numOfRecordsInspectedByServiceThroughCrawling.set(Integer.valueOf(responseEntity.getBody().toString())); // (any other cast method fails)
|
|
} // Any error is already logged.
|
|
|
|
|
|
// TODO - Export more complex data; <numOfAllPayloadsPerDatasource>, <numOfAllPayloadsPerYear>,
|
|
// <numOfAggregatedPayloadsPerDatasource>, ..., <numOfBulkImportedPayloadsPerDatasource>, ...
|
|
}
|
|
|
|
}
|