UrlsController/src/main/java/eu/openaire/urls_controller/components/ScheduledTasks.java

226 lines
12 KiB
Java

package eu.openaire.urls_controller.components;
import eu.openaire.urls_controller.Application;
import eu.openaire.urls_controller.controllers.ShutdownController;
import eu.openaire.urls_controller.controllers.StatsController;
import eu.openaire.urls_controller.controllers.UrlsController;
import eu.openaire.urls_controller.util.FileUtils;
import eu.openaire.urls_controller.util.GenericUtils;
import io.micrometer.core.instrument.MeterRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.ResponseEntity;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.io.File;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
@Component
public class ScheduledTasks {
private static final Logger logger = LoggerFactory.getLogger(ScheduledTasks.class);
@Autowired
FileUtils fileUtils;
private StatsController statsController;
private final String workerReportsDirPath;
public static final AtomicInteger numOfAllPayloads = new AtomicInteger(0);
public static final AtomicInteger numOfPayloadsAggregatedByServiceThroughCrawling = new AtomicInteger(0);
public static final AtomicInteger numOfPayloadsAggregatedByServiceThroughBulkImport = new AtomicInteger(0);
public static final AtomicInteger numOfPayloadsAggregatedByService = new AtomicInteger(0);
public static final AtomicInteger numOfLegacyPayloads = new AtomicInteger(0);
public static final AtomicInteger numOfRecordsInspectedByServiceThroughCrawling = new AtomicInteger(0);
public ScheduledTasks(@Value("${services.pdfaggregation.controller.workerReportsDirPath}") String workerReportsDirPath, StatsController statsController, MeterRegistry registry)
{
if ( !workerReportsDirPath.endsWith("/") )
workerReportsDirPath += "/";
this.workerReportsDirPath = workerReportsDirPath; // This dir will be created later.
this.statsController = statsController;
registry.gauge("numOfAllPayloads", numOfAllPayloads);
registry.gauge("numOfPayloadsAggregatedByServiceThroughCrawling", numOfPayloadsAggregatedByServiceThroughCrawling);
registry.gauge("numOfPayloadsAggregatedByServiceThroughBulkImport", numOfPayloadsAggregatedByServiceThroughBulkImport);
registry.gauge("numOfPayloadsAggregatedByService", numOfPayloadsAggregatedByService);
registry.gauge("numOfLegacyPayloads", numOfLegacyPayloads);
registry.gauge("numOfRecordsInspectedByServiceThroughCrawling", numOfRecordsInspectedByServiceThroughCrawling);
}
@Scheduled(initialDelay = 1_800_000, fixedDelay = 900_000) // Execute this method 30 mins from the start and 15 mins after each last execution, in order for some tasks to have been gathered.
//@Scheduled(initialDelay = 60_000, fixedDelay = 20_000) // Just for testing (every 20 secs).
// The initial delay is larger, because we have to wait some time for at least one worker to finish retrieving the full-texts from thousands of publications, whereas, later we will have a lot of workerReports waiting to be processed.
public void executeBackgroundTasks()
{
List<Callable<Boolean>> tempList = new ArrayList<>(UrlsController.backgroundCallableTasks); // Copy the list in order to know what was executed.
// So the items added while this execution happens, will remain in the global-list, while the others will have already been deleted.
// Also, the "Executor.invokeAll()" requires an "unchanged" list, otherwise there will be "undefined results".
int numOfTasks = tempList.size(); // Since the temp-list is a deep-copy and not a reference, new tasks that are added will not be executed.
if ( numOfTasks == 0 )
return;
// Immediately delete the selected tasks form the global list, so that if these tasks are not finished before the scheduler runs again, they will not be re-processed.
for ( Callable<Boolean> selectedTask : tempList )
UrlsController.backgroundCallableTasks.remove(selectedTask);
logger.debug(numOfTasks + " background tasks were found inside the \"backgroundCallableTasks\" list and are about to be executed.");
// Execute the tasks and wait for them to finish.
try {
List<Future<Boolean>> futures = UrlsController.backgroundExecutor.invokeAll(tempList);
int sizeOfFutures = futures.size();
for ( int i = 0; i < sizeOfFutures; ++i ) {
try {
Boolean value = futures.get(i).get(); // Get and see if an exception is thrown..
// Add check for the value, if wanted.. (we don't care at the moment)
} catch (ExecutionException ee) {
String stackTraceMessage = GenericUtils.getSelectiveStackTrace(ee, null, 15); // These can be serious errors like an "out of memory exception" (Java HEAP).
logger.error("Task_" + (i+1) + " failed with: " + ee.getMessage() + "\n" + stackTraceMessage);
} catch (CancellationException ce) {
logger.error("Task_" + (i+1) + " was cancelled: " + ce.getMessage());
} catch (IndexOutOfBoundsException ioobe) {
logger.error("IOOBE for task_" + i + " in the futures-list! " + ioobe.getMessage());
}
}
} catch (Exception e) {
logger.error("", e);
}
}
@Scheduled(initialDelay = 600_000, fixedDelay = 7_200_000) // Check every 2 hours. The initial delay is 10 minutes, to allow to shut down quickly in case of problem when starting, but also account for the initial communication with the Workers, where a problem may appear.
//@Scheduled(initialDelay = 60_000, fixedDelay = 20_000) // Just for testing (every 20 secs).
public void checkIfServiceIsReadyForShutdown()
{
if ( ! ShutdownController.shouldShutdownService )
return; // Either the service was never instructed to shut down, or the user canceled the request.
// Check whether there are still background tasks to be processed. Either workerReport or Bulk-import requests.
if ( UrlsController.backgroundCallableTasks.size() > 0 )
return;
Set<String> workerIds = UrlsController.workersInfoMap.keySet();
if ( workerIds.size() > 0 ) {
for ( String workerId : workerIds )
if ( ! UrlsController.workersInfoMap.get(workerId).getHasShutdown() ) // The workerId is certainly inside the map and has a workerInfo value.
return; // If at least 1 worker is still active, then do not shut down the Controller.
logger.info("All workers have already shutdown. Shutting down the Controller..");
} else
logger.info("No workers have participated in the service yet, so the Controller will shut-down immediately.");
// If one worker has crashed, then it will have not informed the Controller. So the controller will think that it is still running and will not shut down.
Application.gentleAppShutdown();
}
private static final double daysToWaitBeforeDeletion = 7.0;
@Scheduled(initialDelay = 604_800_000, fixedDelay = 604_800_000) // Run every 7 days.
//@Scheduled(initialDelay = 1_200_000, fixedDelay = 1_200_000) // Just for testing (every 1200 secs).
public void checkAndDeleteUnsuccessfulWorkerReports()
{
logger.debug("Going to check and remove any unsuccessful workerReports, which are more than 7 days old.");
try {
File workerReportsDir = new File(workerReportsDirPath);
if ( !workerReportsDir.isDirectory() ) {
logger.error("The \"workerReportsDir\" (" + workerReportsDirPath + ") does not exist!"); // This base dir should always exist!
return;
}
File[] workerReports = workerReportsDir.listFiles(File::isFile);
if ( workerReports == null ) {
logger.error("There was an error when getting the subDirs of \"workerReportsDir\": " + workerReportsDir);
return;
} else if ( workerReports.length == 0 ) {
logger.debug("The \"workerReportsDir\" is empty, so there is nothing to delete.");
return;
}
long currentTime = System.currentTimeMillis();
for ( File workerReport : workerReports ) {
long lastModified = workerReport.lastModified();
if ( logger.isTraceEnabled() )
logger.trace("The workerReport \"" + workerReport.getName() + "\" was last accessed in: " + new Date(lastModified));
// Get the difference in hours. /1000 to get seconds, /60 to get minutes, /60 to get hours and /24 to get days.
double elapsedDays = (double) (currentTime - lastModified) / (1000 * 60 * 60 * 24);
if ( elapsedDays > daysToWaitBeforeDeletion ) {
// Enough time has passed, the directory should be deleted immediately.
String workerReportName = workerReport.getName();
logger.warn("The workerReport \"" + workerReportName + "\" was accessed " + elapsedDays + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted.");
fileUtils.deleteFile(workerReport.getAbsolutePath());
}
}
} catch (Exception e) {
logger.error("", e);
}
}
// Scheduled Metrics for Prometheus.
// Prometheus scrapes for metrics usually every 15 seconds, but that is an extremely short time-period for DB-statistics.
@Scheduled(fixedDelay = 21_600_000) // Every 6 hours run the following queries to the database and register the metric.
//@Scheduled(initialDelay = 60_000, fixedDelay = 1_200_000) // For general testing only.
//@Scheduled(initialDelay = 60_000, fixedDelay = 120_000) // For debug testing only.
public void updatePrometheusMetrics()
{
ResponseEntity<?> responseEntity = statsController.getNumberOfAllPayloads(true);
if ( responseEntity.getStatusCode().value() == 200 ) {
numOfAllPayloads.set(Integer.parseInt(responseEntity.getBody().toString())); // (any other cast method fails)
} // Any error is already logged.
responseEntity = statsController.getNumberOfPayloadsAggregatedByServiceThroughCrawling(true);
if ( responseEntity.getStatusCode().value() == 200 ) {
numOfPayloadsAggregatedByServiceThroughCrawling.set(Integer.parseInt(responseEntity.getBody().toString())); // (any other cast method fails)
} // Any error is already logged.
responseEntity = statsController.getNumberOfPayloadsAggregatedByServiceThroughBulkImport(true);
if ( responseEntity.getStatusCode().value() == 200 ) {
numOfPayloadsAggregatedByServiceThroughBulkImport.set(Integer.parseInt(responseEntity.getBody().toString())); // (any other cast method fails)
} // Any error is already logged.
responseEntity = statsController.getNumberOfPayloadsAggregatedByService(true);
if ( responseEntity.getStatusCode().value() == 200 ) {
numOfPayloadsAggregatedByService.set(Integer.parseInt(responseEntity.getBody().toString())); // (any other cast method fails)
} // Any error is already logged.
responseEntity = statsController.getNumberOfLegacyPayloads(true);
if ( responseEntity.getStatusCode().value() == 200 ) {
numOfLegacyPayloads.set(Integer.parseInt(responseEntity.getBody().toString())); // (any other cast method fails)
} // Any error is already logged.
responseEntity = statsController.getNumberOfRecordsInspectedByServiceThroughCrawling(true);
if ( responseEntity.getStatusCode().value() == 200 ) {
numOfRecordsInspectedByServiceThroughCrawling.set(Integer.parseInt(responseEntity.getBody().toString())); // (any other cast method fails)
} // Any error is already logged.
// TODO - Export more complex data; <numOfAllPayloadsPerDatasource>, <numOfAllPayloadsPerYear>,
// <numOfAggregatedPayloadsPerDatasource>, ..., <numOfBulkImportedPayloadsPerDatasource>, ...
}
}