2021-03-16 14:25:15 +01:00
package eu.openaire.urls_controller.components ;
2023-05-24 12:42:29 +02:00
import eu.openaire.urls_controller.Application ;
import eu.openaire.urls_controller.controllers.ShutdownController ;
2023-06-19 13:42:00 +02:00
import eu.openaire.urls_controller.controllers.StatsController ;
2023-05-24 12:42:29 +02:00
import eu.openaire.urls_controller.controllers.UrlsController ;
2023-05-24 15:59:42 +02:00
import eu.openaire.urls_controller.util.FileUtils ;
2023-05-11 02:07:55 +02:00
import eu.openaire.urls_controller.util.GenericUtils ;
2023-06-19 13:42:00 +02:00
import io.micrometer.core.instrument.MeterRegistry ;
2021-03-16 14:25:15 +01:00
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
2023-05-24 15:59:42 +02:00
import org.springframework.beans.factory.annotation.Autowired ;
import org.springframework.beans.factory.annotation.Value ;
2023-06-19 13:42:00 +02:00
import org.springframework.http.ResponseEntity ;
2023-05-11 02:07:55 +02:00
import org.springframework.scheduling.annotation.Scheduled ;
2021-03-16 14:25:15 +01:00
import org.springframework.stereotype.Component ;
2023-05-24 15:59:42 +02:00
import java.io.File ;
2023-05-11 02:07:55 +02:00
import java.util.ArrayList ;
2023-05-24 15:59:42 +02:00
import java.util.Date ;
2023-05-11 02:07:55 +02:00
import java.util.List ;
2023-06-15 22:19:36 +02:00
import java.util.Set ;
2023-05-11 02:07:55 +02:00
import java.util.concurrent.Callable ;
import java.util.concurrent.CancellationException ;
import java.util.concurrent.ExecutionException ;
import java.util.concurrent.Future ;
2023-06-19 13:42:00 +02:00
import java.util.concurrent.atomic.AtomicInteger ;
2021-03-16 14:25:15 +01:00
@Component
public class ScheduledTasks {
private static final Logger logger = LoggerFactory . getLogger ( ScheduledTasks . class ) ;
2023-05-24 15:59:42 +02:00
@Autowired
FileUtils fileUtils ;
2021-03-16 14:25:15 +01:00
2023-06-19 13:42:00 +02:00
private StatsController statsController ;
2023-05-24 15:59:42 +02:00
private final String workerReportsDirPath ;
2023-06-23 14:22:26 +02:00
public static final AtomicInteger numOfAllPayloads = new AtomicInteger ( 0 ) ;
public static final AtomicInteger numOfPayloadsAggregatedByServiceThroughCrawling = new AtomicInteger ( 0 ) ;
public static final AtomicInteger numOfPayloadsAggregatedByServiceThroughBulkImport = new AtomicInteger ( 0 ) ;
public static final AtomicInteger numOfPayloadsAggregatedByService = new AtomicInteger ( 0 ) ;
public static final AtomicInteger numOfLegacyPayloads = new AtomicInteger ( 0 ) ;
public static final AtomicInteger numOfRecordsInspectedByServiceThroughCrawling = new AtomicInteger ( 0 ) ;
2023-06-19 13:42:00 +02:00
2023-05-24 15:59:42 +02:00
2023-06-19 13:42:00 +02:00
public ScheduledTasks ( @Value ( " ${services.pdfaggregation.controller.workerReportsDirPath} " ) String workerReportsDirPath , StatsController statsController , MeterRegistry registry )
2023-05-24 15:59:42 +02:00
{
if ( ! workerReportsDirPath . endsWith ( " / " ) )
workerReportsDirPath + = " / " ;
this . workerReportsDirPath = workerReportsDirPath ; // This dir will be created later.
2023-06-19 13:42:00 +02:00
this . statsController = statsController ;
registry . gauge ( " numOfAllPayloads " , numOfAllPayloads ) ;
2023-06-23 14:22:26 +02:00
registry . gauge ( " numOfPayloadsAggregatedByServiceThroughCrawling " , numOfPayloadsAggregatedByServiceThroughCrawling ) ;
registry . gauge ( " numOfPayloadsAggregatedByServiceThroughBulkImport " , numOfPayloadsAggregatedByServiceThroughBulkImport ) ;
registry . gauge ( " numOfPayloadsAggregatedByService " , numOfPayloadsAggregatedByService ) ;
registry . gauge ( " numOfLegacyPayloads " , numOfLegacyPayloads ) ;
registry . gauge ( " numOfRecordsInspectedByServiceThroughCrawling " , numOfRecordsInspectedByServiceThroughCrawling ) ;
2023-05-24 15:59:42 +02:00
}
2023-05-24 23:34:36 +02:00
@Scheduled ( initialDelay = 1_800_000 , fixedDelay = 1_800_000 ) // Execute this method 30 mins from the start and 30 mins after each last execution, in order for some tasks to have been gathered.
//@Scheduled(initialDelay = 20_000, fixedDelay = 20_000) // Just for testing (every 20 secs).
2023-05-11 02:07:55 +02:00
public void executeBackgroundTasks ( )
{
2023-05-29 11:21:48 +02:00
List < Callable < Boolean > > tempList = new ArrayList < > ( UrlsController . backgroundCallableTasks ) ; // Copy the list in order to know what was executed.
2023-05-26 12:08:00 +02:00
// So the items added while this execution happens, will be remain in the global-list, while the other will have already be deleted.
2023-05-11 02:07:55 +02:00
int numOfTasks = tempList . size ( ) ; // Since the temp-list is a deep-copy and not a reference, new tasks that are added will not be executed.
if ( numOfTasks = = 0 )
return ;
2023-05-26 12:08:00 +02:00
// Immediately delete the selected tasks form the global list, so that if these tasks are not finished before the scheduler runs again, they will not be re-executed.
2023-06-06 15:49:53 +02:00
for ( Callable < Boolean > selectedTask : tempList )
2023-05-29 11:21:48 +02:00
UrlsController . backgroundCallableTasks . remove ( selectedTask ) ;
2023-05-26 12:08:00 +02:00
2023-05-11 02:07:55 +02:00
logger . debug ( numOfTasks + " background tasks were found inside the \" backgroundCallableTasks \" list and are about to be executed. " ) ;
// Execute the tasks and wait for them to finish.
try {
2023-05-29 11:21:48 +02:00
List < Future < Boolean > > futures = UrlsController . backgroundExecutor . invokeAll ( tempList ) ;
2023-05-11 02:07:55 +02:00
int sizeOfFutures = futures . size ( ) ;
for ( int i = 0 ; i < sizeOfFutures ; + + i ) {
try {
Boolean value = futures . get ( i ) . get ( ) ; // Get and see if an exception is thrown..
// Add check for the value, if wanted.. (we don't care at the moment)
} catch ( ExecutionException ee ) {
String stackTraceMessage = GenericUtils . getSelectiveStackTrace ( ee , null , 15 ) ; // These can be serious errors like an "out of memory exception" (Java HEAP).
logger . error ( " Task_ " + ( i + 1 ) + " failed with: " + ee . getMessage ( ) + " \ n " + stackTraceMessage ) ;
} catch ( CancellationException ce ) {
logger . error ( " Task_ " + ( i + 1 ) + " was cancelled: " + ce . getMessage ( ) ) ;
} catch ( IndexOutOfBoundsException ioobe ) {
logger . error ( " IOOBE for task_ " + i + " in the futures-list! " + ioobe . getMessage ( ) ) ;
}
}
} catch ( Exception e ) {
logger . error ( " " , e ) ;
}
2021-03-16 14:25:15 +01:00
}
2023-05-11 02:07:55 +02:00
2023-05-24 12:42:29 +02:00
2023-06-06 15:49:53 +02:00
@Scheduled ( initialDelay = 600_000 , fixedDelay = 7_200_000 ) // Check every 2 hours. The initial delay is 10 minutes, to allow to shut down quickly in case of problem when starting, but also account for the initial communication with the Workers, where a problem may appear.
2023-05-24 15:59:42 +02:00
//@Scheduled(initialDelay = 60_000, fixedDelay = 20_000) // Just for testing (every 20 secs).
2023-05-24 12:42:29 +02:00
public void checkIfServiceIsReadyForShutdown ( )
{
if ( ! ShutdownController . shouldShutdownService )
return ; // Either the service was never instructed to shut down, or the user canceled the request.
2023-05-27 01:36:05 +02:00
// If the workers have shutdown on their own, without been instructed to by the Controller, then the Controller will keep running.
2023-06-15 22:19:36 +02:00
Set < String > workerIds = UrlsController . workersInfoMap . keySet ( ) ;
if ( workerIds . size ( ) > 0 ) {
for ( String workerId : workerIds )
if ( ! UrlsController . workersInfoMap . get ( workerId ) . getHasShutdown ( ) ) // The workerId is certainly inside the map and has a workerInfo value.
return ; // If at least 1 worker is still active, then do not shut down the Controller.
logger . info ( " All workers have already shutdown. Shutting down the Controller.. " ) ;
} else
logger . info ( " No workers have participated in the service yet, so the Controller will shut-down immediately. " ) ;
2023-05-24 12:42:29 +02:00
Application . gentleAppShutdown ( ) ;
}
2023-05-24 15:59:42 +02:00
2023-05-24 23:34:36 +02:00
private static final double daysToWaitBeforeDeletion = 7 . 0 ;
2023-05-24 15:59:42 +02:00
2023-05-24 23:34:36 +02:00
@Scheduled ( initialDelay = 604_800_000 , fixedDelay = 604_800_000 ) // Run every 7 days.
//@Scheduled(initialDelay = 1_200_000, fixedDelay = 1_200_000) // Just for testing (every 1200 secs).
2023-05-24 15:59:42 +02:00
public void checkAndDeleteUnsuccessfulWorkerReports ( )
{
logger . debug ( " Going to check and remove any unsuccessful workerReports, which are more than 7 days old. " ) ;
try {
File workerReportsDir = new File ( workerReportsDirPath ) ;
if ( ! workerReportsDir . isDirectory ( ) ) {
logger . error ( " The \" workerReportsDir \" ( " + workerReportsDirPath + " ) does not exist! " ) ; // This base dir should always exist!
return ;
}
File [ ] workerReports = workerReportsDir . listFiles ( File : : isFile ) ;
if ( workerReports = = null ) {
logger . error ( " There was an error when getting the subDirs of \" workerReportsDir \" : " + workerReportsDir ) ;
return ;
}
2023-05-24 23:34:36 +02:00
else if ( workerReports . length = = 0 ) {
2023-05-24 15:59:42 +02:00
logger . debug ( " The \" workerReportsDir \" is empty, so there is nothing to delete. " ) ;
return ;
}
long currentTime = System . currentTimeMillis ( ) ;
// Loop through the array and print only the directories
for ( File workerReport : workerReports ) {
long lastModified = workerReport . lastModified ( ) ;
if ( logger . isTraceEnabled ( ) )
logger . trace ( " The workerReport \" " + workerReport . getName ( ) + " \" was last accessed in: " + new Date ( lastModified ) ) ;
// Get the difference in hours. /1000 to get seconds, /60 to get minutes, /60 to get hours and /24 to get days.
2023-05-24 23:34:36 +02:00
double elapsedDays = ( double ) ( currentTime - lastModified ) / ( 1000 * 60 * 60 * 24 ) ;
if ( elapsedDays > daysToWaitBeforeDeletion ) {
2023-05-24 15:59:42 +02:00
// Enough time has passed, the directory should be deleted immediately.
String workerReportName = workerReport . getName ( ) ;
2023-05-24 23:34:36 +02:00
logger . warn ( " The workerReport \" " + workerReportName + " \" was accessed " + elapsedDays + " days ago (passed the " + daysToWaitBeforeDeletion + " days limit) and will be deleted. " ) ;
2023-05-24 15:59:42 +02:00
fileUtils . deleteFile ( workerReport . getAbsolutePath ( ) ) ;
}
}
} catch ( Exception e ) {
logger . error ( " " , e ) ;
}
}
2023-06-19 13:42:00 +02:00
// Scheduled Metrics for Prometheus.
// Prometheus scrapes for metrics usually every 15 seconds, but that is an extremely short time-period for DB-statistics.
2023-07-13 17:32:45 +02:00
@Scheduled ( fixedDelay = 21_600_000 ) // Every 6 hours run the following queries to the database and register the metric.
//@Scheduled(initialDelay = 60_000, fixedDelay = 1_200_000) // For general testing only.
2023-07-05 16:10:30 +02:00
//@Scheduled(initialDelay = 60_000, fixedDelay = 120_000) // For debug testing only.
2023-06-19 13:42:00 +02:00
public void updatePrometheusMetrics ( )
{
ResponseEntity < ? > responseEntity = statsController . getNumberOfAllPayloads ( true ) ;
if ( responseEntity . getStatusCode ( ) . value ( ) = = 200 ) {
2023-07-13 17:32:45 +02:00
numOfAllPayloads . set ( Integer . parseInt ( responseEntity . getBody ( ) . toString ( ) ) ) ; // (any other cast method fails)
2023-06-19 13:42:00 +02:00
} // Any error is already logged.
2023-06-23 14:22:26 +02:00
responseEntity = statsController . getNumberOfPayloadsAggregatedByServiceThroughCrawling ( true ) ;
if ( responseEntity . getStatusCode ( ) . value ( ) = = 200 ) {
2023-07-13 17:32:45 +02:00
numOfPayloadsAggregatedByServiceThroughCrawling . set ( Integer . parseInt ( responseEntity . getBody ( ) . toString ( ) ) ) ; // (any other cast method fails)
2023-06-23 14:22:26 +02:00
} // Any error is already logged.
responseEntity = statsController . getNumberOfPayloadsAggregatedByServiceThroughBulkImport ( true ) ;
if ( responseEntity . getStatusCode ( ) . value ( ) = = 200 ) {
2023-07-13 17:32:45 +02:00
numOfPayloadsAggregatedByServiceThroughBulkImport . set ( Integer . parseInt ( responseEntity . getBody ( ) . toString ( ) ) ) ; // (any other cast method fails)
2023-06-23 14:22:26 +02:00
} // Any error is already logged.
responseEntity = statsController . getNumberOfPayloadsAggregatedByService ( true ) ;
if ( responseEntity . getStatusCode ( ) . value ( ) = = 200 ) {
2023-07-13 17:32:45 +02:00
numOfPayloadsAggregatedByService . set ( Integer . parseInt ( responseEntity . getBody ( ) . toString ( ) ) ) ; // (any other cast method fails)
2023-06-23 14:22:26 +02:00
} // Any error is already logged.
responseEntity = statsController . getNumberOfLegacyPayloads ( true ) ;
if ( responseEntity . getStatusCode ( ) . value ( ) = = 200 ) {
2023-07-13 17:32:45 +02:00
numOfLegacyPayloads . set ( Integer . parseInt ( responseEntity . getBody ( ) . toString ( ) ) ) ; // (any other cast method fails)
2023-06-23 14:22:26 +02:00
} // Any error is already logged.
responseEntity = statsController . getNumberOfRecordsInspectedByServiceThroughCrawling ( true ) ;
2023-06-19 13:42:00 +02:00
if ( responseEntity . getStatusCode ( ) . value ( ) = = 200 ) {
2023-07-13 17:32:45 +02:00
numOfRecordsInspectedByServiceThroughCrawling . set ( Integer . parseInt ( responseEntity . getBody ( ) . toString ( ) ) ) ; // (any other cast method fails)
2023-06-19 13:42:00 +02:00
} // Any error is already logged.
// TODO - Export more complex data; <numOfAllPayloadsPerDatasource>, <numOfAllPayloadsPerYear>,
// <numOfAggregatedPayloadsPerDatasource>, ..., <numOfBulkImportedPayloadsPerDatasource>, ...
}
2023-03-21 16:04:28 +01:00
}