diff --git a/README.md b/README.md index 39d7a90..4c463a0 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ This is a public API to get specific statistics from the PDF Aggregation Service.
This service is designed to respond as fast as possible, by caching the relevant data in memory, instead of running new queries to the database, for every request. -The cached data is updated every 6 hours. +**The cached data is updated every 6 hours**.

diff --git a/build.gradle b/build.gradle index 38c85e8..6dd4be7 100644 --- a/build.gradle +++ b/build.gradle @@ -84,7 +84,7 @@ dependencies { exclude group: 'io.netty', module: 'netty' } - // Add back some updated version of the needed dependencies. + // Add back some updated versions of the needed dependencies. implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8. implementation 'com.fasterxml.woodstox:woodstox-core:6.5.1' diff --git a/installAndRun.sh b/installAndRun.sh index e87151d..608b595 100755 --- a/installAndRun.sh +++ b/installAndRun.sh @@ -57,8 +57,8 @@ if [[ justInstall -eq 0 ]]; then # Run in "detached mode" -d (in the background). (sudo docker compose up --build -d && echo -e "\nThe pdf_aggregation_statistics docker-container started running.\n") || handle_error "Could not build and/or run the 'pdf_aggregation_statistics' docker container!" 4 - echo -e "Waiting 55 seconds before getting the status..\n" - sleep 55 + echo -e "Waiting 60 seconds before getting the status..\n" + sleep 60 sudo docker ps -a || handle_error "Could not get the status of docker-containers!" 6 # Using -a to get the status of failed containers as well. echo -e "\n\nGetting the logs of docker-container \"pdf_aggregation_statistics\":\n" sudo docker logs "$(sudo docker ps -aqf "name=^pdf_aggregation_statistics$")" || handle_error "Could not get the logs of docker-container \"pdf_aggregation_statistics\"!" 7 # Using "regex anchors" to avoid false-positives. Works even if the container is not running, thus showing the error-log. 
diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplication.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplication.java index 5509b4e..4b6c51d 100644 --- a/src/main/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplication.java +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/PdfAggregationStatisticsApplication.java @@ -1,10 +1,14 @@ package eu.openaire.pdf_aggregation_statistics; +import eu.openaire.pdf_aggregation_statistics.components.SchedulingTasks; import eu.openaire.pdf_aggregation_statistics.util.UriBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.boot.CommandLineRunner; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext; +import org.springframework.context.ConfigurableApplicationContext; import org.springframework.context.annotation.Bean; import org.springframework.core.env.Environment; import org.springframework.scheduling.annotation.EnableScheduling; @@ -12,6 +16,7 @@ import org.springframework.web.cors.CorsConfiguration; import org.springframework.web.cors.CorsConfigurationSource; import org.springframework.web.cors.UrlBasedCorsConfigurationSource; +import javax.annotation.PreDestroy; import java.util.Arrays; import java.util.Collections; @@ -19,8 +24,13 @@ import java.util.Collections; @EnableScheduling public class PdfAggregationStatisticsApplication { + private static final Logger logger = LoggerFactory.getLogger(PdfAggregationStatisticsApplication.class); + + + private static ConfigurableApplicationContext context; + public static void main(String[] args) { - SpringApplication.run(PdfAggregationStatisticsApplication.class, args); + context = SpringApplication.run(PdfAggregationStatisticsApplication.class, args); } @@ -37,7 +47,23 @@ 
public class PdfAggregationStatisticsApplication { } - // TODO - Add GENTLE SHUTDOWN BEAN + public static void gentleAppShutdown() + { + int exitCode = 0; + try { + exitCode = SpringApplication.exit(context, () -> 0); // The "PreDestroy" method will be called. (the "context" will be closed automatically (I checked it)) + } catch (IllegalArgumentException iae) { + logger.error(iae.getMessage()); // This will say "Context must not be null", in case the "gentleAppShutdown()" was called too early in the app's lifetime. But it's ok. + } + System.exit(exitCode); + } + + + @PreDestroy + public void preDestroy() { + // TODO - Add any future shut-down logic here (e.g. shutting down threads). + logger.info("Exiting.."); + } @Bean diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java index 2aac50f..cf594ed 100644 --- a/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/components/SchedulingTasks.java @@ -1,5 +1,6 @@ package eu.openaire.pdf_aggregation_statistics.components; +import eu.openaire.pdf_aggregation_statistics.PdfAggregationStatisticsApplication; import eu.openaire.pdf_aggregation_statistics.services.StatsService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -17,21 +18,23 @@ public class SchedulingTasks { StatsService statsService; + private static boolean runningFirstTime = true; + + @Scheduled(initialDelay = 1, fixedDelay = 21_600_000) // Run right after initialization and then every 6 hours. public void gatherPayloadsPerDatasource() { // Request the number of payloads for each datasource and keep them in a ConcurrentHashMap, // where the "key" will be the "datasourceId" and the "value" will be the numOfPayloads for that datasource. 
- // When the user requests the numOfPayloads for a given datasourceI, the app will return the rwsult immediately - // It will be a quick O(1) get operation in the HashMap. + // When the user requests the numOfPayloads for a given datasourceId, the app will return the result immediately. + // It will be a quick O(1) GET operation in the ConcurrentHashMap. - statsService.gatherNumberOfPayloadsPerDatasource(); - - // TODO - If this is the first time running (check a boolean) - // TODO - AND the above method failed, then gently shutdown the app! - // todo - there is no point in keeping it running if it cannot provide any stat! + if ( ! statsService.gatherNumberOfPayloadsPerDatasource() + && runningFirstTime ) + PdfAggregationStatisticsApplication.gentleAppShutdown(); + runningFirstTime = false; } } diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/controllers/StatsController.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/controllers/StatsController.java index 560d766..9b9ce24 100644 --- a/src/main/java/eu/openaire/pdf_aggregation_statistics/controllers/StatsController.java +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/controllers/StatsController.java @@ -51,14 +51,6 @@ public class StatsController { // The Map has the numOfPayloads for all datasources, even for newly added ones. // If the given datasourceID is not found in the map, then either is not a datasource or that datasource is not participating in the OpenAIRE Graph. - if ( StatsServiceImpl.datasourcesWithNumOfPayloads.isEmpty() ) { - errorMsg = "The \"datasourcesWithNumOfPayloads\" map was not populated!"; - logger.error(errorMsg); - return ResponseEntity.internalServerError().body(errorMsg); - } - // TODO - The above check is expensive (check the code behind) and unessesery to run every time, since if the map is populated at init then it will not be empty!! - // TODO - if the map was not populated at init then the whole service should not run!! 
(instead of returning 500-SERVER-ERROR in every request) - Integer numPayloads = StatsServiceImpl.datasourcesWithNumOfPayloads.get(datasourceId); if ( numPayloads == null ) return ResponseEntity.notFound().build(); diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java index 7ef9dd1..a7c405d 100644 --- a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsService.java @@ -3,6 +3,6 @@ package eu.openaire.pdf_aggregation_statistics.services; public interface StatsService { - void gatherNumberOfPayloadsPerDatasource(); + boolean gatherNumberOfPayloadsPerDatasource(); } diff --git a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java index 9b98f21..9c198eb 100644 --- a/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java +++ b/src/main/java/eu/openaire/pdf_aggregation_statistics/services/StatsServiceImpl.java @@ -28,7 +28,7 @@ public class StatsServiceImpl implements StatsService { public static final ConcurrentHashMap datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000); // The number of datasources is around 10_000. 
- public void gatherNumberOfPayloadsPerDatasource() + public boolean gatherNumberOfPayloadsPerDatasource() { final String getNumberOfPayloadsPerDatasourceQuery = "select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" + @@ -49,10 +49,13 @@ public class StatsServiceImpl implements StatsService { } }); logger.info("The \"datasourcesWithNumOfPayloads\" map was populated/updated."); + return true; } catch (EmptyResultDataAccessException erdae) { - logger.warn("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery); + logger.error("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery); + return false; } catch (Exception e) { logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e); + return false; } }