- Improve performance by no longer checking, on every request, whether the huge ConcurrentHashMap is empty. After the first data-gathering, it will never be empty, and the checking-code is expensive.

- In case of an error retrieving data from the DB during initialization, gently shut down the app. There is no point in keeping the app running just to serve 500-Server-Errors.
- Code polishing.
This commit is contained in:
Lampros Smyrnaios 2023-06-22 01:54:47 +03:00
parent 64ef1bcf40
commit 7e5c338de3
8 changed files with 48 additions and 24 deletions

View File

@ -2,7 +2,7 @@
This is a public API to get specific statistics from the PDF Aggregation Service.<br>
This service is designed to respond as fast as possible, by caching the relevant data in memory, instead of running new queries to the database for every request.
The cached data is updated every 6 hours.
**The cached data is updated every 6 hours**.
<br>
<br>

View File

@ -84,7 +84,7 @@ dependencies {
exclude group: 'io.netty', module: 'netty'
}
// Add back some updated version of the needed dependencies.
// Add back some updated versions of the needed dependencies.
implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8.
implementation 'com.fasterxml.woodstox:woodstox-core:6.5.1'

View File

@ -57,8 +57,8 @@ if [[ justInstall -eq 0 ]]; then
# Run in "detached mode" -d (in the background).
(sudo docker compose up --build -d && echo -e "\nThe pdf_aggregation_statistics docker-container started running.\n") || handle_error "Could not build and/or run the 'pdf_aggregation_statistics' docker container!" 4
echo -e "Waiting 55 seconds before getting the status..\n"
sleep 55
echo -e "Waiting 60 seconds before getting the status..\n"
sleep 60
sudo docker ps -a || handle_error "Could not get the status of docker-containers!" 6 # Using -a to get the status of failed containers as well.
echo -e "\n\nGetting the logs of docker-container \"pdf_aggregation_statistics\":\n"
sudo docker logs "$(sudo docker ps -aqf "name=^pdf_aggregation_statistics$")" || handle_error "Could not get the logs of docker-container \"pdf_aggregation_statistics\"!" 7 # Using "regex anchors" to avoid false-positives. Works even if the container is not running, thus showing the error-log.

View File

@ -1,10 +1,14 @@
package eu.openaire.pdf_aggregation_statistics;
import eu.openaire.pdf_aggregation_statistics.components.SchedulingTasks;
import eu.openaire.pdf_aggregation_statistics.util.UriBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext;
import org.springframework.context.ConfigurableApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.core.env.Environment;
import org.springframework.scheduling.annotation.EnableScheduling;
@ -12,6 +16,7 @@ import org.springframework.web.cors.CorsConfiguration;
import org.springframework.web.cors.CorsConfigurationSource;
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;
import javax.annotation.PreDestroy;
import java.util.Arrays;
import java.util.Collections;
@ -19,8 +24,13 @@ import java.util.Collections;
@EnableScheduling
public class PdfAggregationStatisticsApplication {
private static final Logger logger = LoggerFactory.getLogger(PdfAggregationStatisticsApplication.class);
private static ConfigurableApplicationContext context;
public static void main(String[] args) {
SpringApplication.run(PdfAggregationStatisticsApplication.class, args);
context = SpringApplication.run(PdfAggregationStatisticsApplication.class, args);
}
@ -37,7 +47,23 @@ public class PdfAggregationStatisticsApplication {
}
// TODO - Add GENTLE SHUTDOWN BEAN
/**
 * Shuts the application down and terminates the JVM.
 * <p>
 * The stored application {@code context} is handed to {@link SpringApplication#exit} together with
 * an exit-code generator which always yields {@code 0}. Whatever code that call returns is then
 * passed to {@link System#exit(int)}. If the call throws an {@link IllegalArgumentException},
 * its message is logged and the JVM still exits, with code {@code 0}.
 */
public static void gentleAppShutdown()
{
    int status;
    try {
        // The "PreDestroy" method will be called and the "context" gets closed by this call (as verified by the original author).
        status = SpringApplication.exit(context, () -> 0);
    } catch (IllegalArgumentException contextProblem) {
        // Expected to say "Context must not be null", when this method is called too early in the app's lifetime
        // (i.e. before the "context" field was assigned). That is acceptable; we still exit below.
        logger.error(contextProblem.getMessage());
        status = 0;
    }
    System.exit(status);
}
/**
 * Shutdown callback, registered through the {@code @PreDestroy} annotation.
 * For now, it only logs that the application is exiting.
 */
@PreDestroy
public void preDestroy()
{
    // TODO - Add any future shut-down logic here (e.g. shutting down threads).
    logger.info("Exiting..");
}
@Bean

View File

@ -1,5 +1,6 @@
package eu.openaire.pdf_aggregation_statistics.components;
import eu.openaire.pdf_aggregation_statistics.PdfAggregationStatisticsApplication;
import eu.openaire.pdf_aggregation_statistics.services.StatsService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -17,21 +18,23 @@ public class SchedulingTasks {
StatsService statsService;
private static boolean runningFirstTime = true;
@Scheduled(initialDelay = 1, fixedDelay = 21_600_000) // Run right after initialization and then every 6 hours.
public void gatherPayloadsPerDatasource()
{
// Request the number of payloads for each datasource and keep them in a ConcurrentHashMap,
// where the "key" will be the "datasourceId" and the "value" will be the numOfPayloads for that datasource.
// When the user requests the numOfPayloads for a given datasourceI, the app will return the rwsult immediately
// It will be a quick O(1) get operation in the HashMap.
// When the user requests the numOfPayloads for a given datasourceId, the app will return the result immediately.
// It will be a quick O(1) GET operation in the ConcurrentHashMap.
statsService.gatherNumberOfPayloadsPerDatasource();
// TODO - If this is the first time running (check a boolean)
// TODO - AND the above method failed, then gently shutdown the app!
// todo - there is no point in keeping it running if it cannot provide any stat!
if ( ! statsService.gatherNumberOfPayloadsPerDatasource()
&& runningFirstTime )
PdfAggregationStatisticsApplication.gentleAppShutdown();
runningFirstTime = false;
}
}

View File

@ -51,14 +51,6 @@ public class StatsController {
// The Map has the numOfPayloads for all datasources, even for newly added ones.
// If the given datasourceID is not found in the map, then it is either not a datasource or that datasource is not participating in the OpenAIRE Graph.
if ( StatsServiceImpl.datasourcesWithNumOfPayloads.isEmpty() ) {
errorMsg = "The \"datasourcesWithNumOfPayloads\" map was not populated!";
logger.error(errorMsg);
return ResponseEntity.internalServerError().body(errorMsg);
}
// TODO - The above check is expensive (check the code behind) and unessesery to run every time, since if the map is populated at init then it will not be empty!!
// TODO - if the map was not populated at init then the whole service should not run!! (instead of returning 500-SERVER-ERROR in every request)
Integer numPayloads = StatsServiceImpl.datasourcesWithNumOfPayloads.get(datasourceId);
if ( numPayloads == null )
return ResponseEntity.notFound().build();

View File

@ -3,6 +3,6 @@ package eu.openaire.pdf_aggregation_statistics.services;
public interface StatsService {
void gatherNumberOfPayloadsPerDatasource();
boolean gatherNumberOfPayloadsPerDatasource();
}

View File

@ -28,7 +28,7 @@ public class StatsServiceImpl implements StatsService {
public static final ConcurrentHashMap<String, Integer> datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000); // The number of datasources is around 10_000.
public void gatherNumberOfPayloadsPerDatasource()
public boolean gatherNumberOfPayloadsPerDatasource()
{
final String getNumberOfPayloadsPerDatasourceQuery =
"select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" +
@ -49,10 +49,13 @@ public class StatsServiceImpl implements StatsService {
}
});
logger.info("The \"datasourcesWithNumOfPayloads\" map was populated/updated.");
return true;
} catch (EmptyResultDataAccessException erdae) {
logger.warn("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery);
logger.error("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery);
return false;
} catch (Exception e) {
logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e);
return false;
}
}