- Improve performance by avoiding checking if the huge ConcurrentHashMap is empty, on every request. After the first data-gathering, it will never be empty and the checking-code is expensive.
- In case of an error retrieving data from the DB during initialization, gently shut down the App. There is no point keeping the app running just to serve 500-Server-Errors. - Code polishing.
This commit is contained in:
parent
64ef1bcf40
commit
7e5c338de3
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
This is a public API to get specific statistics from the PDF Aggregation Service.<br>
|
This is a public API to get specific statistics from the PDF Aggregation Service.<br>
|
||||||
This service is designed to respond as fast as possible, by caching the relevant data in memory, instead of running new queries to the database, for every request.
|
This service is designed to respond as fast as possible, by caching the relevant data in memory, instead of running new queries to the database, for every request.
|
||||||
The cached data is updated every 6 hours.
|
**The cached data is updated every 6 hours**.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
|
||||||
|
|
|
@ -84,7 +84,7 @@ dependencies {
|
||||||
exclude group: 'io.netty', module: 'netty'
|
exclude group: 'io.netty', module: 'netty'
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add back some updated version of the needed dependencies.
|
// Add back some updated versions of the needed dependencies.
|
||||||
implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8.
|
implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8.
|
||||||
implementation 'com.fasterxml.woodstox:woodstox-core:6.5.1'
|
implementation 'com.fasterxml.woodstox:woodstox-core:6.5.1'
|
||||||
|
|
||||||
|
|
|
@ -57,8 +57,8 @@ if [[ justInstall -eq 0 ]]; then
|
||||||
# Run in "detached mode" -d (in the background).
|
# Run in "detached mode" -d (in the background).
|
||||||
(sudo docker compose up --build -d && echo -e "\nThe pdf_aggregation_statistics docker-container started running.\n") || handle_error "Could not build and/or run the 'pdf_aggregation_statistics' docker container!" 4
|
(sudo docker compose up --build -d && echo -e "\nThe pdf_aggregation_statistics docker-container started running.\n") || handle_error "Could not build and/or run the 'pdf_aggregation_statistics' docker container!" 4
|
||||||
|
|
||||||
echo -e "Waiting 55 seconds before getting the status..\n"
|
echo -e "Waiting 60 seconds before getting the status..\n"
|
||||||
sleep 55
|
sleep 60
|
||||||
sudo docker ps -a || handle_error "Could not get the status of docker-containers!" 6 # Using -a to get the status of failed containers as well.
|
sudo docker ps -a || handle_error "Could not get the status of docker-containers!" 6 # Using -a to get the status of failed containers as well.
|
||||||
echo -e "\n\nGetting the logs of docker-container \"pdf_aggregation_statistics\":\n"
|
echo -e "\n\nGetting the logs of docker-container \"pdf_aggregation_statistics\":\n"
|
||||||
sudo docker logs "$(sudo docker ps -aqf "name=^pdf_aggregation_statistics$")" || handle_error "Could not get the logs of docker-container \"pdf_aggregation_statistics\"!" 7 # Using "regex anchors" to avoid false-positives. Works even if the container is not running, thus showing the error-log.
|
sudo docker logs "$(sudo docker ps -aqf "name=^pdf_aggregation_statistics$")" || handle_error "Could not get the logs of docker-container \"pdf_aggregation_statistics\"!" 7 # Using "regex anchors" to avoid false-positives. Works even if the container is not running, thus showing the error-log.
|
||||||
|
|
|
@ -1,10 +1,14 @@
|
||||||
package eu.openaire.pdf_aggregation_statistics;
|
package eu.openaire.pdf_aggregation_statistics;
|
||||||
|
|
||||||
|
import eu.openaire.pdf_aggregation_statistics.components.SchedulingTasks;
|
||||||
import eu.openaire.pdf_aggregation_statistics.util.UriBuilder;
|
import eu.openaire.pdf_aggregation_statistics.util.UriBuilder;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.boot.CommandLineRunner;
|
import org.springframework.boot.CommandLineRunner;
|
||||||
import org.springframework.boot.SpringApplication;
|
import org.springframework.boot.SpringApplication;
|
||||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||||
import org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext;
|
import org.springframework.boot.web.servlet.context.ServletWebServerApplicationContext;
|
||||||
|
import org.springframework.context.ConfigurableApplicationContext;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
import org.springframework.core.env.Environment;
|
import org.springframework.core.env.Environment;
|
||||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||||
|
@ -12,6 +16,7 @@ import org.springframework.web.cors.CorsConfiguration;
|
||||||
import org.springframework.web.cors.CorsConfigurationSource;
|
import org.springframework.web.cors.CorsConfigurationSource;
|
||||||
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;
|
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;
|
||||||
|
|
||||||
|
import javax.annotation.PreDestroy;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
|
||||||
|
@ -19,8 +24,13 @@ import java.util.Collections;
|
||||||
@EnableScheduling
|
@EnableScheduling
|
||||||
public class PdfAggregationStatisticsApplication {
|
public class PdfAggregationStatisticsApplication {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(PdfAggregationStatisticsApplication.class);
|
||||||
|
|
||||||
|
|
||||||
|
private static ConfigurableApplicationContext context;
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
SpringApplication.run(PdfAggregationStatisticsApplication.class, args);
|
context = SpringApplication.run(PdfAggregationStatisticsApplication.class, args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -37,7 +47,23 @@ public class PdfAggregationStatisticsApplication {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// TODO - Add GENTLE SHUTDOWN BEAN
|
public static void gentleAppShutdown()
|
||||||
|
{
|
||||||
|
int exitCode = 0;
|
||||||
|
try {
|
||||||
|
exitCode = SpringApplication.exit(context, () -> 0); // The "PreDestroy" method will be called. (the "context" will be closed automatically (I checked it))
|
||||||
|
} catch (IllegalArgumentException iae) {
|
||||||
|
logger.error(iae.getMessage()); // This will say "Context must not be null", in case the "gentleAppShutdown()" was called too early in the app's lifetime. But it's ok.
|
||||||
|
}
|
||||||
|
System.exit(exitCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@PreDestroy
|
||||||
|
public void preDestroy() {
|
||||||
|
// TODO - Add any future shut-down logic here (e.g. shutting down threads).
|
||||||
|
logger.info("Exiting..");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Bean
|
@Bean
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package eu.openaire.pdf_aggregation_statistics.components;
|
package eu.openaire.pdf_aggregation_statistics.components;
|
||||||
|
|
||||||
|
import eu.openaire.pdf_aggregation_statistics.PdfAggregationStatisticsApplication;
|
||||||
import eu.openaire.pdf_aggregation_statistics.services.StatsService;
|
import eu.openaire.pdf_aggregation_statistics.services.StatsService;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -17,21 +18,23 @@ public class SchedulingTasks {
|
||||||
StatsService statsService;
|
StatsService statsService;
|
||||||
|
|
||||||
|
|
||||||
|
private static boolean runningFirstTime = true;
|
||||||
|
|
||||||
|
|
||||||
@Scheduled(initialDelay = 1, fixedDelay = 21_600_000) // Run right after initialization and then every 6 hours.
|
@Scheduled(initialDelay = 1, fixedDelay = 21_600_000) // Run right after initialization and then every 6 hours.
|
||||||
public void gatherPayloadsPerDatasource()
|
public void gatherPayloadsPerDatasource()
|
||||||
{
|
{
|
||||||
// Request the number of payloads for each datasource and keep them in a ConcurrentHashMap,
|
// Request the number of payloads for each datasource and keep them in a ConcurrentHashMap,
|
||||||
// where the "key" will be the "datasourceId" and the "value" will be the numOfPayloads for that datasource.
|
// where the "key" will be the "datasourceId" and the "value" will be the numOfPayloads for that datasource.
|
||||||
|
|
||||||
// When the user requests the numOfPayloads for a given datasourceI, the app will return the rwsult immediately
|
// When the user requests the numOfPayloads for a given datasourceId, the app will return the result immediately.
|
||||||
// It will be a quick O(1) get operation in the HashMap.
|
// It will be a quick O(1) GET operation in the ConcurrentHashMap.
|
||||||
|
|
||||||
statsService.gatherNumberOfPayloadsPerDatasource();
|
if ( ! statsService.gatherNumberOfPayloadsPerDatasource()
|
||||||
|
&& runningFirstTime )
|
||||||
// TODO - If this is the first time running (check a boolean)
|
PdfAggregationStatisticsApplication.gentleAppShutdown();
|
||||||
// TODO - AND the above method failed, then gently shutdown the app!
|
|
||||||
// todo - there is no point in keeping it running if it cannot provide any stat!
|
|
||||||
|
|
||||||
|
runningFirstTime = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,14 +51,6 @@ public class StatsController {
|
||||||
// The Map has the numOfPayloads for all datasources, even for newly added ones.
|
// The Map has the numOfPayloads for all datasources, even for newly added ones.
|
||||||
// If the given datasourceID is not found in the map, then either is not a datasource or that datasource is not participating in the OpenAIRE Graph.
|
// If the given datasourceID is not found in the map, then either is not a datasource or that datasource is not participating in the OpenAIRE Graph.
|
||||||
|
|
||||||
if ( StatsServiceImpl.datasourcesWithNumOfPayloads.isEmpty() ) {
|
|
||||||
errorMsg = "The \"datasourcesWithNumOfPayloads\" map was not populated!";
|
|
||||||
logger.error(errorMsg);
|
|
||||||
return ResponseEntity.internalServerError().body(errorMsg);
|
|
||||||
}
|
|
||||||
// TODO - The above check is expensive (check the code behind) and unessesery to run every time, since if the map is populated at init then it will not be empty!!
|
|
||||||
// TODO - if the map was not populated at init then the whole service should not run!! (instead of returning 500-SERVER-ERROR in every request)
|
|
||||||
|
|
||||||
Integer numPayloads = StatsServiceImpl.datasourcesWithNumOfPayloads.get(datasourceId);
|
Integer numPayloads = StatsServiceImpl.datasourcesWithNumOfPayloads.get(datasourceId);
|
||||||
if ( numPayloads == null )
|
if ( numPayloads == null )
|
||||||
return ResponseEntity.notFound().build();
|
return ResponseEntity.notFound().build();
|
||||||
|
|
|
@ -3,6 +3,6 @@ package eu.openaire.pdf_aggregation_statistics.services;
|
||||||
|
|
||||||
public interface StatsService {
|
public interface StatsService {
|
||||||
|
|
||||||
void gatherNumberOfPayloadsPerDatasource();
|
boolean gatherNumberOfPayloadsPerDatasource();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,7 @@ public class StatsServiceImpl implements StatsService {
|
||||||
public static final ConcurrentHashMap<String, Integer> datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000); // The number of datasources is around 10_000.
|
public static final ConcurrentHashMap<String, Integer> datasourcesWithNumOfPayloads = new ConcurrentHashMap<>(105_000); // The number of datasources is around 10_000.
|
||||||
|
|
||||||
|
|
||||||
public void gatherNumberOfPayloadsPerDatasource()
|
public boolean gatherNumberOfPayloadsPerDatasource()
|
||||||
{
|
{
|
||||||
final String getNumberOfPayloadsPerDatasourceQuery =
|
final String getNumberOfPayloadsPerDatasourceQuery =
|
||||||
"select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" +
|
"select d.id, count(p.id) as payload_count from " + databaseName + ".datasource d\n" +
|
||||||
|
@ -49,10 +49,13 @@ public class StatsServiceImpl implements StatsService {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
logger.info("The \"datasourcesWithNumOfPayloads\" map was populated/updated.");
|
logger.info("The \"datasourcesWithNumOfPayloads\" map was populated/updated.");
|
||||||
|
return true;
|
||||||
} catch (EmptyResultDataAccessException erdae) {
|
} catch (EmptyResultDataAccessException erdae) {
|
||||||
logger.warn("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery);
|
logger.error("The number of payloads per datasource could not be retrieved from the database \"" + databaseName + "\" using the getNumberOfPayloadsPerDatasourceQuery: " + getNumberOfPayloadsPerDatasourceQuery);
|
||||||
|
return false;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e);
|
logger.error("Problem when executing \"getNumberOfPayloadsPerDatasourceQuery\": " + getNumberOfPayloadsPerDatasourceQuery, e);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue