- Refactor the "shutdown" script to do an orderly-shutdown, by default, by calling the "shutdownService" endpoint. In case a "force-shutdown" is needed, that can be requested with a cmd-argument.

- Fix not updating the "UrlsController.numOfWorkers" correctly.
- Code polishing.
This commit is contained in:
Lampros Smyrnaios 2023-10-23 17:19:29 +03:00
parent 0c7bf6357b
commit bdf834c439
6 changed files with 46 additions and 16 deletions

View File

@ -18,7 +18,7 @@ elif [[ $# -eq 2 ]]; then
justInstall=$1
shouldRunInDocker=$2
elif [[ $# -gt 2 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: installAndRun.sh <justInstall: 0 | 1> <shouldRunInDocker: 0 | 1>"; exit 2
fi
if [[ justInstall -eq 1 && shouldRunInDocker -eq 1 ]]; then
@ -69,7 +69,7 @@ if [[ justInstall -eq 0 ]]; then
# 20 seconds are enough to check if there is an immediate fatal error with one of the docker images. This will cover the problematic configuration case for Prometheus and Grafana containers.
sudo docker ps -a || handle_error "Could not get the status of docker-containers!" 6 # Using -a to get the status of failed containers as well.
echo -e "\n\nGetting the logs of docker-container \"urls_controller\":\n"
sudo docker logs urls_controller -f || handle_error "Could not get the logs of docker-container \"urls_controller\"!" 7 # Using "regex anchors" to avoid false-positives. Works even if the container is not running, thus showing the error-log.
sudo docker logs -f urls_controller || handle_error "Could not get the logs of docker-container \"urls_controller\"!" 7 # Using "regex anchors" to avoid false-positives. Works even if the container is not running, thus showing the error-log.
# Use just the container-name and the "-f" parameter to indicate that we want to follow on logs updates, until we specify to unfollow them (with ctrl+c).
# This way we do not need to run the "docker logs" again and again, without checking the container-id each time.
fi

View File

@ -1,11 +0,0 @@
# This script shuts down (ONLY!) the Controller, by stopping and killing the related containers.
# It is used during testing.
# It does not shuts down the whole service! The workers will keep running and their work will be lost.
echo "Running compose down.."
sudo docker compose -f docker-compose.yml down
sudo docker compose -f ./prometheus/docker-compose-prometheus.yml down
# In case we need to hard-remove the containers, use the following commands:
#sudo docker stop $(sudo docker ps -aqf "name=^(?:urlscontroller-urls_controller|prometheus-(?:prometheus|grafana))-1$") || true # There may be no active containers
#sudo docker rm $(sudo docker ps -aqf "name=^(?:urlscontroller-urls_controller|prometheus-(?:prometheus|grafana))-1$") || true # All containers may be already removed.

40
shutdownService.sh Executable file
View File

@ -0,0 +1,40 @@
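# By default, this script requests an orderly shutdown of the whole Service, by calling the Controller's "shutdownService" endpoint and then following the Controller's logs.
# When it is given the argument "1", it force-shuts-down (ONLY!) the Controller, by bringing down the related containers. This mode is used during testing.
# The force-shutdown does not shut down the whole service! The workers will keep running and their work will be lost.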
# For error-handling, we cannot use the "set -e" since: it has problems https://mywiki.wooledge.org/BashFAQ/105
# So we have our own function, for use when a single command fails.
# Arguments:
#   $1: the error-message to print (backslash-escapes are interpreted, because of "echo -e").
#   $2: the exit-code of the script. Defaults to 1 when it is not given.
handle_error () {
  echo -e "\n\n$1\n\n"
  # Without a default value, a missing exit-code would result in a plain "exit", which returns the status
  # of the preceding "echo" (0) and thus reports success for an error-path.
  exit "${2:-1}"
}
# Change the working directory to the script's directory, when running from another location.
# We use "dirname", since "${0%/*}" results in the script's file-name (not a directory), when the given path
# contains no slash (e.g. when running: "bash shutdownService.sh"), which makes the "cd" fail.
cd "$(dirname -- "$0")" || handle_error "Could not change-dir to this script's dir!" 1

# We may have no arguments, if we do not want to force the Controller to shutdown.
forceControllerShutdown=0
if [[ $# -eq 1 ]]; then
  forceControllerShutdown=$1
elif [[ $# -gt 1 ]]; then
  echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: shutdownService.sh <forceControllerShutdown: 0 | 1>"; exit 2
fi

# Accept only the documented values. Otherwise, a value like "true" would be arithmetically evaluated to 0
# and the script would silently perform an orderly-shutdown, instead of the intended one.
if [[ "$forceControllerShutdown" != "0" && "$forceControllerShutdown" != "1" ]]; then
  echo -e "Invalid argument given: ${forceControllerShutdown}\nPlease execute it like: shutdownService.sh <forceControllerShutdown: 0 | 1>"; exit 2
fi

# Shutdown Prometheus, if it's running.
sudo docker compose -f ./prometheus/docker-compose-prometheus.yml down

if [[ "$forceControllerShutdown" == "1" ]]; then
  echo "Shutting down the Controller.."
  sudo docker compose -f docker-compose.yml down
else
  echo "Shutting down the Service.."
  # Install "curl" only when it is missing, to avoid calling the package-manager on every run.
  if ! command -v curl > /dev/null 2>&1; then
    sudo apt install -y curl || handle_error "Could not install \"curl\"!" 4
  fi
  # If the request cannot be sent, then the Controller will never shutdown, so following its logs would block forever.
  curl -X POST -i 'http://localhost:1880/api/shutdownService' || handle_error "Could not send the shutdown-request to the Controller!" 5
  # Follow the logs until shutdown.
  # Use just the container-name. This works even if the container is not running, thus showing the error-log.
  sudo docker logs -f urls_controller || handle_error "Could not get the logs of docker-container \"urls_controller\"!" 3
fi

# In case we need to hard-remove the containers, use the following commands:
#sudo docker stop $(sudo docker ps -aqf "name=^(?:urlscontroller-urls_controller|prometheus-(?:prometheus|grafana))-1$") || true # There may be no active containers
#sudo docker rm $(sudo docker ps -aqf "name=^(?:urlscontroller-urls_controller|prometheus-(?:prometheus|grafana))-1$") || true # All containers may be already removed.

View File

@ -104,13 +104,14 @@ public class ShutdownController {
String remoteAddr = GenericUtils.getRequestorAddress(request);
if ( ! remoteAddr.equals(workerInfo.getWorkerIP()) ) {
logger.error(initMsg + " The request came from another IP: " + remoteAddr + " | while this worker was registered with the IP: " + workerInfo.getWorkerIP());
logger.error(initMsg + " The request came from another IP: " + remoteAddr + " | while this worker was registered with this IP: " + workerInfo.getWorkerIP());
return ResponseEntity.status(HttpStatus.FORBIDDEN).build();
}
logger.info(initMsg);
workerInfo.setHasShutdown(true); // This will update the map.
UrlsController.numOfWorkers.decrementAndGet();
// Return "HTTP-OK" to this worker. If this was part of a shutdown-service request, then wait for the scheduler to check and shutdown the service.
return ResponseEntity.ok().build();

View File

@ -120,7 +120,7 @@ public class UrlsController {
if ( workerInfo.getHasShutdown() ) {
logger.info("The worker with id \"" + workerId + "\" was restarted.");
workerInfo.setHasShutdown(false);
numOfWorkers.decrementAndGet();
numOfWorkers.incrementAndGet();
}
} else {
logger.info("The worker \"" + workerId + "\" is requesting assignments for the first time. Going to store its IP [" + remoteAddr + "] in memory.");

View File

@ -145,7 +145,7 @@ public class UrlsServiceImpl implements UrlsService {
final String getAssignmentsQuery = "select * from " + DatabaseConnector.databaseName + ".current_assignment";
List<Assignment> assignments = new ArrayList<>(assignmentsLimit);
final List<Assignment> assignments = new ArrayList<>(assignmentsLimit);
long curAssignmentsBatchCounter = assignmentsBatchCounter.incrementAndGet();