dockerizing_cassandra/scripts/setup.sh

154 lines
4.7 KiB
Bash
Raw Normal View History

2024-07-24 15:03:39 +02:00
#!/bin/bash
2024-07-30 17:03:33 +02:00
# Logging helper.
# Arguments: $1 - message text; backslash escapes in it are expanded,
#                 because the message is printed with `echo -e`.
# Outputs:   the message on stdout, and the same text appended to
#            /var/log/cassandra/setup.log (tee -a never truncates the file).
# Returns:   the exit status of tee.
log() {
  tee -a /var/log/cassandra/setup.log < <(echo -e "$1")
}

log "RUNNING SETUP"
# Configuration
2024-07-30 17:03:33 +02:00
# Keyspace whose schema and snapshots this script manages.
KEYSPACE="dev_keyspace_1"
# Root of the dump; the schema file and the snapshot directory live below it.
DUMP_DIR="/dump"
SCHEMA_PATH="${DUMP_DIR}/schema/${KEYSPACE}_schema.cql"
# Comma-separated list of the cluster's seed nodes.
CASSANDRA_SEEDS="cassandra1,cassandra2,cassandra3"
# First address reported by `hostname -I`; used as the cqlsh target.
IP_ADDRESS=$(hostname -I | awk '{print $1}')
DATA_DIR="/var/lib/cassandra/data/${KEYSPACE}"
SNAPSHOT_DIR="${DUMP_DIR}/snapshot"
# Taken from the environment; defaults to false if not set.
PRIMARY_NODE="${PRIMARY_NODE:-false}"
# Pause between retries, in seconds.
SLEEP_DURATION=5
# Upper bound for every wait, in seconds.
TIMEOUT=3000

# Split the seed list into the SEEDS array. `read -a` is used instead of an
# unquoted expansion so that the entries are never subject to globbing;
# both commas and blanks separate entries.
IFS=', ' read -r -a SEEDS <<<"$CASSANDRA_SEEDS"
# Function to wait for a command to succeed
#
# Re-runs a command until it succeeds or the timeout elapses.
# Globals:   SLEEP_DURATION (read) - seconds between attempts; 5 if unset
#            TIMEOUT (read)        - used only when $2 is omitted
# Arguments: $1 - command string, run through `eval` on every attempt
#            $2 - timeout in seconds, counted from the first attempt
#                 (defaults to $TIMEOUT, then to 3000)
# Outputs:   one log line per failed attempt and one on success or timeout
# Returns:   0 once the command succeeds. On timeout it does NOT return:
#            it terminates the whole script with exit status 1.
wait_for_command() {
  local cmd="$1"
  local limit="${2:-${TIMEOUT:-3000}}"
  # Without a default, an unset SLEEP_DURATION makes `sleep` fail at once
  # and turns the retry loop into a busy loop.
  local pause="${SLEEP_DURATION:-5}"
  local deadline now

  deadline=$(( $(date +%s) + limit ))

  # eval is needed because callers pass strings containing redirections and
  # quoting. Only script-internal strings may be passed here, never
  # untrusted input.
  until eval "$cmd"; do
    now=$(date +%s)
    if (( now >= deadline )); then
      log "Timed out waiting for command: $cmd"
      exit 1
    fi
    log "Command failed: $cmd, still waiting"
    sleep "$pause"
  done

  log "Command succeeded: $cmd"
  return 0
}
# Function to check if a node is up
#
# A node counts as up when `nodetool status` lists it on a line starting
# with "UN" (nodetool's Up/Normal marker).
# Globals:   CASSANDRA_RPC_ADDRESS (read) - name under which this node itself
#            is referred to; enables the fallback lookup by local IP
# Arguments: $1 - node name as it appears in the seed list
# Returns:   0 if the node is listed as UN, 1 otherwise (including when
#            nodetool produces no output or $1 is empty)
is_node_up() {
  local node="$1"
  local status local_ip

  # An empty pattern would match every line and report a false "up".
  [[ -n "$node" ]] || return 1

  # First try the listing with resolved host names (-r). The name is matched
  # as a fixed string, so dots in it are not regex wildcards.
  status=$(nodetool status -r)
  if grep -E '^UN' <<<"$status" | grep -qF -- "$node"; then
    return 0
  fi

  # The local node may not be listed under its own name; look it up by the
  # first local IP address in the unresolved listing instead.
  if [[ "$node" == "${CASSANDRA_RPC_ADDRESS:-}" ]]; then
    local_ip=$(hostname -I | awk '{print $1}')
    [[ -n "$local_ip" ]] || return 1
    status=$(nodetool status)
    # Compare the address column exactly: a regex or substring match would
    # let 172.18.0.2 match 172.18.0.20.
    if awk -v ip="$local_ip" \
      '$1 == "UN" && $2 == ip { found = 1 } END { exit !found }' \
      <<<"$status"; then
      return 0
    fi
  fi

  return 1
}
# Function to wait for all nodes to be up
#
# Waits, one seed after another, until is_node_up succeeds for each entry.
# Globals:   SEEDS (read)   - array of node names to wait for
#            TIMEOUT (read) - per-node timeout handed to wait_for_command
# Outputs:   a log line once every node has been seen up
# Returns:   status of the final log call; wait_for_command ends the script
#            itself if a node does not come up in time
wait_for_all_nodes_up() {
  # Keep the loop variable local so it does not overwrite a global.
  local node
  for node in "${SEEDS[@]}"; do
    wait_for_command "is_node_up $node" "$TIMEOUT"
  done
  log "All nodes are up."
}
# Function to check for schema agreement and if schema exists
#
# Succeeds only when the keyspace can be described through cqlsh AND the
# node list printed right after "Schema versions:" by
# `nodetool describecluster` has as many entries as there are seeds.
# Globals:   IP_ADDRESS, KEYSPACE, SEEDS (read); SCHEMA_NODES (written)
# Returns:   0 when both conditions hold, 1 otherwise
is_schema_agreed() {
  # No keyspace visible from this node yet: nothing to agree on.
  cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1 \
    || return 1

  # Take the line following the "Schema versions:" heading, keep the
  # bracketed list, and count its comma-separated entries.
  SCHEMA_NODES=$(
    nodetool describecluster \
      | grep -A 1 "Schema versions:" \
      | grep -o '\[.*\]' \
      | tr -d '[]' \
      | tr ',' '\n' \
      | wc -l
  )

  [ "$SCHEMA_NODES" -eq "${#SEEDS[@]}" ] || return 1
  return 0
}
# Function to check if keyspace exists
#
# Asks cqlsh on IP_ADDRESS to describe KEYSPACE and discards all output.
# Globals:   IP_ADDRESS, KEYSPACE (read)
# Returns:   0 if the DESCRIBE succeeds, 1 for any cqlsh failure
is_keyspace_exists() {
  cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1 && return 0
  return 1
}
# Record the effective configuration in the setup log.
log "setup KEYSPACE: $KEYSPACE"
log "setup DUMP_DIR: $DUMP_DIR"
log "setup SCHEMA_PATH: $SCHEMA_PATH"
log "setup CASSANDRA_SEEDS: $CASSANDRA_SEEDS"

# Check if the keyspace directory exists and is not empty.
# SCHEMA_CREATED is "false" when data is already present and "true" otherwise.
# The path is quoted so that a value with blanks or glob characters is
# handed to ls as one argument.
if [[ -d "$DATA_DIR" && -n "$(ls -A -- "$DATA_DIR" 2>/dev/null)" ]]; then
  log "Data directory $DATA_DIR exists and is not empty. Skipping schema creation and data import."
  SCHEMA_CREATED=false
else
  log "Data directory $DATA_DIR does not exist or is empty. Proceeding with schema creation and data import."
  SCHEMA_CREATED=true
fi
# Wait for cassandra1 to be ready if this is not the primary node
#
# wait_for_service is not defined in this script. If it is available (as a
# function or as a program on PATH) it is used as before; otherwise the
# wait falls back to polling cassandra1's port 9042 through bash's
# /dev/tcp redirection, instead of failing with "command not found" and
# skipping the wait.
if [[ "$PRIMARY_NODE" != "true" ]]; then
  if command -v wait_for_service > /dev/null 2>&1; then
    wait_for_service cassandra1 9042 "$TIMEOUT"
  else
    # The subshell opens the connection and closes it again on exit.
    wait_for_command "(exec 3<>/dev/tcp/cassandra1/9042) > /dev/null 2>&1" "$TIMEOUT"
  fi
fi
2024-07-24 15:03:39 +02:00
# Start Cassandra in the background
# cassandra -R &
2024-07-24 15:03:39 +02:00
# Wait for Cassandra to be ready
# Polls the node at IP_ADDRESS with a trivial cqlsh statement (SHOW HOST)
# until it succeeds, for at most TIMEOUT seconds. IP_ADDRESS is expanded
# here, when the probe string is built; cqlsh output is discarded.
wait_for_command \
  "cqlsh $IP_ADDRESS -e 'SHOW HOST' > /dev/null 2>&1" \
  $TIMEOUT
2024-07-24 15:03:39 +02:00
2024-07-30 17:03:33 +02:00
# Log the value of PRIMARY_NODE for debugging
log "PRIMARY_NODE is set to: $PRIMARY_NODE"

# Step 1: Wait for all nodes to be up and ready
log "Waiting for all nodes to be up and ready..."
wait_for_all_nodes_up

# Step 2: Create keyspace and schema on the primary node
if [[ "$PRIMARY_NODE" == "true" ]]; then
  log "Checking if keyspace $KEYSPACE exists..."
  if is_keyspace_exists; then
    # NOTE(review): despite the message, nothing is verified or created in
    # this branch; the existing keyspace is used as it is.
    log "Keyspace $KEYSPACE already exists. Ensuring tables exist..."
  else
    log "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..."
    # Check the result of the schema load. If the keyspace is still missing
    # afterwards, schema agreement can never be reached, so stop now instead
    # of waiting for the full timeout in step 3.
    if ! cqlsh "$IP_ADDRESS" -f "$SCHEMA_PATH"; then
      if ! is_keyspace_exists; then
        log "Failed to create keyspace $KEYSPACE from $SCHEMA_PATH"
        exit 1
      fi
      log "Schema file $SCHEMA_PATH reported errors, but keyspace $KEYSPACE exists. Continuing."
    fi
  fi
fi

# Step 3: Wait for schema to be created and agreed upon across all nodes
log "Waiting for schema agreement across all nodes..."
wait_for_command "is_schema_agreed" "$TIMEOUT"
# Step 4: Import data using sstableloader if not previously imported
# Every sub-directory of SNAPSHOT_DIR is loaded into KEYSPACE; the
# directory name is used as the table name for the row count afterwards.
# if [ "$SCHEMA_CREATED" = true ]; then
log "Importing snapshots using sstableloader..."
# Guard against an empty or missing SNAPSHOT_DIR. Entries are iterated with
# a glob instead of parsing `ls`, so names with blanks stay intact.
if [[ -n "$SNAPSHOT_DIR" && -d "$SNAPSHOT_DIR" ]]; then
  for TABLE_PATH in "$SNAPSHOT_DIR"/*; do
    # Skips plain files, and the literal pattern when nothing matches.
    [[ -d "$TABLE_PATH" ]] || continue
    TABLE_NAME=${TABLE_PATH##*/} # Extract table name from directory name
    log "Importing table: $TABLE_NAME from directory: $TABLE_PATH"
    if ! sstableloader -d "$CASSANDRA_SEEDS" -v -k "$KEYSPACE" "$TABLE_PATH"; then
      # Best effort: report the failure and carry on with the other tables.
      log "sstableloader failed for table: $TABLE_NAME"
    fi
    # Row count goes to stderr so it shows up in the container output.
    cqlsh "$IP_ADDRESS" -k "$KEYSPACE" -e "select count(*) from $TABLE_NAME;" >&2
  done
else
  log "Snapshot directory $SNAPSHOT_DIR does not exist. Nothing to import."
fi
# fi
log "FINISHED IMPORT"
2024-07-30 17:03:33 +02:00
2024-07-24 15:03:39 +02:00
# Keep the container running
# tail -f /dev/null