#!/bin/bash
|
|
|
|
|
2024-07-30 17:03:33 +02:00
|
|
|
# Logging helper: prints a message and appends it to the setup log.
# Globals:   SETUP_LOG_FILE (read, optional) - log file path; defaults to
#            /var/log/cassandra/setup.log
# Arguments: $1 - message; backslash escapes are interpreted (as echo -e did)
# Outputs:   the message on stdout, and appended to the log file
# Returns:   exit status of tee
log() {
    local message="$1"
    local log_file="${SETUP_LOG_FILE:-/var/log/cassandra/setup.log}"

    # Best effort: tee cannot create the file when its directory is missing.
    # A failure here is ignored on purpose; tee reports the real problem.
    mkdir -p -- "$(dirname -- "$log_file")" 2>/dev/null

    # printf '%b' expands escapes like echo -e, but does not treat a message
    # such as "-n" as an option.
    printf '%b\n' "$message" | tee -a -- "$log_file"
}
|
|
|
|
|
|
|
|
log "RUNNING SETUP"

# Configuration
KEYSPACE="dev_keyspace_1"
DUMP_DIR="/dump" # Root of the dump; schema and snapshot paths derive from it
SCHEMA_PATH="$DUMP_DIR/schema/${KEYSPACE}_schema.cql"
CASSANDRA_SEEDS="cassandra1,cassandra2,cassandra3"
# First address reported by `hostname -I`; used as the cqlsh host below.
IP_ADDRESS=$(hostname -I | awk '{print $1}')
DATA_DIR="/var/lib/cassandra/data/$KEYSPACE"
SNAPSHOT_DIR="$DUMP_DIR/snapshot"
PRIMARY_NODE=${PRIMARY_NODE:-false} # Default to false if not set
SLEEP_DURATION=5 # Sleep duration in seconds for waits
TIMEOUT=3000 # Timeout in seconds for waits

# Initialize SEEDS array from the comma-separated seed list.
# `read -a` splits on whitespace without pathname expansion, unlike an
# unquoted array assignment.
read -r -a SEEDS <<< "${CASSANDRA_SEEDS//,/ }"
|
|
|
|
|
2024-08-01 11:25:40 +02:00
|
|
|
# Retries a shell command until it succeeds or a timeout elapses.
# Globals:   SLEEP_DURATION (read) - seconds between attempts (default 5)
#            TIMEOUT (read)        - fallback timeout when $2 is omitted
# Arguments: $1 - command string, executed with eval in the current shell
#            $2 - timeout in seconds (optional)
# Outputs:   progress messages through log
# Returns:   0 once the command succeeds; on timeout the whole script exits
#            with status 1
wait_for_command() {
    local cmd="$1"
    local timeout="${2:-${TIMEOUT:-3000}}"
    local pause="${SLEEP_DURATION:-5}"
    local start_time deadline now

    # Declared separately from the assignment so a failing `date` is not
    # masked by the exit status of `local`.
    start_time=$(date +%s)
    deadline=$((start_time + timeout))

    # NOTE: eval executes arbitrary shell text; only pass trusted,
    # script-defined command strings.
    until eval "$cmd"; do
        now=$(date +%s)
        if [ "$now" -ge "$deadline" ]; then
            log "Timed out waiting for command: $cmd"
            exit 1
        fi

        log "Command failed: $cmd, still waiting"
        sleep "$pause"
    done

    log "Command succeeded: $cmd"
    return 0
}
|
|
|
|
|
2024-08-01 11:25:40 +02:00
|
|
|
# Checks whether a node is listed with state "UN" by nodetool.
# Globals:   CASSANDRA_RPC_ADDRESS (read) - when it equals $1, the node is
#            also looked up by this host's first IP address
# Arguments: $1 - node name as it appears in `nodetool status -r` output
# Returns:   0 if a line starting with "UN" mentions the node, 1 otherwise
# NOTE(review): the name check is a substring match, so "cassandra1" also
# matches "cassandra10" - confirm that node names cannot collide this way.
is_node_up() {
    local node="$1"
    local status_by_name status_by_ip own_ip

    # An empty name would match every line; treat it as "not up".
    [ -n "$node" ] || return 1

    status_by_name=$(nodetool status -r)
    # -F: the node name is literal text, so dots are not regex wildcards.
    if grep -E '^UN' <<< "$status_by_name" | grep -qF -- "$node"; then
        return 0
    fi

    if [ "$node" = "${CASSANDRA_RPC_ADDRESS:-}" ]; then
        own_ip=$(hostname -I | awk '{print $1}')
        status_by_ip=$(nodetool status)
        # Compare the address column exactly so that 172.18.0.2 does not
        # match 172.18.0.20, and an empty IP never matches.
        if [ -n "$own_ip" ] && awk -v ip="$own_ip" \
            '$1 == "UN" && $2 == ip { found = 1 } END { exit !found }' \
            <<< "$status_by_ip"; then
            return 0
        fi
    fi

    return 1
}
|
|
|
|
|
|
|
|
# Waits, one seed at a time, until every node in SEEDS is reported up.
# Globals:   SEEDS (read)   - array of node names
#            TIMEOUT (read) - per-node timeout in seconds (default 3000)
# Outputs:   logs "All nodes are up." once every wait has returned
# Returns:   exit status of the final log call; wait_for_command exits the
#            script itself when a node does not come up in time
wait_for_all_nodes_up() {
    local seed

    for seed in "${SEEDS[@]}"; do
        # wait_for_command evals its argument, so the seed is shell-quoted
        # before being spliced into the command string.
        wait_for_command "is_node_up $(printf '%q' "$seed")" "${TIMEOUT:-3000}"
    done

    log "All nodes are up."
}
|
|
|
|
|
|
|
|
# Checks that the keyspace exists and that the schema version listed first
# by `nodetool describecluster` is shared by as many nodes as there are seeds.
# Globals:   IP_ADDRESS, KEYSPACE, SEEDS (read)
# Returns:   0 when the keyspace can be described and the node count on the
#            line following "Schema versions:" equals ${#SEEDS[@]};
#            1 otherwise
is_schema_agreed() {
    local schema_node_count

    # ${VAR:+"$VAR"} passes the host quoted, but omits the argument entirely
    # when IP_ADDRESS is empty (as the unquoted original did).
    if ! cqlsh ${IP_ADDRESS:+"$IP_ADDRESS"} -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
        return 1
    fi

    # Take the bracketed node list on the line after "Schema versions:" and
    # count its comma-separated entries.
    schema_node_count=$(
        nodetool describecluster \
            | grep -A 1 "Schema versions:" \
            | grep -o '\[.*\]' \
            | tr -d '[]' \
            | tr ',' '\n' \
            | wc -l
    )
    # Some wc implementations pad the number with blanks.
    schema_node_count=${schema_node_count//[[:space:]]/}

    # Guard the numeric comparison against unexpected output.
    [[ "$schema_node_count" =~ ^[0-9]+$ ]] || return 1

    if [ "$schema_node_count" -eq "${#SEEDS[@]}" ]; then
        return 0
    fi

    return 1
}
|
|
|
|
|
|
|
|
# Checks whether the configured keyspace can be described through cqlsh.
# Globals:   IP_ADDRESS (read) - host passed to cqlsh
#            KEYSPACE (read)   - keyspace name
# Returns:   0 if `DESCRIBE KEYSPACE` succeeds, 1 otherwise
is_keyspace_exists() {
    # ${VAR:+"$VAR"} passes the host quoted, but omits the argument entirely
    # when IP_ADDRESS is empty (as the unquoted original did).
    if cqlsh ${IP_ADDRESS:+"$IP_ADDRESS"} -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
        return 0
    fi

    return 1
}
|
|
|
|
|
|
|
|
# Record the effective settings in the setup log.
log "setup KEYSPACE: $KEYSPACE"
log "setup DUMP_DIR: $DUMP_DIR"
log "setup SCHEMA_PATH: $SCHEMA_PATH"
log "setup CASSANDRA_SEEDS: $CASSANDRA_SEEDS"
|
2024-08-01 11:25:40 +02:00
|
|
|
|
|
|
|
# Check if the keyspace directory exists and is not empty.
# Sets SCHEMA_CREATED=false when data is already present, true otherwise.
# NOTE: SCHEMA_CREATED is currently only referenced by the commented-out
# guard around the import step.
if [[ -d "$DATA_DIR" && -n "$(ls -A -- "$DATA_DIR")" ]]; then
    log "Data directory $DATA_DIR exists and is not empty. Skipping schema creation and data import."
    SCHEMA_CREATED=false
else
    log "Data directory $DATA_DIR does not exist or is empty. Proceeding with schema creation and data import."
    SCHEMA_CREATED=true
fi
|
2024-07-24 18:05:37 +02:00
|
|
|
|
|
|
|
# Wait for cassandra1 to be ready if this is not the primary node.
# wait_for_service is not defined in this script; it is used only when the
# environment provides it. Otherwise fall back to polling cassandra1 on the
# CQL port (9042) with cqlsh through wait_for_command.
if [ "$PRIMARY_NODE" != "true" ]; then
    if declare -F wait_for_service > /dev/null; then
        wait_for_service cassandra1 9042 "$TIMEOUT"
    else
        wait_for_command "cqlsh cassandra1 9042 -e 'SHOW HOST' > /dev/null 2>&1" "$TIMEOUT"
    fi
fi
|
2024-07-24 15:03:39 +02:00
|
|
|
|
|
|
|
# Start Cassandra in the background
|
2024-08-01 11:25:40 +02:00
|
|
|
# cassandra -R &
|
2024-07-24 15:03:39 +02:00
|
|
|
|
|
|
|
# Wait for Cassandra to be ready: poll the local node until cqlsh can run
# a trivial command against it.
wait_for_command "cqlsh $IP_ADDRESS -e 'SHOW HOST' > /dev/null 2>&1" "$TIMEOUT"

# Log the value of PRIMARY_NODE for debugging
log "PRIMARY_NODE is set to: $PRIMARY_NODE"

# Step 1: Wait for all nodes to be up and ready
log "Waiting for all nodes to be up and ready..."
wait_for_all_nodes_up
|
|
|
|
|
|
|
|
# Step 2: Create keyspace and schema on the primary node
if [ "$PRIMARY_NODE" = "true" ]; then
    log "Checking if keyspace $KEYSPACE exists..."
    if is_keyspace_exists; then
        # NOTE(review): nothing is executed in this branch; tables are not
        # actually verified or created here.
        log "Keyspace $KEYSPACE already exists. Ensuring tables exist..."
    else
        log "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..."
        # Fail fast: without the schema, the agreement wait below can only
        # end by timing out.
        if ! cqlsh ${IP_ADDRESS:+"$IP_ADDRESS"} -f "$SCHEMA_PATH"; then
            log "Failed to apply schema file: $SCHEMA_PATH"
            exit 1
        fi
    fi
fi
|
|
|
|
|
|
|
|
# Step 3: Wait for schema to be created and agreed upon across all nodes
log "Waiting for schema agreement across all nodes..."
wait_for_command "is_schema_agreed" "$TIMEOUT"
|
2024-07-31 14:00:24 +02:00
|
|
|
|
|
|
|
# Step 4: Import data using sstableloader
# NOTE: the SCHEMA_CREATED guard below is commented out, so the import runs
# on every invocation of this script.
# if [ "$SCHEMA_CREATED" = true ]; then
log "Importing snapshots using sstableloader..."
if [[ -n "${SNAPSHOT_DIR:-}" && -d "$SNAPSHOT_DIR" ]]; then
    # Glob instead of parsing `ls`, so entry names with spaces or glob
    # characters stay intact.
    for TABLE_PATH in "$SNAPSHOT_DIR"/*; do
        # Skips the literal pattern when nothing matches, and plain files.
        [[ -d "$TABLE_PATH" ]] || continue

        TABLE_DIR=${TABLE_PATH##*/}
        TABLE_NAME=$TABLE_DIR # The directory name is used as the table name
        log "Importing table: $TABLE_NAME from directory: $SNAPSHOT_DIR/$TABLE_DIR"
        if ! sstableloader -d "$CASSANDRA_SEEDS" -v -k "$KEYSPACE" "$SNAPSHOT_DIR/$TABLE_DIR"; then
            # Best effort: record the failure and carry on with the rest.
            log "sstableloader failed for table: $TABLE_NAME"
        fi
        # Row count goes to stderr as a quick check of the import.
        cqlsh ${IP_ADDRESS:+"$IP_ADDRESS"} -k "$KEYSPACE" -e "select count(*) from $TABLE_NAME;" >&2
    done
else
    log "Snapshot directory not found, nothing to import: ${SNAPSHOT_DIR:-}"
fi
# fi

log "FINISHED IMPORT"
|
2024-07-30 17:03:33 +02:00
|
|
|
|
2024-07-24 15:03:39 +02:00
|
|
|
# Keep the container running
|
2024-08-01 11:25:40 +02:00
|
|
|
# tail -f /dev/null
|