dockerizing_cassandra/dump.sh

101 lines
2.8 KiB
Bash
Raw Permalink Normal View History

2024-07-24 15:03:39 +02:00
#!/bin/bash
#
# Dump a Cassandra keyspace: take a tagged snapshot on every node over SSH,
# copy the snapshot files into a local directory and export the keyspace
# schema with cqlsh.

# --- Configuration -----------------------------------------------------------

# Keyspace to snapshot and to describe in the schema export.
KEYSPACE="dev_keyspace_1"

# Local directory that receives the per-node dumps, the schema and dump.log.
# It is a relative path, so it resolves against the current working directory.
LOCAL_DIR="data/dumps"

# Hosts to dump, and the account used for every ssh/rsync connection.
NODES=("cass-dev-01" "cass-dev-02" "cass-dev-03")
SSH_USER="alfredo.oliviero"

# Remote directory searched for snapshot directories of the keyspace.
DATA_PATH="/data"
SNAPSHOT_PATH="${DATA_PATH}/${KEYSPACE}"

# Cassandra command-line tools as installed on the remote nodes, and the
# address handed to cqlsh as the host to connect to.
NODETOOL="/home/alfredo.oliviero/apache-cassandra-4.1.3/bin/nodetool"
CQLSH="/home/alfredo.oliviero/apache-cassandra-4.1.3/bin/cqlsh"
CQLSH_IP="10.1.28.100"

# Snapshot tag: passed to `nodetool snapshot -t` and used as the directory
# name to look for under SNAPSHOT_PATH.
DUMP_TAG="dump_docker"

# NOTE(review): these look like leftover defaults - the rest of the script
# passes the node and its name explicitly; confirm before removing.
NODE=cass-dev-01
NODE_NAME=node1

# Create directory for dumps. Stop right away if that fails: every log line
# is appended to a file inside this directory.
if ! mkdir -p -- "$LOCAL_DIR"; then
  printf 'Cannot create dump directory: %s\n' "$LOCAL_DIR" >&2
  exit 1
fi
#######################################
# Print a message on stdout and append it to the dump log file.
# Globals:   LOCAL_DIR (read) - the log file is $LOCAL_DIR/dump.log
# Arguments: $1 - message; backslash escapes such as \n are expanded
# Outputs:   the expanded message followed by a newline
# Returns:   exit status of tee
#######################################
log() {
  local message="$1"
  # printf '%b' expands escapes the way `echo -e` does, but cannot mistake a
  # message like "-n" or "-e" for an option and silently print nothing.
  printf '%b\n' "$message" | tee -a "$LOCAL_DIR/dump.log"
}
#######################################
# Derive a table name from the path of a snapshot directory.
# The table directory is taken to be two levels above the given path; a
# trailing "-<32 lowercase hex characters>" is removed from its name.
# Arguments: $1 - path of the snapshot directory
# Outputs:   the table name on stdout
# Returns:   non-zero if dirname/basename/sed fails
#######################################
get_table_name() {
  local snapshot_path=$1
  local table_dir table_dir_name

  # Declarations are kept apart from the assignments so that a failing
  # command substitution is not hidden behind `local`'s own exit status.
  table_dir=$(dirname -- "$(dirname -- "$snapshot_path")") || return
  table_dir_name=$(basename -- "$table_dir") || return

  # Quoted throughout so names with spaces or glob characters stay intact.
  printf '%s\n' "$table_dir_name" | sed 's/-[a-f0-9]\{32\}$//'
}
2024-07-24 15:03:39 +02:00
#######################################
# Snapshot the keyspace on one node and copy each table's snapshot locally.
#
# Steps: delete the node's previous local copy, clear any remote snapshot
# tagged $DUMP_TAG, create a new one, list the directories named $DUMP_TAG
# under $SNAPSHOT_PATH on the node, and rsync each of them into
# $LOCAL_DIR/<node name>/<table name>/.
#
# Globals:   LOCAL_DIR, SSH_USER, NODETOOL, DUMP_TAG, KEYSPACE,
#            SNAPSHOT_PATH (all read only)
# Arguments: $1 - host to connect to
#            $2 - name of the node's directory under $LOCAL_DIR
# Returns:   0 on success
#            1 if the snapshot, the listing or any copy failed
#            2 if an argument is missing
#######################################
snapshot_and_copy() {
  # Locals, so a foreground call cannot clobber the global NODE / NODE_NAME.
  local node=$1
  local node_name=$2
  local remote="${SSH_USER}@${node}"
  local snapshot_dirs table_path table_name local_table_dir
  local find_status=0
  local status=0

  # An empty name would turn the rm below into "rm -rf $LOCAL_DIR/".
  if [[ -z "$node" || -z "$node_name" ]]; then
    log "snapshot_and_copy: node and node name are required"
    return 2
  fi

  rm -rf -- "${LOCAL_DIR:?}/${node_name}"

  # Best effort: a failure here is reported but does not stop the dump.
  log "Removing old snapshots on $node"
  ssh "$remote" "sudo $NODETOOL clearsnapshot -t $DUMP_TAG -- $KEYSPACE" \
    || log "Warning: could not clear old snapshots on $node"

  # Without a fresh snapshot there is nothing trustworthy to copy.
  log "Creating snapshot on $node"
  if ! ssh "$remote" "sudo $NODETOOL snapshot -t $DUMP_TAG $KEYSPACE"; then
    log "Error: snapshot failed on $node, nothing copied"
    return 1
  fi

  # Find the snapshot directories (one path per output line).
  snapshot_dirs=$(ssh "$remote" "find $SNAPSHOT_PATH -name $DUMP_TAG") \
    || find_status=$?
  if [[ -z "$snapshot_dirs" ]]; then
    if (( find_status != 0 )); then
      log "Error: could not list snapshot directories on $node"
      return 1
    fi
    log "No snapshot directories found on $node"
    return 0
  fi
  if (( find_status != 0 )); then
    log "Warning: listing on $node exited with $find_status, copying what was found"
  fi

  # Read line by line so a path is never split on spaces or glob-expanded.
  while IFS= read -r table_path; do
    [[ -n "$table_path" ]] || continue

    table_name=$(get_table_name "$table_path")
    local_table_dir="${LOCAL_DIR}/${node_name}/${table_name}"
    log ">> table path $table_path\n>> table name $table_name\n>> local table dir $local_table_dir; "

    if ! mkdir -p -- "$local_table_dir"; then
      log "Error: cannot create $local_table_dir"
      status=1
      continue
    fi

    # NOTE(review): the message names $local_table_dir/$DUMP_TAG, but rsync
    # writes straight into $local_table_dir/ - confirm which one is intended.
    log "Copying snapshot from $node:$table_path to $local_table_dir/$DUMP_TAG"
    # stdin is detached so rsync cannot consume the remaining path list.
    if ! rsync -C -r "$remote:$table_path/" "$local_table_dir/" </dev/null; then
      log "Error: copy of $table_path from $node failed"
      status=1
    fi
  done <<< "$snapshot_dirs"

  return "$status"
}
2024-07-30 17:03:33 +02:00
#######################################
# Signal handler: log the interruption, stop background jobs and exit.
# Globals:   none
# Arguments: none
# Returns:   does not return; exits the shell with status 1
#######################################
cleanup() {
  local pids

  log "Script interrupted. Cleaning up..."

  # Jobs started with `&` are not stopped when this shell exits, so they
  # would keep issuing their remaining commands. Terminate and reap them.
  pids=$(jobs -p)
  if [[ -n "$pids" ]]; then
    # Word splitting is intended here: one PID per word.
    # shellcheck disable=SC2086
    kill $pids 2>/dev/null
    wait 2>/dev/null
  fi

  exit 1
}
# Run cleanup when the script is interrupted (Ctrl-C) or terminated.
trap cleanup SIGINT SIGTERM

log "Starting snapshot creation for keyspace $KEYSPACE"

# Export the keyspace schema from the first node. A failure is reported but
# does not stop the data dump.
log "Exporting keyspace schema for $KEYSPACE from ${NODES[0]}"
mkdir -p -- "${LOCAL_DIR}/schema"
if ! ssh "$SSH_USER@${NODES[0]}" \
  "sudo $CQLSH $CQLSH_IP -e 'DESCRIBE KEYSPACE $KEYSPACE;'" \
  > "${LOCAL_DIR}/schema/${KEYSPACE}_schema.cql"; then
  log "Warning: schema export from ${NODES[0]} failed"
fi

# Snapshot and copy every node in parallel. Node N (1-based position in
# NODES) is stored under $LOCAL_DIR/nodeN. Each job's PID is kept so its
# exit status can be checked afterwards.
NODE_INDEX=1
JOB_PIDS=()
for NODE in "${NODES[@]}"; do
  snapshot_and_copy "$NODE" "node$NODE_INDEX" &
  JOB_PIDS+=("$!")
  NODE_INDEX=$((NODE_INDEX + 1))
done

# Wait for each job individually: a bare `wait` would discard the statuses.
FAILED_NODES=0
for JOB_INDEX in "${!NODES[@]}"; do
  if ! wait "${JOB_PIDS[$JOB_INDEX]}"; then
    log "Warning: dump of ${NODES[$JOB_INDEX]} failed"
    FAILED_NODES=$((FAILED_NODES + 1))
  fi
done

if (( FAILED_NODES == 0 )); then
  log "Backup completed."
else
  log "Backup completed with $FAILED_NODES failed node(s)."
fi

# Display sizes of dumps, one line per configured node (follows NODES
# rather than a fixed count).
log "Total size of each node dump directory:"
for (( NODE_INDEX = 1; NODE_INDEX <= ${#NODES[@]}; NODE_INDEX++ )); do
  du -sh "${LOCAL_DIR}/node${NODE_INDEX}"
done

log "Total size of all dump directories:"
du -sh "${LOCAL_DIR}"/*

# Report failed node dumps to the caller through the exit status.
if (( FAILED_NODES > 0 )); then
  exit 1
fi