diff --git a/Dockerfile b/Dockerfile index f9d10e5..9973be8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,14 +11,12 @@ ENV CASSANDRA_DC=DC1 ENV CASSANDRA_RACK=RAC1 # Create directory for dump files -RUN mkdir -p /dump +# RUN mkdir -p /dump/snapshot # Copy the CQL script to create the keyspace and tables -COPY data/dev_keyspace_schema.cql /docker-entrypoint-initdb.d/dev_keyspace_schema.cql +# COPY data/dumps/schema/dev_keyspace_1_schema.cql /dump/dev_keyspace_1_schema.cql # Copy the setup script -COPY scripts/setup.sh /setup.sh -RUN chmod +x /setup.sh # Copy cassandra.yaml and cassandra-rackdc.properties COPY cassandra.yaml /etc/cassandra/ @@ -30,8 +28,11 @@ RUN envsubst < /etc/cassandra/cassandra.yaml > /etc/cassandra/cassandra.yaml.tmp # Substitute environment variables in cassandra-rackdc.properties RUN envsubst < /etc/cassandra/cassandra-rackdc.properties > /etc/cassandra/cassandra-rackdc.properties.tmp && mv /etc/cassandra/cassandra-rackdc.properties.tmp /etc/cassandra/cassandra-rackdc.properties +# COPY scripts/setup.sh /setup.sh +# VOLUME "/scripts/setup.sh" + # Set the entrypoint -ENTRYPOINT ["/setup.sh"] +ENTRYPOINT ["/scripts/setup.sh"] # Expose Cassandra ports EXPOSE 7000 7001 7199 9042 9160 diff --git a/README.md b/README.md index 0612be7..9f8b8ed 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ run a single service: open bash on server -* cassandra1: `docker exec -it cassandra1 /bin/bash` +* cassandra1: `docker exec -it cassandra1 /bin/bash ` * cassandra2: `docker exec -it cassandra2 /bin/bash` * cassandra3: `docker exec -it cassandra3 /bin/bash` diff --git a/cassandra.yaml b/cassandra.yaml index b74e6fa..c885848 100644 --- a/cassandra.yaml +++ b/cassandra.yaml @@ -21,6 +21,10 @@ commitlog_directory: /var/lib/cassandra/commitlog saved_caches_directory: /var/lib/cassandra/saved_caches +client_encryption_options: + enabled: false + optional: false + # Tokens and allocation num_tokens: ${CASSANDRA_NUM_TOKENS} 
allocate_tokens_for_local_replication_factor: 3 diff --git a/docker-compose.yml b/docker-compose.yml index 1b62272..462405c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,9 @@ services: - CASSANDRA_RACK=RAC1 - PRIMARY_NODE=true volumes: - - ./data/dumps/node1:/dump + - ./scripts:/scripts + - ./data/dumps/node1:/dump/snapshot + - ./data/dumps/schema:/dump/schema - ./data/volumes/node1:/var/lib/cassandra - ./logs/node1:/var/log/cassandra ports: @@ -35,7 +37,9 @@ services: - CASSANDRA_RACK=RAC1 - PRIMARY_NODE=false volumes: - - ./data/dumps/node2:/dump + - ./scripts:/scripts + - ./data/dumps/node2:/dump/snapshot + - ./data/dumps/schema:/dump/schema - ./data/volumes/node2:/var/lib/cassandra - ./logs/node2:/var/log/cassandra networks: @@ -56,7 +60,9 @@ services: - CASSANDRA_RACK=RAC1 - PRIMARY_NODE=false volumes: - - ./data/dumps/node3:/dump + - ./scripts:/scripts + - ./data/dumps/node3:/dump/snapshot + - ./data/dumps/schema:/dump/schema - ./data/volumes/node3:/var/lib/cassandra - ./logs/node3:/var/log/cassandra networks: diff --git a/dump.sh b/dump.sh index 06880ef..702b86a 100755 --- a/dump.sh +++ b/dump.sh @@ -5,43 +5,75 @@ KEYSPACE="dev_keyspace_1" LOCAL_DIR="data/dumps" NODES=("cass-dev-01" "cass-dev-02" "cass-dev-03") SSH_USER="alfredo.oliviero" -SNAPSHOT_PATH="/data/$KEYSPACE" +DATA_PATH="/data" +SNAPSHOT_PATH="${DATA_PATH}/${KEYSPACE}" NODETOOL="/home/alfredo.oliviero/apache-cassandra-4.1.3/bin/nodetool" CQLSH="/home/alfredo.oliviero/apache-cassandra-4.1.3/bin/cqlsh" CQLSH_IP="10.1.28.100" +DUMP_TAG="dump_docker" + +# Create directory for dumps +mkdir -p "$LOCAL_DIR" + +# Function to log messages +log() { + local MESSAGE="$1" + echo -e "$MESSAGE" | tee -a "$LOCAL_DIR/dump.log" +} + +# Function to derive the table name from a snapshot path (<table>-<32 hex id>/snapshots/<tag>) +get_table_name() { + local TABLE_PATH="$1" + local TABLE_DIR=$(dirname "$(dirname "$TABLE_PATH")") + local TABLE_DIR_NAME=$(basename "$TABLE_DIR") + local TABLE_NAME=$(echo "$TABLE_DIR_NAME" | sed 
's/-[a-f0-9]\{32\}$//') + echo "$TABLE_NAME" +} # Function to create snapshot and copy it locally snapshot_and_copy() { NODE=$1 NODE_NAME=$2 - echo "Removing old snapshots on $NODE" - ssh $SSH_USER@$NODE "$NODETOOL clearsnapshot --all -- $KEYSPACE" + rm -rf -- "${LOCAL_DIR:?}/${NODE_NAME:?}" - echo "Creating snapshot on $NODE" - ssh $SSH_USER@$NODE "$NODETOOL snapshot $KEYSPACE" + log "Removing old snapshots on $NODE" + ssh $SSH_USER@$NODE "$NODETOOL clearsnapshot -t $DUMP_TAG -- $KEYSPACE" + + log "Creating snapshot on $NODE" + ssh $SSH_USER@$NODE "$NODETOOL snapshot -t $DUMP_TAG $KEYSPACE" # Find the snapshot path and copy it if it exists - TABLES=$(ssh $SSH_USER@$NODE "ls $SNAPSHOT_PATH") - rm -rf $LOCAL_DIR/$NODE_NAME - for TABLE in $TABLES; do - SNAPSHOT_DIR="$SNAPSHOT_PATH/$TABLE/snapshots" - if ssh $SSH_USER@$NODE "test -d $SNAPSHOT_DIR"; then - echo "Copying snapshot for table $TABLE from $NODE to $LOCAL_DIR/$NODE_NAME/$TABLE" - mkdir -p $LOCAL_DIR/$NODE_NAME/$TABLE - rsync -C -r $SSH_USER@$NODE:$SNAPSHOT_DIR/ $LOCAL_DIR/$NODE_NAME/$TABLE - else - echo "No snapshot found for table $TABLE on $NODE" - fi + TABLES=$(ssh $SSH_USER@$NODE "find $SNAPSHOT_PATH -name $DUMP_TAG") + for TABLE_PATH in $TABLES; do + TABLE_NAME=$(get_table_name "$TABLE_PATH") + LOCAL_TABLE_DIR="${LOCAL_DIR}/${NODE_NAME}/${TABLE_NAME}" + + log ">> table path $TABLE_PATH\n>> table name $TABLE_NAME\n>> local table dir $LOCAL_TABLE_DIR; " + + mkdir -p "$LOCAL_TABLE_DIR/snapshots/$KEYSPACE" + + log "Copying snapshot from $NODE:$TABLE_PATH to $LOCAL_TABLE_DIR/snapshots/$KEYSPACE" + rsync -C -r "$SSH_USER@$NODE:$TABLE_PATH/" "$LOCAL_TABLE_DIR/snapshots/$KEYSPACE" done } -# Create local directory if it doesn't exist -mkdir -p $LOCAL_DIR +# Function to handle script interruption +cleanup() { + log "Script interrupted. Cleaning up..." 
+ # Add any additional cleanup commands here + exit 1 +} + +# Set trap to catch signals and run cleanup +trap cleanup SIGINT SIGTERM + +log "Starting snapshot creation for keyspace $KEYSPACE" # Export keyspace schema from the first node -echo "Exporting keyspace schema for $KEYSPACE from ${NODES[0]}" -ssh $SSH_USER@${NODES[0]} "$CQLSH $CQLSH_IP -e \"DESCRIBE KEYSPACE $KEYSPACE;\"" > data/${KEYSPACE}_schema.cql +log "Exporting keyspace schema for $KEYSPACE from ${NODES[0]}" +mkdir -p "${LOCAL_DIR}/schema" +ssh $SSH_USER@${NODES[0]} "$CQLSH $CQLSH_IP -e 'DESCRIBE KEYSPACE $KEYSPACE;'" > "${LOCAL_DIR}/schema/${KEYSPACE}_schema.cql" # Perform snapshot and copy for each node in parallel NODE_INDEX=1 @@ -53,13 +85,13 @@ done # Wait for all background jobs to finish wait -echo "Backup completed." +log "Backup completed." # Display sizes of dumps -echo "Total size of each node dump directory:" -du -sh data/dumps/node1 -du -sh data/dumps/node2 -du -sh data/dumps/node3 +log "Total size of each node dump directory:" +for NODE_INDEX in {1..3}; do + du -sh "${LOCAL_DIR}/node${NODE_INDEX}" +done -echo "Total size of all dump directories:" -du -sh data/dumps/* +log "Total size of all dump directories:" +du -sh "${LOCAL_DIR}"/* diff --git a/scripts/setup.sh b/scripts/setup.sh old mode 100644 new mode 100755 index e90e7bb..826dec4 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -1,65 +1,68 @@ #!/bin/bash -echo -e "RUNNING SETUP" >&2 -KEYSPACE="dev_keyspace" +# Define a logging function +log() { + local MESSAGE="$1" + echo -e "$MESSAGE" | tee -a /var/log/cassandra/setup.log +} + +log "RUNNING SETUP" + +KEYSPACE="dev_keyspace_1" DUMP_DIR="/dump" # Ensure DUMP_DIR is defined +SNAPSHOT_DIR="$DUMP_DIR/snapshot" # Per-table snapshot directories to import +SCHEMA_PATH="$DUMP_DIR/schema/${KEYSPACE}_schema.cql" # CQL schema used to create the keyspace +DUMP_TAG="dump_docker" # Determine the IP address of the current node IP_ADDRESS=$(hostname -I | awk '{print $1}') -# Print the IP address for debugging -echo -e "Node 
IP Address: $IP_ADDRESS" >&2 +# Log the IP address for debugging +log "Node IP Address: $IP_ADDRESS" # Wait for cassandra1 to be ready if this is not the primary node if [ "$PRIMARY_NODE" != "true" ]; then - echo -e "Waiting for cassandra1 to be ready..." >&2 - /wait-for-it.sh cassandra1:9042 -t 60 -- echo "cassandra1 is ready" >&2 + log "Waiting for cassandra1 to be ready..." + /wait-for-it.sh cassandra1:9042 -t 60 && log "cassandra1 is ready" fi # Start Cassandra in the background cassandra -R & # Wait for Cassandra to be ready -echo -e "Waiting for Cassandra to start..." >&2 +log "Waiting for Cassandra to start..." until cqlsh $IP_ADDRESS -e "SHOW HOST" > /dev/null 2>&1; do sleep 2 done -# Print the value of PRIMARY_NODE for debugging -echo -e "PRIMARY_NODE is set to: $PRIMARY_NODE" >&2 +# Log the value of PRIMARY_NODE for debugging +log "PRIMARY_NODE is set to: $PRIMARY_NODE" if [ "$PRIMARY_NODE" = "true" ]; then # Check if the keyspace exists - echo -e "Checking if keyspace $KEYSPACE exists..." >&2 + log "Checking if keyspace $KEYSPACE exists..." if ! cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then - echo -e "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..." >&2 - cqlsh $IP_ADDRESS -f /docker-entrypoint-initdb.d/dev_keyspace_schema.cql + log "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..." + cqlsh "$IP_ADDRESS" -f "$SCHEMA_PATH" else - echo -e "Keyspace $KEYSPACE already exists. Ensuring tables exist..." >&2 - - # Manually define the schema for legacy tables - cqlsh $IP_ADDRESS -e "CREATE TABLE IF NOT EXISTS dev_keyspace.hashtaggedposts ( - id UUID PRIMARY KEY, - -- other columns - );" - # Add similar statements for all other tables + log "Keyspace $KEYSPACE already exists. Skipping schema creation." fi else - echo -e "This is not the primary node. Skipping keyspace and table creation." >&2 + log "This is not the primary node. Skipping keyspace and table creation." 
fi -# Copy snapshots to the Cassandra data directory -echo -e "Copying snapshots..." >&2 -cp -r $DUMP_DIR/* /var/lib/cassandra/data/ - -# Import snapshots into the Cassandra data directory -echo -e "Importing snapshots..." >&2 -for table_dir in $(ls $DUMP_DIR); do - table_name=$(echo $table_dir | sed 's/-[a-f0-9]\{32\}$//') - echo -e "Importing table: $table_name from directory: $table_dir" >&2 - echo -e "Command: nodetool import $KEYSPACE $table_name /var/lib/cassandra/data/$KEYSPACE/$table_dir" >&2 - nodetool import $KEYSPACE $table_name /var/lib/cassandra/data/$KEYSPACE/$table_dir +# Import snapshots using nodetool import +log "Importing snapshots using nodetool import..." +for TABLE_DIR in "$SNAPSHOT_DIR"/*/; do + [ -d "$TABLE_DIR" ] || continue # Unmatched glob stays literal: nothing to import + TABLE_NAME=$(basename "$TABLE_DIR") # Table name is the directory name + log "Importing table: $TABLE_NAME from directory: $TABLE_DIR" + log "Command: nodetool import -- $KEYSPACE $TABLE_NAME ${TABLE_DIR}snapshots/$KEYSPACE" + nodetool import -- "$KEYSPACE" "$TABLE_NAME" "${TABLE_DIR}snapshots/$KEYSPACE" + cqlsh "$IP_ADDRESS" -k "$KEYSPACE" -e "select count(*) from $TABLE_NAME;" >&2 done +log "FINISHED IMPORT" + # Keep the container running tail -f /dev/null