Everything working. /scripts/setup.sh must be started manually on each instance the first time the Docker containers are brought up.

Alfredo Oliviero, 2024-08-01 11:25:40 +02:00
parent 869802c6e0
commit 470416125e
16 changed files with 643 additions and 97 deletions

@@ -2,7 +2,7 @@
 FROM cassandra:4.1.3
 # Install gettext to use envsubst
-RUN apt-get update && apt-get install -y iputils-ping less locate gettext-base
+# RUN apt-get update && apt-get install -y iputils-ping less locate gettext-base
 # Environment variables to configure Cassandra
 ENV CASSANDRA_CLUSTER_NAME=TestCluster
@@ -20,19 +20,19 @@ ENV CASSANDRA_RACK=RAC1
 # Copy cassandra.yaml and cassandra-rackdc.properties
 COPY cassandra.yaml /etc/cassandra/
-COPY cassandra-rackdc.properties /etc/cassandra/
+# COPY cassandra-rackdc.properties /etc/cassandra/
 # Substitute environment variables in cassandra.yaml
-RUN envsubst < /etc/cassandra/cassandra.yaml > /etc/cassandra/cassandra.yaml.tmp && mv /etc/cassandra/cassandra.yaml.tmp /etc/cassandra/cassandra.yaml
+# RUN envsubst < /etc/cassandra/cassandra.yaml > /etc/cassandra/cassandra.yaml.tmp && mv /etc/cassandra/cassandra.yaml.tmp /etc/cassandra/cassandra.yaml
 # Substitute environment variables in cassandra-rackdc.properties
-RUN envsubst < /etc/cassandra/cassandra-rackdc.properties > /etc/cassandra/cassandra-rackdc.properties.tmp && mv /etc/cassandra/cassandra-rackdc.properties.tmp /etc/cassandra/cassandra-rackdc.properties
+# RUN envsubst < /etc/cassandra/cassandra-rackdc.properties > /etc/cassandra/cassandra-rackdc.properties.tmp && mv /etc/cassandra/cassandra-rackdc.properties.tmp /etc/cassandra/cassandra-rackdc.properties
 # COPY scripts/setup.sh /setup.sh
 # VOLUME "/scripts/setup.sh"
 # Set the entrypoint
-ENTRYPOINT ["/scripts/setup.sh"]
+# ENTRYPOINT ["/scripts/setup.sh"]
 # Expose Cassandra ports
 EXPOSE 7000 7001 7199 9042 9160

@@ -8,8 +8,10 @@ in dev environment, cassandra has been installed manually and nodetool is not in
 * dev should be aligned to prod
-# DOCKER
+https://medium.com/@kayvan.sol2/deploying-apache-cassandra-cluster-3-nodes-with-docker-compose-3634ef8345e8
+## DOCKER instructions
 add the following entry to /etc/hosts:
@@ -23,13 +25,26 @@ force recreate docker image
 `docker compose build --no-cache`
 check status:
-`docker exec -it cassandra-cassandra-1 nodetool status`
+`docker exec -it cassandra-1 nodetool status`
+## import db
+`./dump.sh`
+`docker compose up --build`
+wait until everything is ready and the databases are in sync
+TODO: define exactly how, most likely with nodetool status, nodetool gossipinfo, etc.
+run these one after the other, waiting for each to finish:
+* cassandra1: `docker exec -it cassandra1 /scripts/setup`
+* cassandra2: `docker exec -it cassandra2 /scripts/setup`
+* cassandra3: `docker exec -it cassandra3 /scripts/setup`
 run a single service:
-* cassandra1: `docker-compose up cassandra-cassandra1 --build`
-* cassandra2: `docker-compose up cassandra-cassandra2 --build`
+* cassandra1: `docker-compose up cassandra1 --build`
+* cassandra2: `docker-compose up cassandra2 --build`
 * ...
 open bash on server
@@ -38,4 +53,27 @@ open bash on server
 * cassandra2: `docker exec -it cassandra2 /bin/bash`
 * cassandra3: `docker exec -it cassandra3 /bin/bash`
+## check cassandra status
+Check status
+`nodetool status`
+Check if the Gossip protocol is enabled
+`nodetool info | grep -i gossip`
+Check the status of the Gossip protocol
+`nodetool gossipinfo`
+Check the communication between nodes
+`nodetool ring`
+## Documentation
+* [cassandra dump data](docs/dump.md)
+* [cassandra setup and import](docs/setup.md)
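The manual import procedure above can also be scripted end to end. Below is a hedged sketch, not part of this commit, that waits until every node reports Up/Normal and then runs the setup script in each container one after the other; the container names and the `/scripts/setup.sh` path are assumptions based on the compose services and the commit message.

```bash
#!/bin/bash
# Hypothetical helper for the manual import procedure described in the README above.
# Assumes the containers are reachable as cassandra1..cassandra3 and that the setup
# entrypoint is /scripts/setup.sh.
set -e
CONTAINERS=(cassandra1 cassandra2 cassandra3)

all_nodes_un() {
    # Every node line printed by "nodetool status" starts with UN once it is Up/Normal.
    local STATUS
    STATUS=$(docker exec "${CONTAINERS[0]}" nodetool status 2>/dev/null) || return 1
    [ "$(echo "$STATUS" | grep -c '^UN')" -eq "${#CONTAINERS[@]}" ]
}

until all_nodes_un; do
    echo "Cluster not ready yet, retrying in 10s..."
    sleep 10
done

for C in "${CONTAINERS[@]}"; do
    echo "Running setup on $C"
    docker exec "$C" /scripts/setup.sh
done
```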

@@ -1,11 +1,11 @@
-# Cluster name
-cluster_name: ${CASSANDRA_CLUSTER_NAME}
-# Addresses
-listen_address: ${CASSANDRA_LISTEN_ADDRESS}
-broadcast_address: ${CASSANDRA_BROADCAST_ADDRESS}
+# # Cluster name
+# cluster_name: ${CASSANDRA_CLUSTER_NAME}
+# # Addresses
+# listen_address: ${CASSANDRA_LISTEN_ADDRESS}
+# broadcast_address: ${CASSANDRA_BROADCAST_ADDRESS}
 # rpc_address: 0.0.0.0
-broadcast_rpc_address: ${CASSANDRA_RPC_ADDRESS}
+# broadcast_rpc_address: ${CASSANDRA_RPC_ADDRESS}
 # Seed nodes
 seed_provider:
@@ -21,12 +21,12 @@ commitlog_directory: /var/lib/cassandra/commitlog
 saved_caches_directory: /var/lib/cassandra/saved_caches
-client_encryption_options:
-  enabled: false
-  optional: false
+# client_encryption_options:
+#   enabled: false
+#   optional: false
 # Tokens and allocation
-num_tokens: ${CASSANDRA_NUM_TOKENS}
+# num_tokens: ${CASSANDRA_NUM_TOKENS}
 allocate_tokens_for_local_replication_factor: 3
 # Hinted handoff settings
@@ -122,7 +122,8 @@ request_timeout: 1000000ms
 slow_query_log_timeout: 500ms
 # Snitch settings
-endpoint_snitch: GossipingPropertyFileSnitch
+# endpoint_snitch: GossipingPropertyFileSnitch
+endpoint_snitch: SimpleSnitch
 dynamic_snitch_update_interval: 100ms
 dynamic_snitch_reset_interval: 600000ms
 dynamic_snitch_badness_threshold: 1.0

@@ -1,5 +1,3 @@
-version: '3.8'
 services:
   cassandra1:
     build: .
@@ -18,11 +16,20 @@
       - ./data/dumps/node1:/dump/snapshot
       - ./data/dumps/schema:/dump/schema
       - ./data/volumes/node1:/var/lib/cassandra
-      - ./logs/node1:/var/log/cassandra
+      - ./data/logs/node1:/var/log/cassandra
+    healthcheck:
+      test: ["CMD-SHELL", "nodetool status"]
+      interval: 2m
+      start_period: 2m
+      timeout: 10s
+      retries: 3
     ports:
       - "9042:9042"
     networks:
      - cassandra-net
+    restart:
+      on-failure
   cassandra2:
     build: .
@@ -36,16 +43,25 @@
       - CASSANDRA_DC=DC1
       - CASSANDRA_RACK=RAC1
       - PRIMARY_NODE=false
+    healthcheck:
+      test: ["CMD-SHELL", "nodetool status"]
+      interval: 2m
+      start_period: 2m
+      timeout: 10s
+      retries: 3
     volumes:
       - ./scripts:/scripts
       - ./data/dumps/node2:/dump/snapshot
       - ./data/dumps/schema:/dump/schema
       - ./data/volumes/node2:/var/lib/cassandra
-      - ./logs/node2:/var/log/cassandra
+      - ./data/logs/node2:/var/log/cassandra
     networks:
       - cassandra-net
     depends_on:
-      - cassandra1
+      cassandra1:
+        condition: service_healthy
+    restart:
+      on-failure
   cassandra3:
     build: .
@@ -59,16 +75,27 @@
       - CASSANDRA_DC=DC1
       - CASSANDRA_RACK=RAC1
       - PRIMARY_NODE=false
+    healthcheck:
+      test: ["CMD-SHELL", "nodetool status"]
+      interval: 2m
+      start_period: 2m
+      timeout: 10s
+      retries: 3
     volumes:
       - ./scripts:/scripts
       - ./data/dumps/node3:/dump/snapshot
       - ./data/dumps/schema:/dump/schema
       - ./data/volumes/node3:/var/lib/cassandra
-      - ./logs/node3:/var/log/cassandra
+      - ./data/logs/node3:/var/log/cassandra
     networks:
       - cassandra-net
     depends_on:
-      - cassandra2
+      cassandra2:
+        condition: service_healthy
+    restart:
+      on-failure
 networks:
   cassandra-net:

docs/dump.md (new file, 121 lines)
# Documentation: Exporting Data from Existing Cassandra Cluster
This process exports data from an existing Cassandra cluster by creating snapshots on each node and copying the data to a local directory.
The steps ensure a consistent and reliable backup of the keyspace data.
The snapshot creation and data synchronization steps are executed in parallel for all nodes to speed up the process and ensure consistency.
## Dump Process
The data dump process involves taking a snapshot of the keyspace from each Cassandra node, copying the snapshots locally, and exporting the keyspace schema. This process is performed in parallel for efficiency; a hedged shell sketch of these steps follows the numbered list below.
1. **Clear Old Snapshots:**
- For each node, remove any existing snapshots with the specified tag to ensure a clean state.
2. **Create New Snapshots:**
- For each node, create a new snapshot with the specified tag.
3. **Synchronize Snapshots Locally:**
- Copy the snapshot data from each node to the local directory. Each table's data is copied into a directory named after the table.
4. **Export Keyspace Schema:**
- Export the keyspace schema from the first node and save it locally.
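A hedged shell sketch of these four steps, assuming SSH access to the nodes and reusing the `dev_keyspace_1` keyspace and `dump_docker` snapshot tag shown in the examples below. The real `dump.sh` referenced in the README may differ; hostnames and data paths here are placeholders.

```bash
#!/bin/bash
# Hypothetical sketch of the dump process; node hostnames and the data root are placeholders.
KEYSPACE="dev_keyspace_1"
TAG="dump_docker"
NODES=("cass-node1" "cass-node2" "cass-node3")   # placeholder hostnames of the source cluster
DATA_ROOT="/var/lib/cassandra/data"              # assumed Cassandra data directory on the nodes
LOCAL_DIR="data/dumps"

mkdir -p "$LOCAL_DIR/schema"

# Steps 1 and 2: clear any old snapshot and take a fresh one, in parallel on every node
for NODE in "${NODES[@]}"; do
    ssh "$NODE" "nodetool clearsnapshot -t $TAG -- $KEYSPACE; nodetool snapshot -t $TAG $KEYSPACE" &
done
wait

# Step 3: copy each table's snapshot into a local directory named after the table
for i in "${!NODES[@]}"; do
    NODE="${NODES[$i]}"
    for SNAP in $(ssh "$NODE" "ls -d $DATA_ROOT/$KEYSPACE/*/snapshots/$TAG"); do
        TABLE=$(basename "$(dirname "$(dirname "$SNAP")")" | cut -d- -f1)  # strip the table UUID suffix
        mkdir -p "$LOCAL_DIR/node$((i + 1))/$TABLE"
        rsync -a "$NODE:$SNAP/" "$LOCAL_DIR/node$((i + 1))/$TABLE/" &
    done
    wait
done

# Step 4: export the keyspace schema once, from the first node
ssh "${NODES[0]}" "cqlsh -e 'DESCRIBE KEYSPACE $KEYSPACE;'" > "$LOCAL_DIR/schema/${KEYSPACE}_schema.cql"
```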
### Directory Structure on Server
- Each table in the keyspace has its own directory.
- Inside each table's directory, there is a `snapshots` directory.
- The `snapshots` directory contains subdirectories for each snapshot, named according to the snapshot tag.
### Local Directory Structure
- The local directory mirrors the server's structure.
- Each table's snapshot data is stored in a directory named after the table, inside the local dump directory.
By following this process, a consistent and reliable backup of the Cassandra keyspace data is achieved, ensuring that the data can be restored or migrated as needed.
## Directory Structure Example
### Server-Side Structure
On the server, the directory structure for the snapshots is organized as follows:
```plaintext
/data
└── dev_keyspace_1
├── table1-abc1234567890abcdef1234567890abcdef
│ └── snapshots
│ └── dump_docker
│ ├── manifest.json
│ ├── nb-1-big-CompressionInfo.db
│ ├── nb-1-big-Data.db
│ ├── nb-1-big-Digest.crc32
│ ├── nb-1-big-Filter.db
│ ├── nb-1-big-Index.db
│ ├── nb-1-big-Statistics.db
│ ├── nb-1-big-Summary.db
│ └── schema.cql
├── table2-def4567890abcdef1234567890abcdef
│ └── snapshots
│ └── dump_docker
│ ├── manifest.json
│ ├── nb-1-big-CompressionInfo.db
│ ├── nb-1-big-Data.db
│ ├── nb-1-big-Digest.crc32
│ ├── nb-1-big-Filter.db
│ ├── nb-1-big-Index.db
│ ├── nb-1-big-Statistics.db
│ ├── nb-1-big-Summary.db
│ └── schema.cql
└── table3-ghi7890abcdef1234567890abcdef
└── snapshots
└── dump_docker
├── manifest.json
├── nb-1-big-CompressionInfo.db
├── nb-1-big-Data.db
├── nb-1-big-Digest.crc32
├── nb-1-big-Filter.db
├── nb-1-big-Index.db
├── nb-1-big-Statistics.db
├── nb-1-big-Summary.db
└── schema.cql
```
### Local Directory Structure
When copied locally, the directory structure is organized as follows:
```plaintext
data/dumps
├── schema
│   └── dev_keyspace_1_schema.cql
└── node1
├── table1
│ ├── manifest.json
│ ├── nb-1-big-CompressionInfo.db
│ ├── nb-1-big-Data.db
│ ├── nb-1-big-Digest.crc32
│ ├── nb-1-big-Filter.db
│ ├── nb-1-big-Index.db
│ ├── nb-1-big-Statistics.db
│ ├── nb-1-big-Summary.db
│ └── schema.cql
├── table2
│ ├── manifest.json
│ ├── nb-1-big-CompressionInfo.db
│ ├── nb-1-big-Data.db
│ ├── nb-1-big-Digest.crc32
│ ├── nb-1-big-Filter.db
│ ├── nb-1-big-Index.db
│ ├── nb-1-big-Statistics.db
│ ├── nb-1-big-Summary.db
│ └── schema.cql
└── table3
├── manifest.json
├── nb-1-big-CompressionInfo.db
├── nb-1-big-Data.db
├── nb-1-big-Digest.crc32
├── nb-1-big-Filter.db
├── nb-1-big-Index.db
├── nb-1-big-Statistics.db
├── nb-1-big-Summary.db
└── schema.cql
```

docs/setup.md (new file, 35 lines)
### Cassandra Cluster Setup and Data Migration Workflow
This document describes the workflow for setting up a Cassandra cluster with multiple nodes, creating keyspaces and schemas, and exporting and re-importing data. The process ensures synchronization across nodes and efficient data migration using snapshots.
#### Workflow Phases
The workflow is divided into the following phases:
1. **Startup Phase**: All nodes start Cassandra and ensure they are ready to accept connections.
2. **Schema Creation Phase**: The primary node creates the keyspace and schema if they do not exist. This schema is then propagated to other nodes.
3. **Data Import Phase**: Data is imported from snapshots using `sstableloader` only if the schema was newly created.
#### Phase 1: Startup Phase
Each node starts Cassandra and waits for it to be ready before proceeding to the next phase; a minimal readiness probe is sketched after the list below.
- **Primary Node**: Starts Cassandra and waits for other nodes to signal they are ready.
- **Non-Primary Nodes**: Wait for the primary node to be ready before starting Cassandra.
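A minimal readiness probe, equivalent in spirit to the `scripts/is_cassandra_ready.sh` helper added in this commit: a node is considered ready once `cqlsh` can connect to its own address.

```bash
#!/bin/bash
# Minimal readiness check (sketch); mirrors scripts/is_cassandra_ready.sh.
IP_ADDRESS=$(hostname -I | awk '{print $1}')
if cqlsh "$IP_ADDRESS" -e 'SHOW HOST' > /dev/null 2>&1; then
    echo "Cassandra is ready"
else
    echo "Cassandra is not ready yet"
    exit 1
fi
```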
#### Phase 2: Schema Creation Phase
After all nodes are confirmed to be ready, the primary node checks if the keyspace exists and creates it if it does not; a sketch of this step follows the list below.
- **Primary Node**:
- Checks if the keyspace exists.
- If the keyspace does not exist, creates the keyspace and applies the schema.
- Waits for the schema to propagate to all nodes.
- **Non-Primary Nodes**:
- Wait for the primary node to complete schema creation and propagation.
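A sketch of the primary node's schema step, following the logic in `scripts/setup.sh`; the keyspace name and schema path are the defaults used elsewhere in this repository.

```bash
#!/bin/bash
# Sketch: create the keyspace and tables from the dumped schema only if the keyspace is missing.
KEYSPACE="dev_keyspace_1"
SCHEMA_PATH="/dump/schema/${KEYSPACE}_schema.cql"
IP_ADDRESS=$(hostname -I | awk '{print $1}')

if ! cqlsh "$IP_ADDRESS" -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
    echo "Keyspace $KEYSPACE missing, applying $SCHEMA_PATH"
    cqlsh "$IP_ADDRESS" -f "$SCHEMA_PATH"
else
    echo "Keyspace $KEYSPACE already exists, leaving the schema untouched"
fi
```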
#### Phase 3: Data Import Phase
Data is imported into the keyspace using `sstableloader` from the snapshots if the schema was newly created; an example invocation follows the list below.
- **Primary Node**:
- If the schema was created, imports data from the snapshots.
- **Non-Primary Nodes**:
- Wait for the primary node to complete the data import.
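The per-table import is handled by `scripts/import.sh`. For a single table directory laid out as described in docs/dump.md, the invocation looks roughly like this (seed list and paths follow the repository defaults; the table name is illustrative):

```bash
# Load one table's snapshot into the cluster, then spot-check the row count.
sstableloader -d cassandra1,cassandra2,cassandra3 -v -k dev_keyspace_1 /dump/snapshot/table1
cqlsh "$(hostname -I | awk '{print $1}')" -k dev_keyspace_1 -e "SELECT count(*) FROM table1;"
```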

scripts/import.sh (new executable file, 19 lines)
#!/bin/bash
# Imports table snapshots into the cluster with sstableloader.
# Expects KEYSPACE, DUMP_DIR and CASSANDRA_SEEDS to be set in the environment before invocation.
SNAPSHOT_DIR="$DUMP_DIR/snapshot"
IP_ADDRESS=$(hostname -I | awk '{print $1}')
# Define a logging function
log() {
    local MESSAGE="$1"
    echo -e "$MESSAGE" | tee -a /var/log/cassandra/import.log
}
log "Importing snapshots using sstableloader..."
for TABLE_DIR in $(ls $SNAPSHOT_DIR); do
    TABLE_NAME=$(basename $TABLE_DIR) # Extract table name from directory name
    log "Importing table: $TABLE_NAME from directory: $SNAPSHOT_DIR/$TABLE_DIR"
    sstableloader -d "$CASSANDRA_SEEDS" -v -k "$KEYSPACE" "$SNAPSHOT_DIR/$TABLE_DIR"
    cqlsh $IP_ADDRESS -k "$KEYSPACE" -e "select count(*) from $TABLE_NAME;" >&2
done

scripts/is_cassandra_ready.sh (new executable file, 23 lines)
#!/bin/bash
IP_ADDRESS=$(hostname -I | awk '{print $1}')
# Define a logging function
log() {
    local MESSAGE="$1"
    echo -e "$MESSAGE" | tee -a /var/log/cassandra/is_cassandra_ready.log
}
log "Checking if Cassandra is ready..."
is_cassandra_ready() {
    cqlsh $IP_ADDRESS -e 'SHOW HOST' > /dev/null 2>&1
}
is_cassandra_ready
if [ $? -eq 0 ]; then
    log "Cassandra is ready."
    exit 0
else
    log "Cassandra is not ready."
    exit 1
fi

scripts/is_keyspace_exists.sh (new executable file, 35 lines)
#!/bin/bash
# Usage: is_keyspace_exists.sh [--keyspace <keyspace>]
# Example: is_keyspace_exists.sh --keyspace dev_keyspace_1
KEYSPACE=${KEYSPACE:-}
# Parse arguments
while [ $# -gt 0 ]; do
    case "$1" in
        --keyspace)
            KEYSPACE="$2"
            shift 2
            ;;
        *)
            echo "Unknown argument: $1"
            exit 1
            ;;
    esac
done
# Check for required arguments or environment variables
if [ -z "$KEYSPACE" ]; then
    echo "KEYSPACE is not set. Set it via --keyspace or the KEYSPACE environment variable."
    exit 1
fi
IP_ADDRESS=$(hostname -I | awk '{print $1}')
if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
    echo "Keyspace $KEYSPACE EXISTS"
    exit 0
fi
echo "Keyspace $KEYSPACE DOES NOT EXIST"
exit 1

scripts/is_node_up.sh (new executable file, 36 lines)
#!/bin/bash
# Usage: is_node_up.sh [node_address] [cassandra_rpc_address]
# Arguments are positional and default to this host's IP and $CASSANDRA_RPC_ADDRESS.
# Define a logging function
log() {
    local MESSAGE="$1"
    echo -e "$MESSAGE" | tee -a /var/log/cassandra/is_node_up.log
}
# Default values
NODE=${1:-$(hostname -I | awk '{print $1}')}
CASSANDRA_RPC_ADDRESS=${2:-$CASSANDRA_RPC_ADDRESS}
log "Checking if node $NODE is up..."
is_node_up() {
    local NODE="$1"
    local NODE_STATUS=$(nodetool status -r)
    if echo "$NODE_STATUS" | grep -E "^UN" | grep "$NODE" > /dev/null; then
        return 0
    elif [ "$NODE" = "$CASSANDRA_RPC_ADDRESS" ]; then
        # For the local node, also accept a match on this host's IP in the non-resolving output
        NODE_STATUS=$(nodetool status)
        if echo "$NODE_STATUS" | grep -E "^UN.*$(hostname -I | awk '{print $1}')" > /dev/null; then
            return 0
        fi
    fi
    return 1
}
is_node_up $NODE
if [ $? -eq 0 ]; then
    log "Node $NODE is up."
    exit 0
else
    log "Node $NODE is not up."
    exit 1
fi

scripts/is_primary_node.sh (new executable file, 11 lines)
#!/bin/bash
# PRIMARY_NODE=${PRIMARY_NODE:-}
echo PRIMARY_NODE: $PRIMARY_NODE
if [ "$PRIMARY_NODE" = "true" ]; then
exit 0
else
exit 1
fi

scripts/is_schema_agreed.sh (new executable file, 32 lines)
#!/bin/bash
# Expects KEYSPACE and CASSANDRA_SEEDS to be set in the environment.
IP_ADDRESS=$(hostname -I | awk '{print $1}')
SEEDS=(${CASSANDRA_SEEDS//,/ })
# Define a logging function
log() {
    local MESSAGE="$1"
    echo -e "$MESSAGE" | tee -a /var/log/cassandra/is_schema_agreed.log
}
log "Checking if schema is agreed..."
is_schema_agreed() {
    if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
        # Count the node addresses listed under "Schema versions:" and compare with the seed count
        SCHEMA_NODES=$(nodetool describecluster | grep -A 1 "Schema versions:" | grep -o '\[.*\]' | tr -d '[]' | tr ',' '\n' | wc -l)
        if [ "$SCHEMA_NODES" -eq "${#SEEDS[@]}" ]; then
            return 0
        fi
    fi
    return 1
}
is_schema_agreed
if [ $? -eq 0 ]; then
    log "Schema is agreed."
    exit 0
else
    log "Schema is not agreed."
    exit 1
fi

@@ -8,87 +8,111 @@ log() {
 log "RUNNING SETUP"
+# Configuration
 KEYSPACE="dev_keyspace_1"
 DUMP_DIR="/dump" # Ensure DUMP_DIR is defined
-SCHEMA_PATH="$DUMP_DIR/schema/${KEYSPACE}_schema.cql" # Ensure DUMP_DIR is defined
+SCHEMA_PATH="$DUMP_DIR/schema/${KEYSPACE}_schema.cql"
 CASSANDRA_SEEDS="cassandra1,cassandra2,cassandra3"
-STATUS_DIR="/var/log/cassandra"
 IP_ADDRESS=$(hostname -I | awk '{print $1}')
+DATA_DIR="/var/lib/cassandra/data/$KEYSPACE"
+SNAPSHOT_DIR="$DUMP_DIR/snapshot"
+PRIMARY_NODE=${PRIMARY_NODE:-false} # Default to false if not set
+SLEEP_DURATION=5 # Sleep duration in seconds for waits
+TIMEOUT=3000 # Timeout in seconds for waits
 # Initialize SEEDS array
 SEEDS=(${CASSANDRA_SEEDS//,/ })
-# Function to wait for all nodes to be in the 'UN' state
-wait_for_all_nodes_up() {
-SEEDS=(${CASSANDRA_SEEDS//,/ })
-while true; do
-all_up=true
-for seed in "${SEEDS[@]}"; do
-NODE_STATUS=$(nodetool status -r)
-if ! echo "$NODE_STATUS" | grep -E "^UN.*$seed" > /dev/null; then
-if [ "$seed" = "$CASSANDRA_RPC_ADDRESS" ]; then
-NODE_STATUS=$(nodetool status)
-if ! echo "$NODE_STATUS" | grep -E "^UN.*$(hostname -I | awk '{print $1}')" > /dev/null; then
-log "Node $seed (self) is not up yet..."
-all_up=false
-break
-fi
-else
-log "Node $seed is not up yet..."
-all_up=false
-break
-fi
-fi
-done
-if [ "$all_up" = true ]; then
-log "All nodes are up."
-break
-else
-sleep 5
-fi
-done
-}
-# Function to wait for schema agreement across all nodes
-wait_for_schema_agreement() {
-while true; do
-if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
-if nodetool describecluster | grep -q "Schema versions:"; then
-SCHEMA_COUNT=$(nodetool describecluster | grep -A 1 "Schema versions:" | wc -l)
-if [ "$SCHEMA_COUNT" -eq 2 ]; then
-log "Schema agreement reached."
-break
-else
-log "Waiting for schema agreement..."
-fi
-fi
-else
-log "Waiting for keyspace $KEYSPACE to be available..."
-fi
-sleep 5
-done
-}
+# Function to wait for a command to succeed
+wait_for_command() {
+local COMMAND="$1"
+local TIMEOUT="$2"
+local START_TIME=$(date +%s)
+local END_TIME=$((START_TIME + TIMEOUT))
+while true; do
+if eval "$COMMAND"; then
+log "Command succeeded: $COMMAND"
+break
+else
+local CURRENT_TIME=$(date +%s)
+if [ "$CURRENT_TIME" -ge "$END_TIME" ]; then
+log "Timed out waiting for command: $COMMAND"
+exit 1
+fi
+log "Command failed: $COMMAND, still waiting"
+sleep $SLEEP_DURATION
+fi
+done
+}
+# Function to check if a node is up
+is_node_up() {
+local NODE="$1"
+local NODE_STATUS=$(nodetool status -r)
+if echo "$NODE_STATUS" | grep -E "^UN" | grep "$NODE" > /dev/null; then
+return 0
+elif [ "$NODE" = "$CASSANDRA_RPC_ADDRESS" ]; then
+NODE_STATUS=$(nodetool status)
+if echo "$NODE_STATUS" | grep -E "^UN.*$(hostname -I | awk '{print $1}')" > /dev/null; then
+return 0
+fi
+fi
+return 1
+}
+# Function to wait for all nodes to be up
+wait_for_all_nodes_up() {
+for seed in "${SEEDS[@]}"; do
+wait_for_command "is_node_up $seed" $TIMEOUT
+done
+log "All nodes are up."
+}
+# Function to check for schema agreement and if schema exists
+is_schema_agreed() {
+if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
+SCHEMA_NODES=$(nodetool describecluster | grep -A 1 "Schema versions:" | grep -o '\[.*\]' | tr -d '[]' | tr ',' '\n' | wc -l)
+if [ "$SCHEMA_NODES" -eq "${#SEEDS[@]}" ]; then
+return 0
+fi
+fi
+return 1
+}
+# Function to check if keyspace exists
+is_keyspace_exists() {
+if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
+return 0
+fi
+return 1
+}
 log "setup KEYSPACE: $KEYSPACE"
 log "setup DUMP_DIR: $DUMP_DIR"
 log "setup SCHEMA_PATH: $SCHEMA_PATH"
 log "setup CASSANDRA_SEEDS: $CASSANDRA_SEEDS"
-log "setup STATUS_DIR: $STATUS_DIR"
+# Check if the keyspace directory exists and is not empty
+if [ -d "$DATA_DIR" ] && [ "$(ls -A $DATA_DIR)" ]; then
+log "Data directory $DATA_DIR exists and is not empty. Skipping schema creation and data import."
+SCHEMA_CREATED=false
+else
+log "Data directory $DATA_DIR does not exist or is empty. Proceeding with schema creation and data import."
+SCHEMA_CREATED=true
+fi
 # Wait for cassandra1 to be ready if this is not the primary node
 if [ "$PRIMARY_NODE" != "true" ]; then
-log "Waiting for cassandra1 to be ready..."
-/wait-for-it.sh cassandra1:9042 -t 60 -- log "cassandra1 is ready"
+wait_for_service cassandra1 9042 $TIMEOUT
 fi
 # Start Cassandra in the background
-cassandra -R &
+# cassandra -R &
 # Wait for Cassandra to be ready
-log "Waiting for Cassandra to start..."
-until cqlsh $IP_ADDRESS -e "SHOW HOST" > /dev/null 2>&1; do
-sleep 2
-done
+wait_for_command "cqlsh $IP_ADDRESS -e 'SHOW HOST' > /dev/null 2>&1" $TIMEOUT
 # Log the value of PRIMARY_NODE for debugging
 log "PRIMARY_NODE is set to: $PRIMARY_NODE"
@@ -100,42 +124,30 @@ wait_for_all_nodes_up
 # Step 2: Create keyspace and schema on the primary node
 if [ "$PRIMARY_NODE" = "true" ]; then
 log "Checking if keyspace $KEYSPACE exists..."
-if ! cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
+if ! is_keyspace_exists; then
 log "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..."
 cqlsh $IP_ADDRESS -f "$SCHEMA_PATH"
 else
 log "Keyspace $KEYSPACE already exists. Ensuring tables exist..."
 fi
-# Signal to secondary nodes that schema creation is complete
-touch $STATUS_DIR/schema_created
 fi
 # Step 3: Wait for schema to be created and agreed upon across all nodes
 log "Waiting for schema agreement across all nodes..."
-wait_for_schema_agreement
+wait_for_command "is_schema_agreed" $TIMEOUT
 # Step 4: Import data using sstableloader if not previously imported
-if [ "$PRIMARY_NODE" = "true" ]; then
+# if [ "$SCHEMA_CREATED" = true ]; then
 log "Importing snapshots using sstableloader..."
-for TABLE_DIR in $(ls $DUMP_DIR); do
+for TABLE_DIR in $(ls $SNAPSHOT_DIR); do
 TABLE_NAME=$(basename $TABLE_DIR) # Extract table name from directory name
-log "Importing table: $TABLE_NAME from directory: $DUMP_DIR/$TABLE_DIR"
-sstableloader -d "$CASSANDRA_SEEDS" -v -k "$KEYSPACE" "$DUMP_DIR/$TABLE_DIR"
+log "Importing table: $TABLE_NAME from directory: $SNAPSHOT_DIR/$TABLE_DIR"
+sstableloader -d "$CASSANDRA_SEEDS" -v -k "$KEYSPACE" "$SNAPSHOT_DIR/$TABLE_DIR"
 cqlsh $IP_ADDRESS -k "$KEYSPACE" -e "select count(*) from $TABLE_NAME;" >&2
 done
-# Signal to secondary nodes that import is complete
-touch $STATUS_DIR/import_complete
-else
-# Wait for import completion signal from primary node
-log "Waiting for import completion signal from primary node..."
-while [ ! -f "$STATUS_DIR/import_complete" ]; do
-sleep 5
-done
-fi
+# fi
 log "FINISHED IMPORT"
 # Keep the container running
-tail -f /dev/null
+# tail -f /dev/null

scripts/setup_empty.sh (new executable file, 5 lines)
#!/bin/bash
# Keep the container running
tail -f /dev/null

scripts/setup_orig.sh (new executable file, 120 lines)
#!/bin/bash
# Define a logging function
log() {
local MESSAGE="$1"
echo -e "$MESSAGE" | tee -a /var/log/cassandra/setup.log
}
log "RUNNING SETUP"
# Configuration
KEYSPACE=${KEYSPACE:-dev_keyspace_1}
DUMP_DIR=${DUMP_DIR:-/dump} # Ensure DUMP_DIR is defined
CASSANDRA_SEEDS=${CASSANDRA_SEEDS:-cassandra1,cassandra2,cassandra3}
PRIMARY_NODE=${PRIMARY_NODE:-false} # Default to false if not set
IP_ADDRESS=$(hostname -I | awk '{print $1}')
SCHEMA_PATH="$DUMP_DIR/schema/${KEYSPACE}_schema.cql"
DATA_DIR="/var/lib/cassandra/data/$KEYSPACE"
SNAPSHOT_DIR="$DUMP_DIR/snapshot"
# Initialize SEEDS array
SEEDS=(${CASSANDRA_SEEDS//,/ })
SLEEP_DURATION=5 # Sleep duration in seconds for waits
TIMEOUT=300 # Timeout in seconds for waits
# Function to wait for a command to succeed
wait_for_command() {
local COMMAND="$1"
local TIMEOUT="$2"
local START_TIME=$(date +%s)
local END_TIME=$((START_TIME + TIMEOUT))
while true; do
if eval "$COMMAND"; then
log "Command succeeded: $COMMAND"
break
else
local CURRENT_TIME=$(date +%s)
if [ "$CURRENT_TIME" -ge "$END_TIME" ]; then
log "Timed out waiting for command: $COMMAND"
exit 1
fi
log "Command failed: $COMMAND, still waiting"
sleep $SLEEP_DURATION
fi
done
}
log "setup KEYSPACE: $KEYSPACE"
log "setup DUMP_DIR: $DUMP_DIR"
log "setup SCHEMA_PATH: $SCHEMA_PATH"
log "setup CASSANDRA_SEEDS: $CASSANDRA_SEEDS"
# Check if the keyspace directory exists and is not empty
if [ -d "$DATA_DIR" ] && [ "$(ls -A $DATA_DIR)" ]; then
log "Data directory $DATA_DIR exists and is not empty. Skipping schema creation and data import."
EMPTY_DB=false
else
log "Data directory $DATA_DIR does not exist or is empty. Proceeding with schema creation and data import."
EMPTY_DB=true
fi
# # Wait for cassandra1 to be ready if this is not the primary node
# if [ "$PRIMARY_NODE" != "true" ]; then
# wait_for_command "/scripts/is_node_up.sh --node cassandra1 --cassandra_rpc_address $IP_ADDRESS" $TIMEOUT
# fi
# Start Cassandra in the background
cassandra -R &
# Wait for Cassandra to be ready
wait_for_command "/scripts/is_cassandra_ready.sh" $TIMEOUT
# Log the value of PRIMARY_NODE for debugging
log "PRIMARY_NODE is set to: $PRIMARY_NODE"
# Step 1: Wait for all nodes to be up and ready
log "Waiting for all nodes to be up and ready..."
wait_for_command "/scripts/is_node_up.sh --node $seed " $TIMEOUT
// TODO: aspettare tutti i nodi
# Function to wait for all nodes to be up
wait_for_all_nodes_up() {
for seed in "${SEEDS[@]}"; do
wait_for_command "/scripts/is_node_up.sh --node $seed " $TIMEOUT
done
log "All nodes are up."
}
wait_for_all_nodes_up
# Step 2: Create keyspace and schema on the primary node
if [ "$PRIMARY_NODE" = "true" ]; then
log "Checking if keyspace $KEYSPACE exists..."
if ! /scripts/is_keyspace_exists.sh --keyspace "$KEYSPACE"; then
log "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..."
cqlsh $IP_ADDRESS -f "$SCHEMA_PATH"
else
log "Keyspace $KEYSPACE already exists. Ensuring tables exist..."
fi
fi
# Step 3: Wait for schema to be created and agreed upon across all nodes
log "Waiting for schema agreement across all nodes..."
wait_for_command "/scripts/is_schema_agreed.sh --keyspace $KEYSPACE --cassandra_seeds $CASSANDRA_SEEDS" $TIMEOUT
# Step 4: Import data using sstableloader if not previously imported
if [ "$EMPTY_DB" = true ]; then
log "Importing snapshots using sstableloader..."
/scripts/import.sh --keyspace "$KEYSPACE" --dump_dir "$SNAPSHOT_DIR" --cassandra_seeds "$CASSANDRA_SEEDS"
fi
log "FINISHED IMPORT"
# Keep the container running
tail -f /dev/null

scripts/wait_for_command.sh (new executable file, 31 lines)
#!/bin/bash
# Usage: wait_for_command.sh "<command>" <timeout_seconds>
# Repeatedly evaluates <command> until it succeeds or <timeout_seconds> elapses.
COMMAND="$1"
TIMEOUT="$2"
SLEEP_DURATION=5 # Sleep duration in seconds for waits
log() {
    local MESSAGE="$1"
    echo -e "$MESSAGE" | tee -a /var/log/cassandra/setup.log
}
wait_for_command() {
    local START_TIME=$(date +%s)
    local END_TIME=$((START_TIME + TIMEOUT))
    while true; do
        if eval "$COMMAND"; then
            log "Command succeeded: $COMMAND"
            break
        else
            local CURRENT_TIME=$(date +%s)
            if [ "$CURRENT_TIME" -ge "$END_TIME" ]; then
                log "Timed out waiting for command: $COMMAND"
                exit 1
            fi
            sleep $SLEEP_DURATION
        fi
    done
}
wait_for_command