Everything working. /scripts/setup.sh must be started manually on each instance the first time the Docker containers are brought up.

Alfredo Oliviero, 2024-08-01 11:25:40 +02:00
parent 869802c6e0
commit 470416125e
16 changed files with 643 additions and 97 deletions

@@ -2,7 +2,7 @@
 FROM cassandra:4.1.3
 # Install gettext to use envsubst
-RUN apt-get update && apt-get install -y iputils-ping less locate gettext-base
+# RUN apt-get update && apt-get install -y iputils-ping less locate gettext-base
 # Environment variables to configure Cassandra
 ENV CASSANDRA_CLUSTER_NAME=TestCluster
@@ -20,19 +20,19 @@ ENV CASSANDRA_RACK=RAC1
 # Copy cassandra.yaml and cassandra-rackdc.properties
 COPY cassandra.yaml /etc/cassandra/
-COPY cassandra-rackdc.properties /etc/cassandra/
+# COPY cassandra-rackdc.properties /etc/cassandra/
 # Substitute environment variables in cassandra.yaml
-RUN envsubst < /etc/cassandra/cassandra.yaml > /etc/cassandra/cassandra.yaml.tmp && mv /etc/cassandra/cassandra.yaml.tmp /etc/cassandra/cassandra.yaml
+# RUN envsubst < /etc/cassandra/cassandra.yaml > /etc/cassandra/cassandra.yaml.tmp && mv /etc/cassandra/cassandra.yaml.tmp /etc/cassandra/cassandra.yaml
 # Substitute environment variables in cassandra-rackdc.properties
-RUN envsubst < /etc/cassandra/cassandra-rackdc.properties > /etc/cassandra/cassandra-rackdc.properties.tmp && mv /etc/cassandra/cassandra-rackdc.properties.tmp /etc/cassandra/cassandra-rackdc.properties
+# RUN envsubst < /etc/cassandra/cassandra-rackdc.properties > /etc/cassandra/cassandra-rackdc.properties.tmp && mv /etc/cassandra/cassandra-rackdc.properties.tmp /etc/cassandra/cassandra-rackdc.properties
 # COPY scripts/setup.sh /setup.sh
 # VOLUME "/scripts/setup.sh"
 # Set the entrypoint
-ENTRYPOINT ["/scripts/setup.sh"]
+# ENTRYPOINT ["/scripts/setup.sh"]
 # Expose Cassandra ports
 EXPOSE 7000 7001 7199 9042 9160

@@ -8,8 +8,10 @@ in dev environment, cassandra has been installed manually and nodetool is not in
 * dev should be aligned to prod
-# DOCKER
+https://medium.com/@kayvan.sol2/deploying-apache-cassandra-cluster-3-nodes-with-docker-compose-3634ef8345e8
+## DOCKER instructions
 add the following entry to /etc/hosts:
@@ -23,13 +25,26 @@ force recreate docker image
 `docker compose build --no-cache`
 check status:
-`docker exec -it cassandra-cassandra-1 nodetool status`
+`docker exec -it cassandra-1 nodetool status`
+## import db
+`./dump.sh`
+`docker compose up --build`
+wait until everything is ready and the databases are in sync
+TODO: define exactly how, most likely with nodetool status, nodetool gossipinfo, etc.
+run these one after the other, waiting for each to finish:
+* cassandra1: `docker exec -it cassandra1 /scripts/setup`
+* cassandra2: `docker exec -it cassandra2 /scripts/setup`
+* cassandra3: `docker exec -it cassandra3 /scripts/setup`
 run a single service:
-* cassandra1: `docker-compose up cassandra-cassandra1 --build`
-* cassandra2: `docker-compose up cassandra-cassandra2 --build`
+* cassandra1: `docker-compose up cassandra1 --build`
+* cassandra2: `docker-compose up cassandra2 --build`
 * ...
 open bash on server
@@ -38,4 +53,27 @@ open bash on server
 * cassandra2: `docker exec -it cassandra2 /bin/bash`
 * cassandra3: `docker exec -it cassandra3 /bin/bash`
+## check cassandra status
+Check status
+`nodetool status`
+Check if the Gossip protocol is enabled
+`nodetool info | grep -i gossip`
+Check the status of the Gossip protocol
+`nodetool gossipinfo`
+Check the communication between nodes
+`nodetool ring`
+## Documentation
+* [cassandra dump data](docs/dump.md)
+* [cassandra setup and import](docs/setup.md)
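The manual import procedure above can also be scripted end to end. Below is a hedged sketch, not part of this commit, that waits until every node reports Up/Normal and then runs the setup script in each container one after the other; the container names and the `/scripts/setup.sh` path are assumptions based on the compose services and the commit message.

```bash
#!/bin/bash
# Hypothetical helper for the manual import procedure described in the README above.
# Assumes the containers are reachable as cassandra1..cassandra3 and that the setup
# entrypoint is /scripts/setup.sh.
set -e
CONTAINERS=(cassandra1 cassandra2 cassandra3)

all_nodes_un() {
    # Every node line printed by "nodetool status" starts with UN once it is Up/Normal.
    local STATUS
    STATUS=$(docker exec "${CONTAINERS[0]}" nodetool status 2>/dev/null) || return 1
    [ "$(echo "$STATUS" | grep -c '^UN')" -eq "${#CONTAINERS[@]}" ]
}

until all_nodes_un; do
    echo "Cluster not ready yet, retrying in 10s..."
    sleep 10
done

for C in "${CONTAINERS[@]}"; do
    echo "Running setup on $C"
    docker exec "$C" /scripts/setup.sh
done
```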

@@ -1,11 +1,11 @@
-# Cluster name
-cluster_name: ${CASSANDRA_CLUSTER_NAME}
-# Addresses
-listen_address: ${CASSANDRA_LISTEN_ADDRESS}
-broadcast_address: ${CASSANDRA_BROADCAST_ADDRESS}
+# # Cluster name
+# cluster_name: ${CASSANDRA_CLUSTER_NAME}
+# # Addresses
+# listen_address: ${CASSANDRA_LISTEN_ADDRESS}
+# broadcast_address: ${CASSANDRA_BROADCAST_ADDRESS}
 # rpc_address: 0.0.0.0
-broadcast_rpc_address: ${CASSANDRA_RPC_ADDRESS}
+# broadcast_rpc_address: ${CASSANDRA_RPC_ADDRESS}
 # Seed nodes
 seed_provider:
@@ -21,12 +21,12 @@ commitlog_directory: /var/lib/cassandra/commitlog
 saved_caches_directory: /var/lib/cassandra/saved_caches
-client_encryption_options:
-  enabled: false
-  optional: false
+# client_encryption_options:
+#   enabled: false
+#   optional: false
 # Tokens and allocation
-num_tokens: ${CASSANDRA_NUM_TOKENS}
+# num_tokens: ${CASSANDRA_NUM_TOKENS}
 allocate_tokens_for_local_replication_factor: 3
 # Hinted handoff settings
@@ -122,7 +122,8 @@ request_timeout: 1000000ms
 slow_query_log_timeout: 500ms
 # Snitch settings
-endpoint_snitch: GossipingPropertyFileSnitch
+# endpoint_snitch: GossipingPropertyFileSnitch
+endpoint_snitch: SimpleSnitch
 dynamic_snitch_update_interval: 100ms
 dynamic_snitch_reset_interval: 600000ms
 dynamic_snitch_badness_threshold: 1.0

@@ -1,5 +1,3 @@
-version: '3.8'
 services:
   cassandra1:
     build: .
@@ -18,11 +16,20 @@
       - ./data/dumps/node1:/dump/snapshot
       - ./data/dumps/schema:/dump/schema
       - ./data/volumes/node1:/var/lib/cassandra
-      - ./logs/node1:/var/log/cassandra
+      - ./data/logs/node1:/var/log/cassandra
+    healthcheck:
+      test: ["CMD-SHELL", "nodetool status"]
+      interval: 2m
+      start_period: 2m
+      timeout: 10s
+      retries: 3
     ports:
       - "9042:9042"
     networks:
      - cassandra-net
+    restart:
+      on-failure
   cassandra2:
     build: .
@@ -36,16 +43,25 @@
       - CASSANDRA_DC=DC1
       - CASSANDRA_RACK=RAC1
       - PRIMARY_NODE=false
+    healthcheck:
+      test: ["CMD-SHELL", "nodetool status"]
+      interval: 2m
+      start_period: 2m
+      timeout: 10s
+      retries: 3
     volumes:
       - ./scripts:/scripts
       - ./data/dumps/node2:/dump/snapshot
       - ./data/dumps/schema:/dump/schema
       - ./data/volumes/node2:/var/lib/cassandra
-      - ./logs/node2:/var/log/cassandra
+      - ./data/logs/node2:/var/log/cassandra
     networks:
       - cassandra-net
     depends_on:
-      - cassandra1
+      cassandra1:
+        condition: service_healthy
+    restart:
+      on-failure
   cassandra3:
     build: .
@@ -59,16 +75,27 @@
       - CASSANDRA_DC=DC1
       - CASSANDRA_RACK=RAC1
       - PRIMARY_NODE=false
+    healthcheck:
+      test: ["CMD-SHELL", "nodetool status"]
+      interval: 2m
+      start_period: 2m
+      timeout: 10s
+      retries: 3
     volumes:
       - ./scripts:/scripts
       - ./data/dumps/node3:/dump/snapshot
       - ./data/dumps/schema:/dump/schema
       - ./data/volumes/node3:/var/lib/cassandra
-      - ./logs/node3:/var/log/cassandra
+      - ./data/logs/node3:/var/log/cassandra
     networks:
       - cassandra-net
     depends_on:
-      - cassandra2
+      cassandra2:
+        condition: service_healthy
+    restart:
+      on-failure
 networks:
   cassandra-net:

docs/dump.md (new file, 121 lines)
# Documentation: Exporting Data from Existing Cassandra Cluster
This process exports data from an existing Cassandra cluster by creating snapshots on each node and copying the data to a local directory.
The steps ensure a consistent and reliable backup of the keyspace data.
The snapshot creation and data synchronization steps are executed in parallel for all nodes to speed up the process and ensure consistency.
## Dump Process
The data dump process involves taking a snapshot of the keyspace from each Cassandra node, copying the snapshots locally, and exporting the keyspace schema. This process is performed in parallel for efficiency; a hedged shell sketch of these steps follows the numbered list below.
1. **Clear Old Snapshots:**
- For each node, remove any existing snapshots with the specified tag to ensure a clean state.
2. **Create New Snapshots:**
- For each node, create a new snapshot with the specified tag.
3. **Synchronize Snapshots Locally:**
- Copy the snapshot data from each node to the local directory. Each table's data is copied into a directory named after the table.
4. **Export Keyspace Schema:**
- Export the keyspace schema from the first node and save it locally.
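A hedged shell sketch of these four steps, assuming SSH access to the nodes and reusing the `dev_keyspace_1` keyspace and `dump_docker` snapshot tag shown in the examples below. The real `dump.sh` referenced in the README may differ; hostnames and data paths here are placeholders.

```bash
#!/bin/bash
# Hypothetical sketch of the dump process; node hostnames and the data root are placeholders.
KEYSPACE="dev_keyspace_1"
TAG="dump_docker"
NODES=("cass-node1" "cass-node2" "cass-node3")   # placeholder hostnames of the source cluster
DATA_ROOT="/var/lib/cassandra/data"              # assumed Cassandra data directory on the nodes
LOCAL_DIR="data/dumps"

mkdir -p "$LOCAL_DIR/schema"

# Steps 1 and 2: clear any old snapshot and take a fresh one, in parallel on every node
for NODE in "${NODES[@]}"; do
    ssh "$NODE" "nodetool clearsnapshot -t $TAG -- $KEYSPACE; nodetool snapshot -t $TAG $KEYSPACE" &
done
wait

# Step 3: copy each table's snapshot into a local directory named after the table
for i in "${!NODES[@]}"; do
    NODE="${NODES[$i]}"
    for SNAP in $(ssh "$NODE" "ls -d $DATA_ROOT/$KEYSPACE/*/snapshots/$TAG"); do
        TABLE=$(basename "$(dirname "$(dirname "$SNAP")")" | cut -d- -f1)  # strip the table UUID suffix
        mkdir -p "$LOCAL_DIR/node$((i + 1))/$TABLE"
        rsync -a "$NODE:$SNAP/" "$LOCAL_DIR/node$((i + 1))/$TABLE/" &
    done
    wait
done

# Step 4: export the keyspace schema once, from the first node
ssh "${NODES[0]}" "cqlsh -e 'DESCRIBE KEYSPACE $KEYSPACE;'" > "$LOCAL_DIR/schema/${KEYSPACE}_schema.cql"
```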
### Directory Structure on Server
- Each table in the keyspace has its own directory.
- Inside each table's directory, there is a `snapshots` directory.
- The `snapshots` directory contains subdirectories for each snapshot, named according to the snapshot tag.
### Local Directory Structure
- The local directory mirrors the server's structure.
- Each table's snapshot data is stored in a directory named after the table, inside the local dump directory.
By following this process, a consistent and reliable backup of the Cassandra keyspace data is achieved, ensuring that the data can be restored or migrated as needed.
## Directory Structure Example
### Server-Side Structure
On the server, the directory structure for the snapshots is organized as follows:
```plaintext
/data
└── dev_keyspace_1
├── table1-abc1234567890abcdef1234567890abcdef
│ └── snapshots
│ └── dump_docker
│ ├── manifest.json
│ ├── nb-1-big-CompressionInfo.db
│ ├── nb-1-big-Data.db
│ ├── nb-1-big-Digest.crc32
│ ├── nb-1-big-Filter.db
│ ├── nb-1-big-Index.db
│ ├── nb-1-big-Statistics.db
│ ├── nb-1-big-Summary.db
│ └── schema.cql
├── table2-def4567890abcdef1234567890abcdef
│ └── snapshots
│ └── dump_docker
│ ├── manifest.json
│ ├── nb-1-big-CompressionInfo.db
│ ├── nb-1-big-Data.db
│ ├── nb-1-big-Digest.crc32
│ ├── nb-1-big-Filter.db
│ ├── nb-1-big-Index.db
│ ├── nb-1-big-Statistics.db
│ ├── nb-1-big-Summary.db
│ └── schema.cql
└── table3-ghi7890abcdef1234567890abcdef
└── snapshots
└── dump_docker
├── manifest.json
├── nb-1-big-CompressionInfo.db
├── nb-1-big-Data.db
├── nb-1-big-Digest.crc32
├── nb-1-big-Filter.db
├── nb-1-big-Index.db
├── nb-1-big-Statistics.db
├── nb-1-big-Summary.db
└── schema.cql
```
### Local Directory Structure
When copied locally, the directory structure is organized as follows:
```plaintext
data/dumps
├── schema
│   └── dev_keyspace_1_schema.cql
└── node1
├── table1
│ ├── manifest.json
│ ├── nb-1-big-CompressionInfo.db
│ ├── nb-1-big-Data.db
│ ├── nb-1-big-Digest.crc32
│ ├── nb-1-big-Filter.db
│ ├── nb-1-big-Index.db
│ ├── nb-1-big-Statistics.db
│ ├── nb-1-big-Summary.db
│ └── schema.cql
├── table2
│ ├── manifest.json
│ ├── nb-1-big-CompressionInfo.db
│ ├── nb-1-big-Data.db
│ ├── nb-1-big-Digest.crc32
│ ├── nb-1-big-Filter.db
│ ├── nb-1-big-Index.db
│ ├── nb-1-big-Statistics.db
│ ├── nb-1-big-Summary.db
│ └── schema.cql
└── table3
├── manifest.json
├── nb-1-big-CompressionInfo.db
├── nb-1-big-Data.db
├── nb-1-big-Digest.crc32
├── nb-1-big-Filter.db
├── nb-1-big-Index.db
├── nb-1-big-Statistics.db
├── nb-1-big-Summary.db
└── schema.cql
```

docs/setup.md (new file, 35 lines)
### Cassandra Cluster Setup and Data Migration Workflow
This document describes the workflow for setting up a Cassandra cluster with multiple nodes, creating keyspaces and schemas, and exporting and re-importing data. The process ensures synchronization across nodes and efficient data migration using snapshots.
#### Workflow Phases
The workflow is divided into the following phases:
1. **Startup Phase**: All nodes start Cassandra and ensure they are ready to accept connections.
2. **Schema Creation Phase**: The primary node creates the keyspace and schema if they do not exist. This schema is then propagated to other nodes.
3. **Data Import Phase**: Data is imported from snapshots using `sstableloader` only if the schema was newly created.
#### Phase 1: Startup Phase
Each node starts Cassandra and waits for it to be ready before proceeding to the next phase; a minimal readiness probe is sketched after the list below.
- **Primary Node**: Starts Cassandra and waits for other nodes to signal they are ready.
- **Non-Primary Nodes**: Wait for the primary node to be ready before starting Cassandra.
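A minimal readiness probe, equivalent in spirit to the `scripts/is_cassandra_ready.sh` helper added in this commit: a node is considered ready once `cqlsh` can connect to its own address.

```bash
#!/bin/bash
# Minimal readiness check (sketch); mirrors scripts/is_cassandra_ready.sh.
IP_ADDRESS=$(hostname -I | awk '{print $1}')
if cqlsh "$IP_ADDRESS" -e 'SHOW HOST' > /dev/null 2>&1; then
    echo "Cassandra is ready"
else
    echo "Cassandra is not ready yet"
    exit 1
fi
```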
#### Phase 2: Schema Creation Phase
After all nodes are confirmed to be ready, the primary node checks if the keyspace exists and creates it if it does not; a sketch of this step follows the list below.
- **Primary Node**:
- Checks if the keyspace exists.
- If the keyspace does not exist, creates the keyspace and applies the schema.
- Waits for the schema to propagate to all nodes.
- **Non-Primary Nodes**:
- Wait for the primary node to complete schema creation and propagation.
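A sketch of the primary node's schema step, following the logic in `scripts/setup.sh`; the keyspace name and schema path are the defaults used elsewhere in this repository.

```bash
#!/bin/bash
# Sketch: create the keyspace and tables from the dumped schema only if the keyspace is missing.
KEYSPACE="dev_keyspace_1"
SCHEMA_PATH="/dump/schema/${KEYSPACE}_schema.cql"
IP_ADDRESS=$(hostname -I | awk '{print $1}')

if ! cqlsh "$IP_ADDRESS" -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
    echo "Keyspace $KEYSPACE missing, applying $SCHEMA_PATH"
    cqlsh "$IP_ADDRESS" -f "$SCHEMA_PATH"
else
    echo "Keyspace $KEYSPACE already exists, leaving the schema untouched"
fi
```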
#### Phase 3: Data Import Phase
Data is imported into the keyspace using `sstableloader` from the snapshots if the schema was newly created; an example invocation follows the list below.
- **Primary Node**:
- If the schema was created, imports data from the snapshots.
- **Non-Primary Nodes**:
- Wait for the primary node to complete the data import.
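The per-table import is handled by `scripts/import.sh`. For a single table directory laid out as described in docs/dump.md, the invocation looks roughly like this (seed list and paths follow the repository defaults; the table name is illustrative):

```bash
# Load one table's snapshot into the cluster, then spot-check the row count.
sstableloader -d cassandra1,cassandra2,cassandra3 -v -k dev_keyspace_1 /dump/snapshot/table1
cqlsh "$(hostname -I | awk '{print $1}')" -k dev_keyspace_1 -e "SELECT count(*) FROM table1;"
```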

scripts/import.sh (new executable file, 19 lines)
#!/bin/bash
# Imports table snapshots into the cluster with sstableloader.
# Expects KEYSPACE, DUMP_DIR and CASSANDRA_SEEDS to be set in the environment before invocation.
SNAPSHOT_DIR="$DUMP_DIR/snapshot"
IP_ADDRESS=$(hostname -I | awk '{print $1}')
# Define a logging function
log() {
    local MESSAGE="$1"
    echo -e "$MESSAGE" | tee -a /var/log/cassandra/import.log
}
log "Importing snapshots using sstableloader..."
for TABLE_DIR in $(ls $SNAPSHOT_DIR); do
    TABLE_NAME=$(basename $TABLE_DIR) # Extract table name from directory name
    log "Importing table: $TABLE_NAME from directory: $SNAPSHOT_DIR/$TABLE_DIR"
    sstableloader -d "$CASSANDRA_SEEDS" -v -k "$KEYSPACE" "$SNAPSHOT_DIR/$TABLE_DIR"
    cqlsh $IP_ADDRESS -k "$KEYSPACE" -e "select count(*) from $TABLE_NAME;" >&2
done

scripts/is_cassandra_ready.sh (new executable file, 23 lines)
#!/bin/bash
IP_ADDRESS=$(hostname -I | awk '{print $1}')
# Define a logging function
log() {
    local MESSAGE="$1"
    echo -e "$MESSAGE" | tee -a /var/log/cassandra/is_cassandra_ready.log
}
log "Checking if Cassandra is ready..."
is_cassandra_ready() {
    cqlsh $IP_ADDRESS -e 'SHOW HOST' > /dev/null 2>&1
}
is_cassandra_ready
if [ $? -eq 0 ]; then
    log "Cassandra is ready."
    exit 0
else
    log "Cassandra is not ready."
    exit 1
fi

scripts/is_keyspace_exists.sh (new executable file, 35 lines)
#!/bin/bash
# Usage: is_keyspace_exists.sh [--keyspace <keyspace>]
# Example: is_keyspace_exists.sh --keyspace dev_keyspace_1
KEYSPACE=${KEYSPACE:-}
# Parse arguments
while [ $# -gt 0 ]; do
    case "$1" in
        --keyspace)
            KEYSPACE="$2"
            shift 2
            ;;
        *)
            echo "Unknown argument: $1"
            exit 1
            ;;
    esac
done
# Check for required arguments or environment variables
if [ -z "$KEYSPACE" ]; then
    echo "KEYSPACE is not set. Set it via --keyspace or the KEYSPACE environment variable."
    exit 1
fi
IP_ADDRESS=$(hostname -I | awk '{print $1}')
if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
    echo "Keyspace $KEYSPACE EXISTS"
    exit 0
fi
echo "Keyspace $KEYSPACE DOES NOT EXIST"
exit 1

scripts/is_node_up.sh (new executable file, 36 lines)
#!/bin/bash
# Usage: is_node_up.sh [node_address] [cassandra_rpc_address]
# Arguments are positional and default to this host's IP and $CASSANDRA_RPC_ADDRESS.
# Define a logging function
log() {
    local MESSAGE="$1"
    echo -e "$MESSAGE" | tee -a /var/log/cassandra/is_node_up.log
}
# Default values
NODE=${1:-$(hostname -I | awk '{print $1}')}
CASSANDRA_RPC_ADDRESS=${2:-$CASSANDRA_RPC_ADDRESS}
log "Checking if node $NODE is up..."
is_node_up() {
    local NODE="$1"
    local NODE_STATUS=$(nodetool status -r)
    if echo "$NODE_STATUS" | grep -E "^UN" | grep "$NODE" > /dev/null; then
        return 0
    elif [ "$NODE" = "$CASSANDRA_RPC_ADDRESS" ]; then
        # For the local node, also accept a match on this host's IP in the non-resolving output
        NODE_STATUS=$(nodetool status)
        if echo "$NODE_STATUS" | grep -E "^UN.*$(hostname -I | awk '{print $1}')" > /dev/null; then
            return 0
        fi
    fi
    return 1
}
is_node_up $NODE
if [ $? -eq 0 ]; then
    log "Node $NODE is up."
    exit 0
else
    log "Node $NODE is not up."
    exit 1
fi

scripts/is_primary_node.sh (new executable file, 11 lines)
#!/bin/bash
# PRIMARY_NODE=${PRIMARY_NODE:-}
echo PRIMARY_NODE: $PRIMARY_NODE
if [ "$PRIMARY_NODE" = "true" ]; then
exit 0
else
exit 1
fi

scripts/is_schema_agreed.sh (new executable file, 32 lines)
#!/bin/bash
# Expects KEYSPACE and CASSANDRA_SEEDS to be set in the environment.
IP_ADDRESS=$(hostname -I | awk '{print $1}')
SEEDS=(${CASSANDRA_SEEDS//,/ })
# Define a logging function
log() {
    local MESSAGE="$1"
    echo -e "$MESSAGE" | tee -a /var/log/cassandra/is_schema_agreed.log
}
log "Checking if schema is agreed..."
is_schema_agreed() {
    if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
        # Count the node addresses listed under "Schema versions:" and compare with the seed count
        SCHEMA_NODES=$(nodetool describecluster | grep -A 1 "Schema versions:" | grep -o '\[.*\]' | tr -d '[]' | tr ',' '\n' | wc -l)
        if [ "$SCHEMA_NODES" -eq "${#SEEDS[@]}" ]; then
            return 0
        fi
    fi
    return 1
}
is_schema_agreed
if [ $? -eq 0 ]; then
    log "Schema is agreed."
    exit 0
else
    log "Schema is not agreed."
    exit 1
fi

@@ -8,87 +8,111 @@ log() {
 log "RUNNING SETUP"
+# Configuration
 KEYSPACE="dev_keyspace_1"
 DUMP_DIR="/dump" # Ensure DUMP_DIR is defined
-SCHEMA_PATH="$DUMP_DIR/schema/${KEYSPACE}_schema.cql" # Ensure DUMP_DIR is defined
+SCHEMA_PATH="$DUMP_DIR/schema/${KEYSPACE}_schema.cql"
 CASSANDRA_SEEDS="cassandra1,cassandra2,cassandra3"
-STATUS_DIR="/var/log/cassandra"
 IP_ADDRESS=$(hostname -I | awk '{print $1}')
+DATA_DIR="/var/lib/cassandra/data/$KEYSPACE"
+SNAPSHOT_DIR="$DUMP_DIR/snapshot"
+PRIMARY_NODE=${PRIMARY_NODE:-false} # Default to false if not set
+SLEEP_DURATION=5 # Sleep duration in seconds for waits
+TIMEOUT=3000 # Timeout in seconds for waits
 # Initialize SEEDS array
 SEEDS=(${CASSANDRA_SEEDS//,/ })
-# Function to wait for all nodes to be in the 'UN' state
-wait_for_all_nodes_up() {
-SEEDS=(${CASSANDRA_SEEDS//,/ })
-while true; do
-all_up=true
-for seed in "${SEEDS[@]}"; do
-NODE_STATUS=$(nodetool status -r)
-if ! echo "$NODE_STATUS" | grep -E "^UN.*$seed" > /dev/null; then
-if [ "$seed" = "$CASSANDRA_RPC_ADDRESS" ]; then
-NODE_STATUS=$(nodetool status)
-if ! echo "$NODE_STATUS" | grep -E "^UN.*$(hostname -I | awk '{print $1}')" > /dev/null; then
-log "Node $seed (self) is not up yet..."
-all_up=false
-break
-fi
-else
-log "Node $seed is not up yet..."
-all_up=false
-break
-fi
-fi
-done
-if [ "$all_up" = true ]; then
-log "All nodes are up."
-break
-else
-sleep 5
-fi
-done
-}
-# Function to wait for schema agreement across all nodes
-wait_for_schema_agreement() {
-while true; do
-if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
-if nodetool describecluster | grep -q "Schema versions:"; then
-SCHEMA_COUNT=$(nodetool describecluster | grep -A 1 "Schema versions:" | wc -l)
-if [ "$SCHEMA_COUNT" -eq 2 ]; then
-log "Schema agreement reached."
-break
-else
-log "Waiting for schema agreement..."
-fi
-fi
-else
-log "Waiting for keyspace $KEYSPACE to be available..."
-fi
-sleep 5
-done
-}
+# Function to wait for a command to succeed
+wait_for_command() {
+local COMMAND="$1"
+local TIMEOUT="$2"
+local START_TIME=$(date +%s)
+local END_TIME=$((START_TIME + TIMEOUT))
+while true; do
+if eval "$COMMAND"; then
+log "Command succeeded: $COMMAND"
+break
+else
+local CURRENT_TIME=$(date +%s)
+if [ "$CURRENT_TIME" -ge "$END_TIME" ]; then
+log "Timed out waiting for command: $COMMAND"
+exit 1
+fi
+log "Command failed: $COMMAND, still waiting"
+sleep $SLEEP_DURATION
+fi
+done
+}
+# Function to check if a node is up
+is_node_up() {
+local NODE="$1"
+local NODE_STATUS=$(nodetool status -r)
+if echo "$NODE_STATUS" | grep -E "^UN" | grep "$NODE" > /dev/null; then
+return 0
+elif [ "$NODE" = "$CASSANDRA_RPC_ADDRESS" ]; then
+NODE_STATUS=$(nodetool status)
+if echo "$NODE_STATUS" | grep -E "^UN.*$(hostname -I | awk '{print $1}')" > /dev/null; then
+return 0
+fi
+fi
+return 1
+}
+# Function to wait for all nodes to be up
+wait_for_all_nodes_up() {
+for seed in "${SEEDS[@]}"; do
+wait_for_command "is_node_up $seed" $TIMEOUT
+done
+log "All nodes are up."
+}
+# Function to check for schema agreement and if schema exists
+is_schema_agreed() {
+if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
+SCHEMA_NODES=$(nodetool describecluster | grep -A 1 "Schema versions:" | grep -o '\[.*\]' | tr -d '[]' | tr ',' '\n' | wc -l)
+if [ "$SCHEMA_NODES" -eq "${#SEEDS[@]}" ]; then
+return 0
+fi
+fi
+return 1
+}
+# Function to check if keyspace exists
+is_keyspace_exists() {
+if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
+return 0
+fi
+return 1
+}
 log "setup KEYSPACE: $KEYSPACE"
 log "setup DUMP_DIR: $DUMP_DIR"
 log "setup SCHEMA_PATH: $SCHEMA_PATH"
 log "setup CASSANDRA_SEEDS: $CASSANDRA_SEEDS"
-log "setup STATUS_DIR: $STATUS_DIR"
+# Check if the keyspace directory exists and is not empty
+if [ -d "$DATA_DIR" ] && [ "$(ls -A $DATA_DIR)" ]; then
+log "Data directory $DATA_DIR exists and is not empty. Skipping schema creation and data import."
+SCHEMA_CREATED=false
+else
+log "Data directory $DATA_DIR does not exist or is empty. Proceeding with schema creation and data import."
+SCHEMA_CREATED=true
+fi
 # Wait for cassandra1 to be ready if this is not the primary node
 if [ "$PRIMARY_NODE" != "true" ]; then
-log "Waiting for cassandra1 to be ready..."
-/wait-for-it.sh cassandra1:9042 -t 60 -- log "cassandra1 is ready"
+wait_for_service cassandra1 9042 $TIMEOUT
 fi
 # Start Cassandra in the background
-cassandra -R &
+# cassandra -R &
 # Wait for Cassandra to be ready
-log "Waiting for Cassandra to start..."
-until cqlsh $IP_ADDRESS -e "SHOW HOST" > /dev/null 2>&1; do
-sleep 2
-done
+wait_for_command "cqlsh $IP_ADDRESS -e 'SHOW HOST' > /dev/null 2>&1" $TIMEOUT
 # Log the value of PRIMARY_NODE for debugging
 log "PRIMARY_NODE is set to: $PRIMARY_NODE"
@@ -100,42 +124,30 @@ wait_for_all_nodes_up
 # Step 2: Create keyspace and schema on the primary node
 if [ "$PRIMARY_NODE" = "true" ]; then
 log "Checking if keyspace $KEYSPACE exists..."
-if ! cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
+if ! is_keyspace_exists; then
 log "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..."
 cqlsh $IP_ADDRESS -f "$SCHEMA_PATH"
 else
 log "Keyspace $KEYSPACE already exists. Ensuring tables exist..."
 fi
-# Signal to secondary nodes that schema creation is complete
-touch $STATUS_DIR/schema_created
 fi
 # Step 3: Wait for schema to be created and agreed upon across all nodes
 log "Waiting for schema agreement across all nodes..."
-wait_for_schema_agreement
+wait_for_command "is_schema_agreed" $TIMEOUT
 # Step 4: Import data using sstableloader if not previously imported
-if [ "$PRIMARY_NODE" = "true" ]; then
+# if [ "$SCHEMA_CREATED" = true ]; then
 log "Importing snapshots using sstableloader..."
-for TABLE_DIR in $(ls $DUMP_DIR); do
+for TABLE_DIR in $(ls $SNAPSHOT_DIR); do
 TABLE_NAME=$(basename $TABLE_DIR) # Extract table name from directory name
-log "Importing table: $TABLE_NAME from directory: $DUMP_DIR/$TABLE_DIR"
-sstableloader -d "$CASSANDRA_SEEDS" -v -k "$KEYSPACE" "$DUMP_DIR/$TABLE_DIR"
+log "Importing table: $TABLE_NAME from directory: $SNAPSHOT_DIR/$TABLE_DIR"
+sstableloader -d "$CASSANDRA_SEEDS" -v -k "$KEYSPACE" "$SNAPSHOT_DIR/$TABLE_DIR"
 cqlsh $IP_ADDRESS -k "$KEYSPACE" -e "select count(*) from $TABLE_NAME;" >&2
 done
-# Signal to secondary nodes that import is complete
-touch $STATUS_DIR/import_complete
-else
-# Wait for import completion signal from primary node
-log "Waiting for import completion signal from primary node..."
-while [ ! -f "$STATUS_DIR/import_complete" ]; do
-sleep 5
-done
-fi
+# fi
 log "FINISHED IMPORT"
 # Keep the container running
-tail -f /dev/null
+# tail -f /dev/null

scripts/setup_empty.sh (new executable file, 5 lines)
#!/bin/bash
# Keep the container running
tail -f /dev/null

scripts/setup_orig.sh (new executable file, 120 lines)
#!/bin/bash
# Define a logging function
log() {
local MESSAGE="$1"
echo -e "$MESSAGE" | tee -a /var/log/cassandra/setup.log
}
log "RUNNING SETUP"
# Configuration
KEYSPACE=${KEYSPACE:-dev_keyspace_1}
DUMP_DIR=${DUMP_DIR:-/dump} # Ensure DUMP_DIR is defined
CASSANDRA_SEEDS=${CASSANDRA_SEEDS:-cassandra1,cassandra2,cassandra3}
PRIMARY_NODE=${PRIMARY_NODE:-false} # Default to false if not set
IP_ADDRESS=$(hostname -I | awk '{print $1}')
SCHEMA_PATH="$DUMP_DIR/schema/${KEYSPACE}_schema.cql"
DATA_DIR="/var/lib/cassandra/data/$KEYSPACE"
SNAPSHOT_DIR="$DUMP_DIR/snapshot"
# Initialize SEEDS array
SEEDS=(${CASSANDRA_SEEDS//,/ })
SLEEP_DURATION=5 # Sleep duration in seconds for waits
TIMEOUT=300 # Timeout in seconds for waits
# Function to wait for a command to succeed
wait_for_command() {
local COMMAND="$1"
local TIMEOUT="$2"
local START_TIME=$(date +%s)
local END_TIME=$((START_TIME + TIMEOUT))
while true; do
if eval "$COMMAND"; then
log "Command succeeded: $COMMAND"
break
else
local CURRENT_TIME=$(date +%s)
if [ "$CURRENT_TIME" -ge "$END_TIME" ]; then
log "Timed out waiting for command: $COMMAND"
exit 1
fi
log "Command failed: $COMMAND, still waiting"
sleep $SLEEP_DURATION
fi
done
}
log "setup KEYSPACE: $KEYSPACE"
log "setup DUMP_DIR: $DUMP_DIR"
log "setup SCHEMA_PATH: $SCHEMA_PATH"
log "setup CASSANDRA_SEEDS: $CASSANDRA_SEEDS"
# Check if the keyspace directory exists and is not empty
if [ -d "$DATA_DIR" ] && [ "$(ls -A $DATA_DIR)" ]; then
log "Data directory $DATA_DIR exists and is not empty. Skipping schema creation and data import."
EMPTY_DB=false
else
log "Data directory $DATA_DIR does not exist or is empty. Proceeding with schema creation and data import."
EMPTY_DB=true
fi
# # Wait for cassandra1 to be ready if this is not the primary node
# if [ "$PRIMARY_NODE" != "true" ]; then
# wait_for_command "/scripts/is_node_up.sh --node cassandra1 --cassandra_rpc_address $IP_ADDRESS" $TIMEOUT
# fi
# Start Cassandra in the background
cassandra -R &
# Wait for Cassandra to be ready
wait_for_command "/scripts/is_cassandra_ready.sh" $TIMEOUT
# Log the value of PRIMARY_NODE for debugging
log "PRIMARY_NODE is set to: $PRIMARY_NODE"
# Step 1: Wait for all nodes to be up and ready
log "Waiting for all nodes to be up and ready..."
wait_for_command "/scripts/is_node_up.sh --node $seed " $TIMEOUT
// TODO: aspettare tutti i nodi
# Function to wait for all nodes to be up
wait_for_all_nodes_up() {
for seed in "${SEEDS[@]}"; do
wait_for_command "/scripts/is_node_up.sh --node $seed " $TIMEOUT
done
log "All nodes are up."
}
wait_for_all_nodes_up
# Step 2: Create keyspace and schema on the primary node
if [ "$PRIMARY_NODE" = "true" ]; then
log "Checking if keyspace $KEYSPACE exists..."
if ! /scripts/is_keyspace_exists.sh --keyspace "$KEYSPACE"; then
log "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..."
cqlsh $IP_ADDRESS -f "$SCHEMA_PATH"
else
log "Keyspace $KEYSPACE already exists. Ensuring tables exist..."
fi
fi
# Step 3: Wait for schema to be created and agreed upon across all nodes
log "Waiting for schema agreement across all nodes..."
wait_for_command "/scripts/is_schema_agreed.sh --keyspace $KEYSPACE --cassandra_seeds $CASSANDRA_SEEDS" $TIMEOUT
# Step 4: Import data using sstableloader if not previously imported
if [ "$EMPTY_DB" = true ]; then
log "Importing snapshots using sstableloader..."
/scripts/import.sh --keyspace "$KEYSPACE" --dump_dir "$SNAPSHOT_DIR" --cassandra_seeds "$CASSANDRA_SEEDS"
fi
log "FINISHED IMPORT"
# Keep the container running
tail -f /dev/null

scripts/wait_for_command.sh (new executable file, 31 lines)
#!/bin/bash
# Usage: wait_for_command.sh "<command>" <timeout_seconds>
# Repeatedly evaluates <command> until it succeeds or <timeout_seconds> elapses.
COMMAND="$1"
TIMEOUT="$2"
SLEEP_DURATION=5 # Sleep duration in seconds for waits
log() {
    local MESSAGE="$1"
    echo -e "$MESSAGE" | tee -a /var/log/cassandra/setup.log
}
wait_for_command() {
    local START_TIME=$(date +%s)
    local END_TIME=$((START_TIME + TIMEOUT))
    while true; do
        if eval "$COMMAND"; then
            log "Command succeeded: $COMMAND"
            break
        else
            local CURRENT_TIME=$(date +%s)
            if [ "$CURRENT_TIME" -ge "$END_TIME" ]; then
                log "Timed out waiting for command: $COMMAND"
                exit 1
            fi
            sleep $SLEEP_DURATION
        fi
    done
}
wait_for_command