tutto funzionante. /scripts/setup.sh deve essere avviato a mano su ogni istanza, la prima volta che si avviano i docker
This commit is contained in:
parent
869802c6e0
commit
470416125e
10
Dockerfile
10
Dockerfile
|
@ -2,7 +2,7 @@
|
|||
FROM cassandra:4.1.3
|
||||
|
||||
# Install gettext to use envsubst
|
||||
RUN apt-get update && apt-get install -y iputils-ping less locate gettext-base
|
||||
# RUN apt-get update && apt-get install -y iputils-ping less locate gettext-base
|
||||
|
||||
# Environment variables to configure Cassandra
|
||||
ENV CASSANDRA_CLUSTER_NAME=TestCluster
|
||||
|
@ -20,19 +20,19 @@ ENV CASSANDRA_RACK=RAC1
|
|||
|
||||
# Copy cassandra.yaml and cassandra-rackdc.properties
|
||||
COPY cassandra.yaml /etc/cassandra/
|
||||
COPY cassandra-rackdc.properties /etc/cassandra/
|
||||
# COPY cassandra-rackdc.properties /etc/cassandra/
|
||||
|
||||
# Substitute environment variables in cassandra.yaml
|
||||
RUN envsubst < /etc/cassandra/cassandra.yaml > /etc/cassandra/cassandra.yaml.tmp && mv /etc/cassandra/cassandra.yaml.tmp /etc/cassandra/cassandra.yaml
|
||||
# RUN envsubst < /etc/cassandra/cassandra.yaml > /etc/cassandra/cassandra.yaml.tmp && mv /etc/cassandra/cassandra.yaml.tmp /etc/cassandra/cassandra.yaml
|
||||
|
||||
# Substitute environment variables in cassandra-rackdc.properties
|
||||
RUN envsubst < /etc/cassandra/cassandra-rackdc.properties > /etc/cassandra/cassandra-rackdc.properties.tmp && mv /etc/cassandra/cassandra-rackdc.properties.tmp /etc/cassandra/cassandra-rackdc.properties
|
||||
# RUN envsubst < /etc/cassandra/cassandra-rackdc.properties > /etc/cassandra/cassandra-rackdc.properties.tmp && mv /etc/cassandra/cassandra-rackdc.properties.tmp /etc/cassandra/cassandra-rackdc.properties
|
||||
|
||||
# COPY scripts/setup.sh /setup.sh
|
||||
# VOLUME "/scripts/setup.sh"
|
||||
|
||||
# Set the entrypoint
|
||||
ENTRYPOINT ["/scripts/setup.sh"]
|
||||
# ENTRYPOINT ["/scripts/setup.sh"]
|
||||
|
||||
# Expose Cassandra ports
|
||||
EXPOSE 7000 7001 7199 9042 9160
|
||||
|
|
46
README.md
46
README.md
|
@ -8,8 +8,10 @@ in dev environment, cassandra has been installed manually and nodetool is not in
|
|||
* dev should be aligned to prod
|
||||
|
||||
|
||||
https://medium.com/@kayvan.sol2/deploying-apache-cassandra-cluster-3-nodes-with-docker-compose-3634ef8345e8
|
||||
|
||||
# DOCKER
|
||||
|
||||
## DOCKER instructions
|
||||
|
||||
aggiungere a /etc/hosts l'entry:
|
||||
|
||||
|
@ -23,13 +25,26 @@ force recreate docker image
|
|||
`docker compose build --no-cache`
|
||||
|
||||
check status:
|
||||
`docker exec -it cassandra-cassandra-1 nodetool status`
|
||||
`docker exec -it cassandra-1 nodetool status`
|
||||
|
||||
## import db
|
||||
|
||||
`./dump.sh`
|
||||
|
||||
`docker compose up --build`
|
||||
|
||||
attendere che tutto sia pronto e i db siano sincronizzati
|
||||
|
||||
TODO: definire esattamente come, tendenzialmente con nodetool status , nodetool gossip, etc
|
||||
|
||||
eseguire uno dopo l'altro, quando sono terminati
|
||||
* cassandra1: `docker exec -it cassandra1 /scripts/setup.sh`
|
||||
* cassandra2: `docker exec -it cassandra2 /scripts/setup.sh`
|
||||
* cassandra3: `docker exec -it cassandra3 /scripts/setup.sh`
|
||||
|
||||
run a single service:
|
||||
* cassandra1: `docker-compose up cassandra-cassandra1 --build`
|
||||
* cassandra2: `docker-compose up cassandra-cassandra2 --build`
|
||||
* cassandra1: `docker-compose up cassandra1 --build`
|
||||
* cassandra2: `docker-compose up cassandra2 --build`
|
||||
* ,...
|
||||
|
||||
open bash on server
|
||||
|
@ -38,4 +53,27 @@ open bash on server
|
|||
* cassandra2: `docker exec -it cassandra2 /bin/bash`
|
||||
* cassandra3: `docker exec -it cassandra3 /bin/bash`
|
||||
|
||||
## check cassandra status
|
||||
|
||||
Check status
|
||||
`nodetool status`
|
||||
|
||||
|
||||
|
||||
Check if the Gossip protocol is enabled
|
||||
`nodetool info | grep -i gossip`
|
||||
|
||||
Check the status of the Gossip protocol
|
||||
`nodetool gossipinfo`
|
||||
|
||||
Check the communication between nodes
|
||||
`nodetool ring`
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## Documentation
|
||||
|
||||
* [cassandra dump data](docs/dump.md)
|
||||
* [cassandra setup and import](docs/setup.md)
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
# Cluster name
|
||||
cluster_name: ${CASSANDRA_CLUSTER_NAME}
|
||||
# # Cluster name
|
||||
# cluster_name: ${CASSANDRA_CLUSTER_NAME}
|
||||
|
||||
# Addresses
|
||||
listen_address: ${CASSANDRA_LISTEN_ADDRESS}
|
||||
broadcast_address: ${CASSANDRA_BROADCAST_ADDRESS}
|
||||
# # Addresses
|
||||
# listen_address: ${CASSANDRA_LISTEN_ADDRESS}
|
||||
# broadcast_address: ${CASSANDRA_BROADCAST_ADDRESS}
|
||||
# rpc_address: 0.0.0.0
|
||||
broadcast_rpc_address: ${CASSANDRA_RPC_ADDRESS}
|
||||
# broadcast_rpc_address: ${CASSANDRA_RPC_ADDRESS}
|
||||
|
||||
# Seed nodes
|
||||
seed_provider:
|
||||
|
@ -21,12 +21,12 @@ commitlog_directory: /var/lib/cassandra/commitlog
|
|||
|
||||
saved_caches_directory: /var/lib/cassandra/saved_caches
|
||||
|
||||
client_encryption_options:
|
||||
enabled: false
|
||||
optional: false
|
||||
# client_encryption_options:
|
||||
# enabled: false
|
||||
# optional: false
|
||||
|
||||
# Tokens and allocation
|
||||
num_tokens: ${CASSANDRA_NUM_TOKENS}
|
||||
# num_tokens: ${CASSANDRA_NUM_TOKENS}
|
||||
allocate_tokens_for_local_replication_factor: 3
|
||||
|
||||
# Hinted handoff settings
|
||||
|
@ -122,7 +122,8 @@ request_timeout: 1000000ms
|
|||
slow_query_log_timeout: 500ms
|
||||
|
||||
# Snitch settings
|
||||
endpoint_snitch: GossipingPropertyFileSnitch
|
||||
# endpoint_snitch: GossipingPropertyFileSnitch
|
||||
endpoint_snitch: SimpleSnitch
|
||||
dynamic_snitch_update_interval: 100ms
|
||||
dynamic_snitch_reset_interval: 600000ms
|
||||
dynamic_snitch_badness_threshold: 1.0
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
version: '3.8'
|
||||
|
||||
services:
|
||||
cassandra1:
|
||||
build: .
|
||||
|
@ -18,11 +16,20 @@ services:
|
|||
- ./data/dumps/node1:/dump/snapshot
|
||||
- ./data/dumps/schema:/dump/schema
|
||||
- ./data/volumes/node1:/var/lib/cassandra
|
||||
- ./logs/node1:/var/log/cassandra
|
||||
- ./data/logs/node1:/var/log/cassandra
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "nodetool status"]
|
||||
interval: 2m
|
||||
start_period: 2m
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
ports:
|
||||
- "9042:9042"
|
||||
networks:
|
||||
- cassandra-net
|
||||
restart:
|
||||
on-failure
|
||||
|
||||
|
||||
cassandra2:
|
||||
build: .
|
||||
|
@ -36,16 +43,25 @@ services:
|
|||
- CASSANDRA_DC=DC1
|
||||
- CASSANDRA_RACK=RAC1
|
||||
- PRIMARY_NODE=false
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "nodetool status"]
|
||||
interval: 2m
|
||||
start_period: 2m
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
volumes:
|
||||
- ./scripts:/scripts
|
||||
- ./data/dumps/node2:/dump/snapshot
|
||||
- ./data/dumps/schema:/dump/schema
|
||||
- ./data/volumes/node2:/var/lib/cassandra
|
||||
- ./logs/node2:/var/log/cassandra
|
||||
- ./data/logs/node2:/var/log/cassandra
|
||||
networks:
|
||||
- cassandra-net
|
||||
depends_on:
|
||||
- cassandra1
|
||||
cassandra1:
|
||||
condition: service_healthy
|
||||
restart:
|
||||
on-failure
|
||||
|
||||
cassandra3:
|
||||
build: .
|
||||
|
@ -59,16 +75,27 @@ services:
|
|||
- CASSANDRA_DC=DC1
|
||||
- CASSANDRA_RACK=RAC1
|
||||
- PRIMARY_NODE=false
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "nodetool status"]
|
||||
interval: 2m
|
||||
start_period: 2m
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
volumes:
|
||||
- ./scripts:/scripts
|
||||
- ./data/dumps/node3:/dump/snapshot
|
||||
- ./data/dumps/schema:/dump/schema
|
||||
- ./data/volumes/node3:/var/lib/cassandra
|
||||
- ./logs/node3:/var/log/cassandra
|
||||
- ./data/logs/node3:/var/log/cassandra
|
||||
networks:
|
||||
- cassandra-net
|
||||
depends_on:
|
||||
- cassandra2
|
||||
cassandra2:
|
||||
condition: service_healthy
|
||||
restart:
|
||||
on-failure
|
||||
|
||||
|
||||
|
||||
networks:
|
||||
cassandra-net:
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
# Documentation: Exporting Data from Existing Cassandra Cluster
|
||||
|
||||
This process exports data from an existing Cassandra cluster by creating snapshots on each node and copying the data to a local directory.
|
||||
|
||||
The steps ensure a consistent and reliable backup of the keyspace data.
|
||||
|
||||
The snapshot creation and data synchronization steps are executed in parallel for all nodes to speed up the process and ensure consistency.
|
||||
|
||||
## Dump Process
|
||||
|
||||
The data dump process involves taking a snapshot of the keyspace from each Cassandra node, copying the snapshots locally, and exporting the keyspace schema. This process is performed in parallel for efficiency.
|
||||
|
||||
1. **Clear Old Snapshots:**
|
||||
- For each node, remove any existing snapshots with the specified tag to ensure a clean state.
|
||||
|
||||
2. **Create New Snapshots:**
|
||||
- For each node, create a new snapshot with the specified tag.
|
||||
|
||||
3. **Synchronize Snapshots Locally:**
|
||||
- Copy the snapshot data from each node to the local directory. Each table's data is copied into a directory named after the table.
|
||||
|
||||
4. **Export Keyspace Schema:**
|
||||
- Export the keyspace schema from the first node and save it locally.
|
||||
|
||||
### Directory Structure on Server
|
||||
- Each table in the keyspace has its own directory.
|
||||
- Inside each table's directory, there is a `snapshots` directory.
|
||||
- The `snapshots` directory contains subdirectories for each snapshot, named according to the snapshot tag.
|
||||
|
||||
### Local Directory Structure
|
||||
- The local directory mirrors the server's structure.
|
||||
- Each table's snapshot data is stored in a directory named after the table, inside the local dump directory.
|
||||
|
||||
By following this process, a consistent and reliable backup of the Cassandra keyspace data is achieved, ensuring that the data can be restored or migrated as needed.
|
||||
|
||||
## Directory Structure Example
|
||||
|
||||
### Server-Side Structure
|
||||
|
||||
On the server, the directory structure for the snapshots is organized as follows:
|
||||
|
||||
```plaintext
|
||||
/data
|
||||
└── dev_keyspace_1
|
||||
├── table1-abc1234567890abcdef1234567890abcdef
|
||||
│ └── snapshots
|
||||
│ └── dump_docker
|
||||
│ ├── manifest.json
|
||||
│ ├── nb-1-big-CompressionInfo.db
|
||||
│ ├── nb-1-big-Data.db
|
||||
│ ├── nb-1-big-Digest.crc32
|
||||
│ ├── nb-1-big-Filter.db
|
||||
│ ├── nb-1-big-Index.db
|
||||
│ ├── nb-1-big-Statistics.db
|
||||
│ ├── nb-1-big-Summary.db
|
||||
│ └── schema.cql
|
||||
├── table2-def4567890abcdef1234567890abcdef
|
||||
│ └── snapshots
|
||||
│ └── dump_docker
|
||||
│ ├── manifest.json
|
||||
│ ├── nb-1-big-CompressionInfo.db
|
||||
│ ├── nb-1-big-Data.db
|
||||
│ ├── nb-1-big-Digest.crc32
|
||||
│ ├── nb-1-big-Filter.db
|
||||
│ ├── nb-1-big-Index.db
|
||||
│ ├── nb-1-big-Statistics.db
|
||||
│ ├── nb-1-big-Summary.db
|
||||
│ └── schema.cql
|
||||
└── table3-ghi7890abcdef1234567890abcdef
|
||||
└── snapshots
|
||||
└── dump_docker
|
||||
├── manifest.json
|
||||
├── nb-1-big-CompressionInfo.db
|
||||
├── nb-1-big-Data.db
|
||||
├── nb-1-big-Digest.crc32
|
||||
├── nb-1-big-Filter.db
|
||||
├── nb-1-big-Index.db
|
||||
├── nb-1-big-Statistics.db
|
||||
├── nb-1-big-Summary.db
|
||||
└── schema.cql
|
||||
```
|
||||
|
||||
### Local Directory Structure
|
||||
When copied locally, the directory structure is organized as follows:
|
||||
|
||||
```plaintext
|
||||
data/dumps
|
||||
├── schema
|
||||
│ ├── dev_keyspace_1_schema.cql
|
||||
└── node1
|
||||
├── table1
|
||||
│ ├── manifest.json
|
||||
│ ├── nb-1-big-CompressionInfo.db
|
||||
│ ├── nb-1-big-Data.db
|
||||
│ ├── nb-1-big-Digest.crc32
|
||||
│ ├── nb-1-big-Filter.db
|
||||
│ ├── nb-1-big-Index.db
|
||||
│ ├── nb-1-big-Statistics.db
|
||||
│ ├── nb-1-big-Summary.db
|
||||
│ └── schema.cql
|
||||
├── table2
|
||||
│ ├── manifest.json
|
||||
│ ├── nb-1-big-CompressionInfo.db
|
||||
│ ├── nb-1-big-Data.db
|
||||
│ ├── nb-1-big-Digest.crc32
|
||||
│ ├── nb-1-big-Filter.db
|
||||
│ ├── nb-1-big-Index.db
|
||||
│ ├── nb-1-big-Statistics.db
|
||||
│ ├── nb-1-big-Summary.db
|
||||
│ └── schema.cql
|
||||
└── table3
|
||||
├── manifest.json
|
||||
├── nb-1-big-CompressionInfo.db
|
||||
├── nb-1-big-Data.db
|
||||
├── nb-1-big-Digest.crc32
|
||||
├── nb-1-big-Filter.db
|
||||
├── nb-1-big-Index.db
|
||||
├── nb-1-big-Statistics.db
|
||||
├── nb-1-big-Summary.db
|
||||
└── schema.cql
|
||||
```
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
### Cassandra Cluster Setup and Data Migration Workflow
|
||||
|
||||
Workflow for setting up a Cassandra cluster with multiple nodes, creating keyspaces and schemas, and exporting and reimporting data. The process ensures synchronization across nodes and efficient data migration using snapshots.
|
||||
|
||||
#### Workflow Phases
|
||||
The workflow is divided into the following phases:
|
||||
1. **Startup Phase**: All nodes start Cassandra and ensure they are ready to accept connections.
|
||||
2. **Schema Creation Phase**: The primary node creates the keyspace and schema if they do not exist. This schema is then propagated to other nodes.
|
||||
3. **Data Import Phase**: Data is imported from snapshots using `sstableloader` only if the schema was newly created.
|
||||
|
||||
#### Phase 1: Startup Phase
|
||||
Each node starts Cassandra and waits for it to be ready before proceeding to the next phase.
|
||||
|
||||
- **Primary Node**: Starts Cassandra and waits for other nodes to signal they are ready.
|
||||
- **Non-Primary Nodes**: Wait for the primary node to be ready before starting Cassandra.
|
||||
|
||||
#### Phase 2: Schema Creation Phase
|
||||
After all nodes are confirmed to be ready, the primary node checks if the keyspace exists and creates it if it does not.
|
||||
|
||||
- **Primary Node**:
|
||||
- Checks if the keyspace exists.
|
||||
- If the keyspace does not exist, creates the keyspace and applies the schema.
|
||||
- Waits for the schema to propagate to all nodes.
|
||||
- **Non-Primary Nodes**:
|
||||
- Wait for the primary node to complete schema creation and propagation.
|
||||
|
||||
#### Phase 3: Data Import Phase
|
||||
Data is imported into the keyspace using `sstableloader` from the snapshots if the schema was newly created.
|
||||
|
||||
- **Primary Node**:
|
||||
- If the schema was created, imports data from the snapshots.
|
||||
- **Non-Primary Nodes**:
|
||||
- Wait for the primary node to complete the data import.
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
#!/bin/bash
#
# Import every table snapshot directory into a Cassandra keyspace with
# sstableloader, then print the row count of each imported table.
#
# Usage:
#   import.sh [--keyspace <keyspace>] [--dump_dir <snapshot dir>]
#             [--cassandra_seeds <host1,host2,...>]
#
# --dump_dir is the directory that directly contains one sub-directory per
# table (the setup script passes its snapshot directory here).
# Every option falls back to the environment: KEYSPACE, "$DUMP_DIR/snapshot"
# and CASSANDRA_SEEDS respectively.
#
# Exit status: 0 when every table was loaded, non-zero otherwise.

KEYSPACE=${KEYSPACE:-}
CASSANDRA_SEEDS=${CASSANDRA_SEEDS:-}
SNAPSHOT_DIR="${DUMP_DIR:-}/snapshot"
IP_ADDRESS=$(hostname -I | awk '{print $1}')

# Print a message and append it to the import log.
log() {
  local MESSAGE="$1"
  echo -e "$MESSAGE" | tee -a /var/log/cassandra/import.log
}

# Parse the command-line options into KEYSPACE, SNAPSHOT_DIR and
# CASSANDRA_SEEDS. Exits with status 1 on an unknown option or a missing value.
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --keyspace | --dump_dir | --cassandra_seeds)
        # Guard before "shift 2": it would fail and loop forever otherwise.
        if [ $# -lt 2 ]; then
          echo "Missing value for $1" >&2
          exit 1
        fi
        case "$1" in
          --keyspace) KEYSPACE="$2" ;;
          --dump_dir) SNAPSHOT_DIR="$2" ;;
          --cassandra_seeds) CASSANDRA_SEEDS="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "Unknown argument: $1" >&2
        exit 1
        ;;
    esac
  done
}

# Load each sub-directory of SNAPSHOT_DIR as one table of KEYSPACE.
# Returns 0 only if sstableloader succeeded for every table.
import_snapshots() {
  local table_dir table_name
  local failures=0

  if [ ! -d "$SNAPSHOT_DIR" ]; then
    log "Snapshot directory not found: $SNAPSHOT_DIR"
    return 1
  fi

  # Glob instead of parsing `ls`; the trailing slash restricts to directories.
  for table_dir in "$SNAPSHOT_DIR"/*/; do
    [ -d "$table_dir" ] || continue # unmatched glob stays literal
    table_dir=${table_dir%/}
    table_name=${table_dir##*/} # table name is the directory name

    log "Importing table: $table_name from directory: $table_dir"
    if ! sstableloader -d "$CASSANDRA_SEEDS" -v -k "$KEYSPACE" "$table_dir"; then
      log "sstableloader failed for table: $table_name"
      failures=$((failures + 1))
      continue
    fi
    # Row count goes to stderr, as a quick visual check of the import.
    cqlsh ${IP_ADDRESS:+"$IP_ADDRESS"} -k "$KEYSPACE" \
      -e "select count(*) from $table_name;" >&2
  done

  [ "$failures" -eq 0 ]
}

main() {
  parse_args "$@"

  if [ -z "$KEYSPACE" ] || [ -z "$CASSANDRA_SEEDS" ]; then
    echo "KEYSPACE and CASSANDRA_SEEDS must be set (options or environment)." >&2
    return 1
  fi

  log "Importing snapshots using sstableloader..."
  import_snapshots
}

main "$@"
|
|
@ -0,0 +1,23 @@
|
|||
#!/bin/bash
|
||||
IP_ADDRESS=$(hostname -I | awk '{print $1}')
|
||||
|
||||
# Define a logging function
|
||||
log() {
|
||||
local MESSAGE="$1"
|
||||
echo -e "$MESSAGE" | tee -a /var/log/cassandra/is_cassandra_ready.log
|
||||
}
|
||||
|
||||
log "Checking if Cassandra is ready..."
|
||||
|
||||
is_cassandra_ready() {
|
||||
cqlsh $IP_ADDRESS -e 'SHOW HOST' > /dev/null 2>&1
|
||||
}
|
||||
|
||||
is_cassandra_ready
|
||||
if [ $? -eq 0 ]; then
|
||||
log "Cassandra is ready."
|
||||
exit 0
|
||||
else
|
||||
log "Cassandra is not ready."
|
||||
exit 1
|
||||
fi
|
|
@ -0,0 +1,35 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Usage: is_keyspace_exists.sh [--keyspace <keyspace>]
|
||||
# Example: is_keyspace_exists.sh --keyspace dev_keyspace_1
|
||||
|
||||
KEYSPACE=${KEYSPACE:-}
|
||||
|
||||
# Parse arguments
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--keyspace)
|
||||
KEYSPACE="$2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
echo "Unknown argument: $1"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Check for required arguments or environment variables
|
||||
if [ -z "$KEYSPACE" ]; then
|
||||
echo "KEYSPACE is not set. Set it via --keyspace or KEYSPACE environment variable."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
IP_ADDRESS=$(hostname -I | awk '{print $1}')
|
||||
|
||||
if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
|
||||
echo "Keyspace $KEYSPACE EXISTS"
|
||||
exit 0
|
||||
fi
|
||||
echo "Keyspace $KEYSPACE DOES NOT EXIST"
|
||||
exit 1
|
|
@ -0,0 +1,36 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Define a logging function
|
||||
log() {
|
||||
local MESSAGE="$1"
|
||||
echo -e "$MESSAGE" | tee -a /var/log/cassandra/is_node_up.log
|
||||
}
|
||||
|
||||
# Resolve which node to check and which address identifies this node.
#
# Accepted forms (both supported so existing callers keep working):
#   is_node_up.sh --node <node> [--cassandra_rpc_address <address>]
#   is_node_up.sh [<node>] [<address>]            (legacy positional form)
#
# Results (globals):
#   NODE                  - node to look for; defaults to this host's first IP
#   CASSANDRA_RPC_ADDRESS - defaults to the value already in the environment
parse_node_args() {
  local node="" rpc_address=""
  local -a positional=()

  while [ $# -gt 0 ]; do
    case "$1" in
      --node | --cassandra_rpc_address)
        # Guard before "shift 2": it would fail and loop forever otherwise.
        if [ $# -lt 2 ]; then
          echo "Missing value for $1" >&2
          exit 1
        fi
        if [ "$1" = "--node" ]; then
          node="$2"
        else
          rpc_address="$2"
        fi
        shift 2
        ;;
      *)
        positional+=("$1")
        shift
        ;;
    esac
  done

  # Named options win; positional arguments fill whatever is still unset.
  [ -n "$node" ] || node="${positional[0]:-}"
  [ -n "$rpc_address" ] || rpc_address="${positional[1]:-}"

  NODE=${node:-$(hostname -I | awk '{print $1}')}
  CASSANDRA_RPC_ADDRESS=${rpc_address:-$CASSANDRA_RPC_ADDRESS}
}

parse_node_args "$@"
|
||||
|
||||
log "Checking if node $NODE is up..."
|
||||
|
||||
# Report whether a node is listed as Up/Normal ("UN") by nodetool.
#
# Arguments: $1 - node name or address to look for
# Globals:   CASSANDRA_RPC_ADDRESS (read) - when $1 equals it, the node is
#            treated as this host and looked up by the host's first IP
# Returns:   0 if a "UN" line mentions the node, 1 otherwise
is_node_up() {
  local NODE="$1"
  local NODE_STATUS SELF_IP

  # An empty pattern would match every line and report "up" for nothing.
  [ -n "$NODE" ] || return 1

  # -r makes nodetool print resolved host names instead of IPs.
  NODE_STATUS=$(nodetool status -r)
  # -F: the node is a literal string, not a regex ("." in IPs/hosts);
  # --: a value starting with "-" must not be parsed as a grep option.
  if printf '%s\n' "$NODE_STATUS" | grep '^UN' | grep -qF -- "$NODE"; then
    return 0
  fi

  if [ "$NODE" = "$CASSANDRA_RPC_ADDRESS" ]; then
    # The node is this host: fall back to matching its own IP address.
    NODE_STATUS=$(nodetool status)
    SELF_IP=$(hostname -I | awk '{print $1}')
    if [ -n "$SELF_IP" ] &&
      printf '%s\n' "$NODE_STATUS" | grep '^UN' | grep -qF -- "$SELF_IP"; then
      return 0
    fi
  fi

  return 1
}
|
||||
|
||||
is_node_up $NODE
|
||||
if [ $? -eq 0 ]; then
|
||||
log "Node $NODE is up."
|
||||
exit 0
|
||||
else
|
||||
log "Node $NODE is not up."
|
||||
exit 1
|
||||
fi
|
|
@ -0,0 +1,11 @@
|
|||
#!/bin/bash
|
||||
|
||||
# PRIMARY_NODE=${PRIMARY_NODE:-}
|
||||
echo PRIMARY_NODE: $PRIMARY_NODE
|
||||
|
||||
if [ "$PRIMARY_NODE" = "true" ]; then
|
||||
exit 0
|
||||
|
||||
else
|
||||
exit 1
|
||||
fi
|
|
@ -0,0 +1,32 @@
|
|||
#!/bin/bash
|
||||
|
||||
IP_ADDRESS=$(hostname -I | awk '{print $1}')
KEYSPACE=${KEYSPACE:-}
CASSANDRA_SEEDS=${CASSANDRA_SEEDS:-}

# Read the options the setup script passes on the command line.
#
# Usage: [--keyspace <keyspace>] [--cassandra_seeds <host1,host2,...>]
# Each option falls back to the environment variable of the same name, so the
# check also works when the caller's variables were not exported.
#
# Results (globals): KEYSPACE, CASSANDRA_SEEDS, SEEDS (array of seed hosts)
parse_schema_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --keyspace | --cassandra_seeds)
        # Guard before "shift 2": it would fail and loop forever otherwise.
        if [ $# -lt 2 ]; then
          echo "Missing value for $1" >&2
          exit 1
        fi
        if [ "$1" = "--keyspace" ]; then
          KEYSPACE="$2"
        else
          CASSANDRA_SEEDS="$2"
        fi
        shift 2
        ;;
      *)
        echo "Unknown argument: $1" >&2
        exit 1
        ;;
    esac
  done

  # Word splitting is intentional here: commas become separators.
  # shellcheck disable=SC2206
  SEEDS=(${CASSANDRA_SEEDS//,/ })
}

parse_schema_args "$@"
|
||||
|
||||
|
||||
# Define a logging function
|
||||
log() {
|
||||
local MESSAGE="$1"
|
||||
echo -e "$MESSAGE" | tee -a /var/log/cassandra/is_schema_agreed.log
|
||||
}
|
||||
|
||||
log "Checking if schema is agreed..."
|
||||
|
||||
is_schema_agreed() {
|
||||
if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
|
||||
SCHEMA_NODES=$(nodetool describecluster | grep -A 1 "Schema versions:" | grep -o '\[.*\]' | tr -d '[]' | tr ',' '\n' | wc -l)
|
||||
if [ "$SCHEMA_NODES" -eq "${#SEEDS[@]}" ]; then
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
|
||||
is_schema_agreed
|
||||
if [ $? -eq 0 ]; then
|
||||
log "Schema is agreed."
|
||||
exit 0
|
||||
else
|
||||
log "Schema is not agreed."
|
||||
exit 1
|
||||
fi
|
152
scripts/setup.sh
152
scripts/setup.sh
|
@ -8,87 +8,111 @@ log() {
|
|||
|
||||
log "RUNNING SETUP"
|
||||
|
||||
# Configuration
|
||||
KEYSPACE="dev_keyspace_1"
|
||||
DUMP_DIR="/dump" # Ensure DUMP_DIR is defined
|
||||
SCHEMA_PATH="$DUMP_DIR/schema/${KEYSPACE}_schema.cql" # Ensure DUMP_DIR is defined
|
||||
SCHEMA_PATH="$DUMP_DIR/schema/${KEYSPACE}_schema.cql"
|
||||
CASSANDRA_SEEDS="cassandra1,cassandra2,cassandra3"
|
||||
STATUS_DIR="/var/log/cassandra"
|
||||
IP_ADDRESS=$(hostname -I | awk '{print $1}')
|
||||
DATA_DIR="/var/lib/cassandra/data/$KEYSPACE"
|
||||
SNAPSHOT_DIR="$DUMP_DIR/snapshot"
|
||||
PRIMARY_NODE=${PRIMARY_NODE:-false} # Default to false if not set
|
||||
SLEEP_DURATION=5 # Sleep duration in seconds for waits
|
||||
TIMEOUT=3000 # Timeout in seconds for waits
|
||||
|
||||
# Initialize SEEDS array
|
||||
SEEDS=(${CASSANDRA_SEEDS//,/ })
|
||||
|
||||
# Function to wait for all nodes to be in the 'UN' state
|
||||
wait_for_all_nodes_up() {
|
||||
SEEDS=(${CASSANDRA_SEEDS//,/ })
|
||||
# Function to wait for a command to succeed
|
||||
wait_for_command() {
|
||||
local COMMAND="$1"
|
||||
local TIMEOUT="$2"
|
||||
local START_TIME=$(date +%s)
|
||||
local END_TIME=$((START_TIME + TIMEOUT))
|
||||
|
||||
while true; do
|
||||
all_up=true
|
||||
for seed in "${SEEDS[@]}"; do
|
||||
NODE_STATUS=$(nodetool status -r)
|
||||
if ! echo "$NODE_STATUS" | grep -E "^UN.*$seed" > /dev/null; then
|
||||
if [ "$seed" = "$CASSANDRA_RPC_ADDRESS" ]; then
|
||||
NODE_STATUS=$(nodetool status)
|
||||
if ! echo "$NODE_STATUS" | grep -E "^UN.*$(hostname -I | awk '{print $1}')" > /dev/null; then
|
||||
log "Node $seed (self) is not up yet..."
|
||||
all_up=false
|
||||
break
|
||||
fi
|
||||
else
|
||||
log "Node $seed is not up yet..."
|
||||
all_up=false
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done
|
||||
if [ "$all_up" = true ]; then
|
||||
log "All nodes are up."
|
||||
if eval "$COMMAND"; then
|
||||
log "Command succeeded: $COMMAND"
|
||||
break
|
||||
else
|
||||
sleep 5
|
||||
local CURRENT_TIME=$(date +%s)
|
||||
if [ "$CURRENT_TIME" -ge "$END_TIME" ]; then
|
||||
log "Timed out waiting for command: $COMMAND"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log "Command failed: $COMMAND, still waiting"
|
||||
sleep $SLEEP_DURATION
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
# Function to wait for schema agreement across all nodes
|
||||
wait_for_schema_agreement() {
|
||||
while true; do
|
||||
if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
|
||||
if nodetool describecluster | grep -q "Schema versions:"; then
|
||||
SCHEMA_COUNT=$(nodetool describecluster | grep -A 1 "Schema versions:" | wc -l)
|
||||
if [ "$SCHEMA_COUNT" -eq 2 ]; then
|
||||
log "Schema agreement reached."
|
||||
break
|
||||
else
|
||||
log "Waiting for schema agreement..."
|
||||
fi
|
||||
fi
|
||||
else
|
||||
log "Waiting for keyspace $KEYSPACE to be available..."
|
||||
# Function to check if a node is up
|
||||
is_node_up() {
|
||||
local NODE="$1"
|
||||
local NODE_STATUS=$(nodetool status -r)
|
||||
if echo "$NODE_STATUS" | grep -E "^UN" | grep "$NODE" > /dev/null; then
|
||||
return 0
|
||||
elif [ "$NODE" = "$CASSANDRA_RPC_ADDRESS" ]; then
|
||||
NODE_STATUS=$(nodetool status)
|
||||
if echo "$NODE_STATUS" | grep -E "^UN.*$(hostname -I | awk '{print $1}')" > /dev/null; then
|
||||
return 0
|
||||
fi
|
||||
sleep 5
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
|
||||
# Function to wait for all nodes to be up
|
||||
wait_for_all_nodes_up() {
|
||||
for seed in "${SEEDS[@]}"; do
|
||||
wait_for_command "is_node_up $seed" $TIMEOUT
|
||||
done
|
||||
log "All nodes are up."
|
||||
}
|
||||
|
||||
# Function to check for schema agreement and if schema exists
|
||||
is_schema_agreed() {
|
||||
if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
|
||||
SCHEMA_NODES=$(nodetool describecluster | grep -A 1 "Schema versions:" | grep -o '\[.*\]' | tr -d '[]' | tr ',' '\n' | wc -l)
|
||||
if [ "$SCHEMA_NODES" -eq "${#SEEDS[@]}" ]; then
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
|
||||
# Function to check if keyspace exists
|
||||
is_keyspace_exists() {
|
||||
if cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
|
||||
return 0
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
|
||||
log "setup KEYSPACE: $KEYSPACE"
|
||||
log "setup DUMP_DIR: $DUMP_DIR"
|
||||
log "setup SCHEMA_PATH: $SCHEMA_PATH"
|
||||
log "setup CASSANDRA_SEEDS: $CASSANDRA_SEEDS"
|
||||
log "setup STATUS_DIR: $STATUS_DIR"
|
||||
|
||||
# Check if the keyspace directory exists and is not empty
|
||||
if [ -d "$DATA_DIR" ] && [ "$(ls -A $DATA_DIR)" ]; then
|
||||
log "Data directory $DATA_DIR exists and is not empty. Skipping schema creation and data import."
|
||||
SCHEMA_CREATED=false
|
||||
else
|
||||
log "Data directory $DATA_DIR does not exist or is empty. Proceeding with schema creation and data import."
|
||||
SCHEMA_CREATED=true
|
||||
fi
|
||||
|
||||
# Wait for cassandra1 to be ready if this is not the primary node
|
||||
if [ "$PRIMARY_NODE" != "true" ]; then
|
||||
log "Waiting for cassandra1 to be ready..."
|
||||
/wait-for-it.sh cassandra1:9042 -t 60 -- log "cassandra1 is ready"
|
||||
wait_for_service cassandra1 9042 $TIMEOUT
|
||||
fi
|
||||
|
||||
# Start Cassandra in the background
|
||||
cassandra -R &
|
||||
# cassandra -R &
|
||||
|
||||
# Wait for Cassandra to be ready
|
||||
log "Waiting for Cassandra to start..."
|
||||
until cqlsh $IP_ADDRESS -e "SHOW HOST" > /dev/null 2>&1; do
|
||||
sleep 2
|
||||
done
|
||||
wait_for_command "cqlsh $IP_ADDRESS -e 'SHOW HOST' > /dev/null 2>&1" $TIMEOUT
|
||||
|
||||
# Log the value of PRIMARY_NODE for debugging
|
||||
log "PRIMARY_NODE is set to: $PRIMARY_NODE"
|
||||
|
@ -100,42 +124,30 @@ wait_for_all_nodes_up
|
|||
# Step 2: Create keyspace and schema on the primary node
|
||||
if [ "$PRIMARY_NODE" = "true" ]; then
|
||||
log "Checking if keyspace $KEYSPACE exists..."
|
||||
if ! cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
|
||||
if ! is_keyspace_exists; then
|
||||
log "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..."
|
||||
cqlsh $IP_ADDRESS -f "$SCHEMA_PATH"
|
||||
else
|
||||
log "Keyspace $KEYSPACE already exists. Ensuring tables exist..."
|
||||
fi
|
||||
|
||||
# Signal to secondary nodes that schema creation is complete
|
||||
touch $STATUS_DIR/schema_created
|
||||
fi
|
||||
|
||||
# Step 3: Wait for schema to be created and agreed upon across all nodes
|
||||
log "Waiting for schema agreement across all nodes..."
|
||||
wait_for_schema_agreement
|
||||
wait_for_command "is_schema_agreed" $TIMEOUT
|
||||
|
||||
# Step 4: Import data using sstableloader if not previously imported
|
||||
if [ "$PRIMARY_NODE" = "true" ]; then
|
||||
# if [ "$SCHEMA_CREATED" = true ]; then
|
||||
log "Importing snapshots using sstableloader..."
|
||||
for TABLE_DIR in $(ls $DUMP_DIR); do
|
||||
for TABLE_DIR in $(ls $SNAPSHOT_DIR); do
|
||||
TABLE_NAME=$(basename $TABLE_DIR) # Extract table name from directory name
|
||||
log "Importing table: $TABLE_NAME from directory: $DUMP_DIR/$TABLE_DIR"
|
||||
sstableloader -d "$CASSANDRA_SEEDS" -v -k "$KEYSPACE" "$DUMP_DIR/$TABLE_DIR"
|
||||
log "Importing table: $TABLE_NAME from directory: $SNAPSHOT_DIR/$TABLE_DIR"
|
||||
sstableloader -d "$CASSANDRA_SEEDS" -v -k "$KEYSPACE" "$SNAPSHOT_DIR/$TABLE_DIR"
|
||||
cqlsh $IP_ADDRESS -k "$KEYSPACE" -e "select count(*) from $TABLE_NAME;" >&2
|
||||
done
|
||||
|
||||
# Signal to secondary nodes that import is complete
|
||||
touch $STATUS_DIR/import_complete
|
||||
else
|
||||
# Wait for import completion signal from primary node
|
||||
log "Waiting for import completion signal from primary node..."
|
||||
while [ ! -f "$STATUS_DIR/import_complete" ]; do
|
||||
sleep 5
|
||||
done
|
||||
fi
|
||||
# fi
|
||||
|
||||
log "FINISHED IMPORT"
|
||||
|
||||
# Keep the container running
|
||||
tail -f /dev/null
|
||||
# tail -f /dev/null
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
#!/bin/bash
|
||||
|
||||
|
||||
# Keep the container running
|
||||
tail -f /dev/null
|
|
@ -0,0 +1,120 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Define a logging function
|
||||
log() {
|
||||
local MESSAGE="$1"
|
||||
echo -e "$MESSAGE" | tee -a /var/log/cassandra/setup.log
|
||||
}
|
||||
|
||||
log "RUNNING SETUP"
|
||||
|
||||
# Configuration
|
||||
KEYSPACE=${KEYSPACE:-dev_keyspace_1}
|
||||
DUMP_DIR=${DUMP_DIR:-/dump} # Ensure DUMP_DIR is defined
|
||||
CASSANDRA_SEEDS=${CASSANDRA_SEEDS:-cassandra1,cassandra2,cassandra3}
|
||||
PRIMARY_NODE=${PRIMARY_NODE:-false} # Default to false if not set
|
||||
|
||||
IP_ADDRESS=$(hostname -I | awk '{print $1}')
|
||||
|
||||
SCHEMA_PATH="$DUMP_DIR/schema/${KEYSPACE}_schema.cql"
|
||||
DATA_DIR="/var/lib/cassandra/data/$KEYSPACE"
|
||||
SNAPSHOT_DIR="$DUMP_DIR/snapshot"
|
||||
|
||||
# Initialize SEEDS array
|
||||
SEEDS=(${CASSANDRA_SEEDS//,/ })
|
||||
|
||||
SLEEP_DURATION=5 # Sleep duration in seconds for waits
|
||||
TIMEOUT=300 # Timeout in seconds for waits
|
||||
|
||||
# Function to wait for a command to succeed
|
||||
# Re-run a command until it succeeds or a timeout elapses.
#
# Arguments: $1 - command line, evaluated with eval so it may contain
#                 redirections; must come from this script, never from
#                 untrusted input
#            $2 - timeout in seconds (optional; defaults to the global
#                 TIMEOUT, or 300 when that is unset)
# Globals:   SLEEP_DURATION (read) - pause between attempts, default 5
# Returns:   0 once the command succeeds
# Exits:     with status 1 (whole script) on timeout or empty command
wait_for_command() {
  local COMMAND="$1"
  # The right-hand side is expanded before the local is created, so this
  # reads the caller's/global TIMEOUT when no second argument is given.
  local TIMEOUT="${2:-${TIMEOUT:-300}}"
  local START_TIME END_TIME CURRENT_TIME

  # eval of an empty string succeeds, which would hide a caller mistake.
  if [ -z "$COMMAND" ]; then
    log "wait_for_command: no command given"
    exit 1
  fi

  START_TIME=$(date +%s)
  END_TIME=$((START_TIME + TIMEOUT))

  until eval "$COMMAND"; do
    CURRENT_TIME=$(date +%s)
    if [ "$CURRENT_TIME" -ge "$END_TIME" ]; then
      log "Timed out waiting for command: $COMMAND"
      exit 1
    fi

    log "Command failed: $COMMAND, still waiting"
    sleep "${SLEEP_DURATION:-5}"
  done

  log "Command succeeded: $COMMAND"
  return 0
}
|
||||
|
||||
log "setup KEYSPACE: $KEYSPACE"
|
||||
log "setup DUMP_DIR: $DUMP_DIR"
|
||||
log "setup SCHEMA_PATH: $SCHEMA_PATH"
|
||||
log "setup CASSANDRA_SEEDS: $CASSANDRA_SEEDS"
|
||||
|
||||
# Check if the keyspace directory exists and is not empty
|
||||
if [ -d "$DATA_DIR" ] && [ "$(ls -A $DATA_DIR)" ]; then
|
||||
log "Data directory $DATA_DIR exists and is not empty. Skipping schema creation and data import."
|
||||
EMPTY_DB=false
|
||||
else
|
||||
log "Data directory $DATA_DIR does not exist or is empty. Proceeding with schema creation and data import."
|
||||
EMPTY_DB=true
|
||||
fi
|
||||
|
||||
# # Wait for cassandra1 to be ready if this is not the primary node
|
||||
# if [ "$PRIMARY_NODE" != "true" ]; then
|
||||
# wait_for_command "/scripts/is_node_up.sh --node cassandra1 --cassandra_rpc_address $IP_ADDRESS" $TIMEOUT
|
||||
# fi
|
||||
|
||||
# Start Cassandra in the background
|
||||
cassandra -R &
|
||||
|
||||
# Wait for Cassandra to be ready
|
||||
wait_for_command "/scripts/is_cassandra_ready.sh" $TIMEOUT
|
||||
|
||||
# Log the value of PRIMARY_NODE for debugging
|
||||
log "PRIMARY_NODE is set to: $PRIMARY_NODE"
|
||||
|
||||
# Step 1: Wait for all nodes to be up and ready
# The per-seed wait is done by wait_for_all_nodes_up, which is defined and
# invoked right after this point. A direct wait_for_command call used to sit
# here, but it expanded $seed before any loop had set it, so it has been removed.
log "Waiting for all nodes to be up and ready..."
# TODO (translated from the original note): wait for all the nodes.
# NOTE(review): wait_for_all_nodes_up appears to cover this already; confirm
# and drop the TODO. The note was written as "// ...", which bash executes as
# a command rather than treating as a comment.
|
||||
|
||||
# Function to wait for all nodes to be up
|
||||
wait_for_all_nodes_up() {
|
||||
for seed in "${SEEDS[@]}"; do
|
||||
wait_for_command "/scripts/is_node_up.sh --node $seed " $TIMEOUT
|
||||
done
|
||||
log "All nodes are up."
|
||||
}
|
||||
|
||||
wait_for_all_nodes_up
|
||||
|
||||
# Step 2: Create keyspace and schema on the primary node
|
||||
if [ "$PRIMARY_NODE" = "true" ]; then
|
||||
log "Checking if keyspace $KEYSPACE exists..."
|
||||
if ! /scripts/is_keyspace_exists.sh --keyspace "$KEYSPACE"; then
|
||||
log "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..."
|
||||
cqlsh $IP_ADDRESS -f "$SCHEMA_PATH"
|
||||
else
|
||||
log "Keyspace $KEYSPACE already exists. Ensuring tables exist..."
|
||||
fi
|
||||
fi
|
||||
|
||||
# Step 3: Wait for schema to be created and agreed upon across all nodes
|
||||
log "Waiting for schema agreement across all nodes..."
|
||||
wait_for_command "/scripts/is_schema_agreed.sh --keyspace $KEYSPACE --cassandra_seeds $CASSANDRA_SEEDS" $TIMEOUT
|
||||
|
||||
# Step 4: Import data using sstableloader if not previously imported
|
||||
if [ "$EMPTY_DB" = true ]; then
|
||||
log "Importing snapshots using sstableloader..."
|
||||
/scripts/import.sh --keyspace "$KEYSPACE" --dump_dir "$SNAPSHOT_DIR" --cassandra_seeds "$CASSANDRA_SEEDS"
|
||||
fi
|
||||
|
||||
log "FINISHED IMPORT"
|
||||
|
||||
# Keep the container running
|
||||
tail -f /dev/null
|
|
@ -0,0 +1,31 @@
|
|||
#!/bin/bash
|
||||
|
||||
COMMAND="$1"
|
||||
TIMEOUT="$2"
|
||||
SLEEP_DURATION=5 # Sleep duration in seconds for waits
|
||||
|
||||
log() {
|
||||
local MESSAGE="$1"
|
||||
echo -e "$MESSAGE" | tee -a /var/log/cassandra/setup.log
|
||||
}
|
||||
|
||||
wait_for_command() {
|
||||
local START_TIME=$(date +%s)
|
||||
local END_TIME=$((START_TIME + TIMEOUT))
|
||||
|
||||
while true; do
|
||||
if eval "$COMMAND"; then
|
||||
log "Command succeeded: $COMMAND"
|
||||
break
|
||||
else
|
||||
local CURRENT_TIME=$(date +%s)
|
||||
if [ "$CURRENT_TIME" -ge "$END_TIME" ]; then
|
||||
log "Timed out waiting for command: $COMMAND"
|
||||
exit 1
|
||||
fi
|
||||
sleep $SLEEP_DURATION
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
wait_for_command
|
Loading…
Reference in New Issue