sistemando import, per ora problemi con CLUSTERING ORDER BY (column1)

This commit is contained in:
Alfredo Oliviero 2024-07-24 18:05:37 +02:00
parent f163144a65
commit 9bf69282e8
6 changed files with 254 additions and 59 deletions

View File

@ -1,8 +1,8 @@
# Use the official Cassandra 4.1.3 image # Use the official Cassandra 4.1.3 image
FROM cassandra:4.1.3 FROM cassandra:4.1.3
RUN apt-get update && apt-get install -y iputils-ping less locate # Install gettext to use envsubst
RUN apt-get update && apt-get install -y iputils-ping less locate gettext-base
# Environment variables to configure Cassandra # Environment variables to configure Cassandra
ENV CASSANDRA_CLUSTER_NAME=TestCluster ENV CASSANDRA_CLUSTER_NAME=TestCluster
@ -20,18 +20,18 @@ COPY data/dev_keyspace_schema.cql /docker-entrypoint-initdb.d/dev_keyspace_schem
COPY scripts/setup.sh /setup.sh COPY scripts/setup.sh /setup.sh
RUN chmod +x /setup.sh RUN chmod +x /setup.sh
# Copy cassandra.yaml # Copy cassandra.yaml and cassandra-rackdc.properties
COPY cassandra.yaml /etc/cassandra/ COPY cassandra.yaml /etc/cassandra/
# Copy cassandra-rackdc.properties and substitute environment variables
COPY cassandra-rackdc.properties /etc/cassandra/ COPY cassandra-rackdc.properties /etc/cassandra/
RUN envsubst < /etc/cassandra/cassandra-rackdc.properties > /etc/cassandra/cassandra-rackdc.properties
# Substitute environment variables in cassandra.yaml
RUN envsubst < /etc/cassandra/cassandra.yaml > /etc/cassandra/cassandra.yaml.tmp && mv /etc/cassandra/cassandra.yaml.tmp /etc/cassandra/cassandra.yaml
# Substitute environment variables in cassandra-rackdc.properties
RUN envsubst < /etc/cassandra/cassandra-rackdc.properties > /etc/cassandra/cassandra-rackdc.properties.tmp && mv /etc/cassandra/cassandra-rackdc.properties.tmp /etc/cassandra/cassandra-rackdc.properties
# Set the entrypoint # Set the entrypoint
ENTRYPOINT ["/setup.sh"] ENTRYPOINT ["/setup.sh"]
# Expose Cassandra ports # Expose Cassandra ports
EXPOSE 7000 7001 7199 9042 9160 EXPOSE 7000 7001 7199 9042 9160
# Add health check
# HEALTHCHECK --interval=30s --timeout=10s --retries=5 CMD cqlsh -e "DESCRIBE KEYSPACES" || exit 1

View File

@ -18,10 +18,15 @@ aggiungere a /etc/hosts l'entry:
start composer: start composer:
`docker compose up --build` `docker compose up --build`
force recreate docker image:
`docker compose build --no-cache`
check status: check status:
`docker exec -it cassandra-cassandra-1 nodetool status` `docker exec -it cassandra-cassandra-1 nodetool status`
run a single service: run a single service:
* cassandra1: `docker-compose up --build cassandra1` * cassandra1: `docker-compose up --build cassandra1`
* cassandra2: `docker-compose up --build cassandra2` * cassandra2: `docker-compose up --build cassandra2`

View File

@ -1,2 +1,3 @@
dc=${CASSANDRA_DC} dc=${CASSANDRA_DC}
rack=${CASSANDRA_RACK} rack=${CASSANDRA_RACK}

View File

@ -1,27 +1,204 @@
# Seed provider # Cluster name
cluster_name: ${CASSANDRA_CLUSTER_NAME}
# Addresses
listen_address: ${CASSANDRA_LISTEN_ADDRESS}
broadcast_address: ${CASSANDRA_BROADCAST_ADDRESS}
# rpc_address: 0.0.0.0
broadcast_rpc_address: ${CASSANDRA_RPC_ADDRESS}
# Seed nodes
seed_provider: seed_provider:
- class_name: org.apache.cassandra.locator.SimpleSeedProvider - class_name: org.apache.cassandra.locator.SimpleSeedProvider
parameters: parameters:
- seeds: "${CASSANDRA_SEEDS}" - seeds: "cassandra1,cassandra2,cassandra3"
# Address to bind to and tell other Cassandra nodes to connect to # Directories
listen_address: "${CASSANDRA_LISTEN_ADDRESS}" data_file_directories:
- /var/lib/cassandra/data
# Address to broadcast to other Cassandra nodes commitlog_directory: /var/lib/cassandra/commitlog
broadcast_address: "${CASSANDRA_BROADCAST_ADDRESS}"
# Address to bind the RPC server (default: same as listen_address) saved_caches_directory: /var/lib/cassandra/saved_caches
rpc_address: 0.0.0.0
# Address to broadcast to clients # Tokens and allocation
broadcast_rpc_address: "${CASSANDRA_RPC_ADDRESS}" num_tokens: ${CASSANDRA_NUM_TOKENS}
allocate_tokens_for_local_replication_factor: 3
# Native transport port for CQL # Hinted handoff settings
hinted_handoff_enabled: true
max_hint_window: 3h
hinted_handoff_throttle: 1024KiB
max_hints_delivery_threads: 2
hints_flush_period: 10000ms
max_hints_file_size: 128MiB
auto_hints_cleanup_enabled: false
# Batchlog settings
batchlog_replay_throttle: 1024KiB
# Authentication and authorization
authenticator: AllowAllAuthenticator
authorizer: AllowAllAuthorizer
role_manager: CassandraRoleManager
network_authorizer: AllowAllNetworkAuthorizer
roles_validity: 2000ms
permissions_validity: 2000ms
credentials_validity: 2000ms
# Partitioner
partitioner: org.apache.cassandra.dht.Murmur3Partitioner
# Commit log settings
cdc_enabled: false
disk_failure_policy: stop
commit_failure_policy: stop
commitlog_sync: periodic
commitlog_sync_period: 10000ms
commitlog_segment_size: 32MiB
# Cache settings
prepared_statements_cache_size:
key_cache_size:
key_cache_save_period: 4h
row_cache_size: 0MiB
row_cache_save_period: 0s
counter_cache_size:
counter_cache_save_period: 7200s
# Concurrent operations settings
concurrent_reads: 32
concurrent_writes: 32
concurrent_counter_writes: 32
concurrent_materialized_view_writes: 32
# Memtable allocation
memtable_allocation_type: heap_buffers
# Index summary settings
index_summary_capacity:
index_summary_resize_interval: 60m
# Fsync settings
trickle_fsync: false
trickle_fsync_interval: 10240KiB
# Ports
storage_port: 7000
ssl_storage_port: 7001
# Native transport settings
start_native_transport: true
native_transport_port: 9042 native_transport_port: 9042
native_transport_allow_older_protocols: true
rpc_keepalive: true
# Snitch configuration # Backup settings
incremental_backups: false
snapshot_before_compaction: false
auto_snapshot: true
snapshot_links_per_second: 0
# SSTable settings
column_index_size: 64KiB
column_index_cache_size: 2KiB
concurrent_materialized_view_builders: 1
compaction_throughput: 64MiB/s
sstable_preemptive_open_interval: 50MiB
uuid_sstable_identifiers_enabled: false
# Request timeouts
read_request_timeout: 1000000ms
range_request_timeout: 1000000ms
write_request_timeout: 1000000ms
counter_write_request_timeout: 1000000ms
cas_contention_timeout: 1000000ms
truncate_request_timeout: 1000000ms
request_timeout: 1000000ms
slow_query_log_timeout: 500ms
# Snitch settings
endpoint_snitch: GossipingPropertyFileSnitch endpoint_snitch: GossipingPropertyFileSnitch
dynamic_snitch_update_interval: 100ms
dynamic_snitch_reset_interval: 600000ms
dynamic_snitch_badness_threshold: 1.0
# DC and RACK settings # Encryption options
dc: ${CASSANDRA_DC} server_encryption_options:
rack: ${CASSANDRA_RACK} internode_encryption: none
legacy_ssl_storage_port_enabled: false
keystore: conf/.keystore
keystore_password: cassandra
require_client_auth: false
truststore: conf/.truststore
truststore_password: cassandra
require_endpoint_verification: false
client_encryption_options:
enabled: false
keystore: conf/.keystore
keystore_password: cassandra
require_client_auth: false
# Inter-node communication settings
internode_compression: dc
inter_dc_tcp_nodelay: false
# Trace settings
trace_type_query_ttl: 1d
trace_type_repair_ttl: 7d
# User-defined functions
user_defined_functions_enabled: false
scripted_user_defined_functions_enabled: false
# Transparent data encryption
transparent_data_encryption_options:
enabled: false
chunk_length_kb: 64
cipher: AES/CBC/PKCS5Padding
key_alias: testing:1
key_provider:
- class_name: org.apache.cassandra.security.JKSKeyProvider
parameters:
- keystore: conf/.keystore
keystore_password: cassandra
store_type: JCEKS
key_password: cassandra
# Tombstone settings
tombstone_warn_threshold: 1000
tombstone_failure_threshold: 100000
# Replica filtering protection
replica_filtering_protection:
cached_rows_warn_threshold: 2000
cached_rows_fail_threshold: 32000
# Batch size settings
batch_size_warn_threshold: 5KiB
batch_size_fail_threshold: 50KiB
unlogged_batch_across_partitions_warn_threshold: 10
# Compaction settings
compaction_large_partition_warning_threshold: 100MiB
compaction_tombstone_warning_threshold: 100000
# Audit logging options
audit_logging_options:
enabled: false
logger:
- class_name: BinAuditLogger
# Diagnostic events
diagnostic_events_enabled: false
# Repaired data tracking
repaired_data_tracking_for_range_reads_enabled: false
repaired_data_tracking_for_partition_reads_enabled: false
report_unconfirmed_repaired_data_mismatches: false
# Feature flags
materialized_views_enabled: false
sasi_indexes_enabled: false
transient_replication_enabled: false
drop_compact_storage_enabled: false

View File

@ -2,7 +2,7 @@ version: '3.8'
services: services:
cassandra1: cassandra1:
image: cassandra:4.1.3 build: .
container_name: cassandra1 container_name: cassandra1
env_file: env_file:
- .env - .env
@ -21,14 +21,9 @@ services:
- "9042:9042" - "9042:9042"
networks: networks:
- cassandra-net - cassandra-net
# healthcheck:
# test: ["CMD", "cqlsh", "-e", "DESCRIBE KEYSPACES"]
# interval: 30s
# timeout: 10s
# retries: 5
cassandra2: cassandra2:
image: cassandra:4.1.3 build: .
container_name: cassandra2 container_name: cassandra2
env_file: env_file:
- .env - .env
@ -45,17 +40,11 @@ services:
- ./logs/node2:/var/log/cassandra - ./logs/node2:/var/log/cassandra
networks: networks:
- cassandra-net - cassandra-net
# depends_on: depends_on:
# cassandra1: - cassandra1
# condition: service_healthy
# healthcheck:
# test: ["CMD", "cqlsh", "-e", "DESCRIBE KEYSPACES"]
# interval: 30s
# timeout: 10s
# retries: 5
cassandra3: cassandra3:
image: cassandra:4.1.3 build: .
container_name: cassandra3 container_name: cassandra3
env_file: env_file:
- .env - .env
@ -72,14 +61,8 @@ services:
- ./logs/node3:/var/log/cassandra - ./logs/node3:/var/log/cassandra
networks: networks:
- cassandra-net - cassandra-net
# depends_on: depends_on:
# cassandra2: - cassandra2
# condition: service_healthy
# healthcheck:
# test: ["CMD", "cqlsh", "-e", "DESCRIBE KEYSPACES"]
# interval: 30s
# timeout: 10s
# retries: 5
networks: networks:
cassandra-net: cassandra-net:

View File

@ -1,35 +1,64 @@
#!/bin/bash #!/bin/bash
echo -e "RUNNING SETUP" >&2
KEYSPACE="dev_keyspace" KEYSPACE="dev_keyspace"
DUMP_DIR="/dump" # Ensure DUMP_DIR is defined
# Determine the IP address of the current node
IP_ADDRESS=$(hostname -I | awk '{print $1}')
# Print the IP address for debugging
echo -e "Node IP Address: $IP_ADDRESS" >&2
# Wait for cassandra1 to be ready if this is not the primary node
if [ "$PRIMARY_NODE" != "true" ]; then
echo -e "Waiting for cassandra1 to be ready..." >&2
/wait-for-it.sh cassandra1:9042 -t 60 -- echo "cassandra1 is ready" >&2
fi
# Start Cassandra in the background # Start Cassandra in the background
cassandra -R & cassandra -R &
# Wait for Cassandra to be ready # Wait for Cassandra to be ready
echo "Waiting for Cassandra to start..." echo -e "Waiting for Cassandra to start..." >&2
until cqlsh -e "SHOW HOST" > /dev/null 2>&1; do until cqlsh $IP_ADDRESS -e "SHOW HOST" > /dev/null 2>&1; do
sleep 2 sleep 2
done done
# Print the value of PRIMARY_NODE for debugging
echo -e "PRIMARY_NODE is set to: $PRIMARY_NODE" >&2
if [ "$PRIMARY_NODE" = "true" ]; then if [ "$PRIMARY_NODE" = "true" ]; then
# Check if the keyspace exists # Check if the keyspace exists
echo "Checking if keyspace $KEYSPACE exists..." echo -e "Checking if keyspace $KEYSPACE exists..." >&2
if ! cqlsh -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then if ! cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then
echo "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..." echo -e "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..." >&2
cqlsh -f /docker-entrypoint-initdb.d/dev_keyspace_schema.cql cqlsh $IP_ADDRESS -f /docker-entrypoint-initdb.d/dev_keyspace_schema.cql
else else
echo "Keyspace $KEYSPACE already exists. Skipping creation." echo -e "Keyspace $KEYSPACE already exists. Ensuring tables exist..." >&2
# Manually define the schema for legacy tables
cqlsh $IP_ADDRESS -e "CREATE TABLE IF NOT EXISTS dev_keyspace.hashtaggedposts (
id UUID PRIMARY KEY,
-- other columns
);"
# Add similar statements for all other tables
fi fi
else
echo -e "This is not the primary node. Skipping keyspace and table creation." >&2
fi fi
# Copy snapshots to the Cassandra data directory # Copy snapshots to the Cassandra data directory
echo "Copying snapshots..." echo -e "Copying snapshots..." >&2
cp -r $DUMP_DIR/* /var/lib/cassandra/data/ cp -r $DUMP_DIR/* /var/lib/cassandra/data/
# Import snapshots into the Cassandra data directory # Import snapshots into the Cassandra data directory
echo "Importing snapshots..." echo -e "Importing snapshots..." >&2
for table in $(ls $DUMP_DIR); do for table_dir in $(ls $DUMP_DIR); do
nodetool import $KEYSPACE $table table_name=$(echo $table_dir | sed 's/-[a-f0-9]\{32\}$//')
echo -e "Importing table: $table_name from directory: $table_dir" >&2
echo -e "Command: nodetool import $KEYSPACE $table_name /var/lib/cassandra/data/$KEYSPACE/$table_dir" >&2
nodetool import $KEYSPACE $table_name /var/lib/cassandra/data/$KEYSPACE/$table_dir
done done
# Keep the container running # Keep the container running