From 9bf69282e83aab55ea0b173490aa8cc886673a4e Mon Sep 17 00:00:00 2001 From: Alfredo Oliviero Date: Wed, 24 Jul 2024 18:05:37 +0200 Subject: [PATCH] sistemando import, per ora problemi con CLUSTERING ORDER BY (column1 ) --- Dockerfile | 18 ++-- README.md | 5 + cassandra-rackdc.properties | 1 + cassandra.yaml | 207 +++++++++++++++++++++++++++++++++--- docker-compose.yml | 31 ++---- scripts/setup.sh | 51 +++++++-- 6 files changed, 254 insertions(+), 59 deletions(-) diff --git a/Dockerfile b/Dockerfile index ee1d17d..f9d10e5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ # Use the official Cassandra 4.1.3 image FROM cassandra:4.1.3 -RUN apt-get update && apt-get install -y iputils-ping less locate - +# Install gettext to use envsubst +RUN apt-get update && apt-get install -y iputils-ping less locate gettext-base # Environment variables to configure Cassandra ENV CASSANDRA_CLUSTER_NAME=TestCluster @@ -20,18 +20,18 @@ COPY data/dev_keyspace_schema.cql /docker-entrypoint-initdb.d/dev_keyspace_schem COPY scripts/setup.sh /setup.sh RUN chmod +x /setup.sh -# Copy cassandra.yaml +# Copy cassandra.yaml and cassandra-rackdc.properties COPY cassandra.yaml /etc/cassandra/ - -# Copy cassandra-rackdc.properties and substitute environment variables COPY cassandra-rackdc.properties /etc/cassandra/ -RUN envsubst < /etc/cassandra/cassandra-rackdc.properties > /etc/cassandra/cassandra-rackdc.properties + +# Substitute environment variables in cassandra.yaml +RUN envsubst < /etc/cassandra/cassandra.yaml > /etc/cassandra/cassandra.yaml.tmp && mv /etc/cassandra/cassandra.yaml.tmp /etc/cassandra/cassandra.yaml + +# Substitute environment variables in cassandra-rackdc.properties +RUN envsubst < /etc/cassandra/cassandra-rackdc.properties > /etc/cassandra/cassandra-rackdc.properties.tmp && mv /etc/cassandra/cassandra-rackdc.properties.tmp /etc/cassandra/cassandra-rackdc.properties # Set the entrypoint ENTRYPOINT ["/setup.sh"] # Expose Cassandra ports EXPOSE 7000 7001 7199 9042 9160 - -# Add health check -# HEALTHCHECK --interval=30s --timeout=10s --retries=5 CMD cqlsh -e "DESCRIBE KEYSPACES" || exit 1 diff --git a/README.md b/README.md index fcd95a7..0612be7 100644 --- a/README.md +++ b/README.md @@ -18,10 +18,15 @@ aggiungere a /etc/host l'entry: start composer: `docker compose up --build` +force recreate docker image + +`docker compose build --no-cache` + check status: `docker exec -it cassandra-cassandra-1 nodetool status` + run a single service: * cassandra1: `docker-compose up cassandra-cassandra1 --build` * cassandra2: `docker-compose up cassandra-cassandra2 --build` diff --git a/cassandra-rackdc.properties b/cassandra-rackdc.properties index eec8a07..01b1e2c 100644 --- a/cassandra-rackdc.properties +++ b/cassandra-rackdc.properties @@ -1,2 +1,3 @@ dc=${CASSANDRA_DC} rack=${CASSANDRA_RACK} + diff --git a/cassandra.yaml b/cassandra.yaml index 51fe15c..b74e6fa 100644 --- a/cassandra.yaml +++ b/cassandra.yaml @@ -1,27 +1,204 @@ -# Seed provider +# Cluster name +cluster_name: ${CASSANDRA_CLUSTER_NAME} + +# Addresses +listen_address: ${CASSANDRA_LISTEN_ADDRESS} +broadcast_address: ${CASSANDRA_BROADCAST_ADDRESS} +# rpc_address: 0.0.0.0 +broadcast_rpc_address: ${CASSANDRA_RPC_ADDRESS} + +# Seed nodes seed_provider: - class_name: org.apache.cassandra.locator.SimpleSeedProvider parameters: - - seeds: "${CASSANDRA_SEEDS}" + - seeds: "cassandra1,cassandra2,cassandra3" -# Address to bind to and tell other Cassandra nodes to connect to -listen_address: "${CASSANDRA_LISTEN_ADDRESS}" +# Directories +data_file_directories: + - /var/lib/cassandra/data -# Address to broadcast to other Cassandra nodes -broadcast_address: "${CASSANDRA_BROADCAST_ADDRESS}" +commitlog_directory: /var/lib/cassandra/commitlog -# Address to bind the RPC server (default: same as listen_address) -rpc_address: 0.0.0.0 +saved_caches_directory: /var/lib/cassandra/saved_caches -# Address to broadcast to clients -broadcast_rpc_address: "${CASSANDRA_RPC_ADDRESS}" +# Tokens and allocation +num_tokens: ${CASSANDRA_NUM_TOKENS} +allocate_tokens_for_local_replication_factor: 3 -# Native transport port for CQL +# Hinted handoff settings +hinted_handoff_enabled: true +max_hint_window: 3h +hinted_handoff_throttle: 1024KiB +max_hints_delivery_threads: 2 +hints_flush_period: 10000ms +max_hints_file_size: 128MiB +auto_hints_cleanup_enabled: false + +# Batchlog settings +batchlog_replay_throttle: 1024KiB + +# Authentication and authorization +authenticator: AllowAllAuthenticator +authorizer: AllowAllAuthorizer +role_manager: CassandraRoleManager +network_authorizer: AllowAllNetworkAuthorizer +roles_validity: 2000ms +permissions_validity: 2000ms +credentials_validity: 2000ms + +# Partitioner +partitioner: org.apache.cassandra.dht.Murmur3Partitioner + +# Commit log settings +cdc_enabled: false +disk_failure_policy: stop +commit_failure_policy: stop +commitlog_sync: periodic +commitlog_sync_period: 10000ms +commitlog_segment_size: 32MiB + +# Cache settings +prepared_statements_cache_size: +key_cache_size: +key_cache_save_period: 4h +row_cache_size: 0MiB +row_cache_save_period: 0s +counter_cache_size: +counter_cache_save_period: 7200s + +# Concurrent operations settings +concurrent_reads: 32 +concurrent_writes: 32 +concurrent_counter_writes: 32 +concurrent_materialized_view_writes: 32 + +# Memtable allocation +memtable_allocation_type: heap_buffers + +# Index summary settings +index_summary_capacity: +index_summary_resize_interval: 60m + +# Fsync settings +trickle_fsync: false +trickle_fsync_interval: 10240KiB + +# Ports +storage_port: 7000 +ssl_storage_port: 7001 + +# Native transport settings +start_native_transport: true native_transport_port: 9042 +native_transport_allow_older_protocols: true +rpc_keepalive: true -# Snitch configuration +# Backup settings +incremental_backups: false +snapshot_before_compaction: false +auto_snapshot: true +snapshot_links_per_second: 0 + +# SSTable settings +column_index_size: 64KiB +column_index_cache_size: 2KiB +concurrent_materialized_view_builders: 1 +compaction_throughput: 64MiB/s +sstable_preemptive_open_interval: 50MiB +uuid_sstable_identifiers_enabled: false + +# Request timeouts +read_request_timeout: 1000000ms +range_request_timeout: 1000000ms +write_request_timeout: 1000000ms +counter_write_request_timeout: 1000000ms +cas_contention_timeout: 1000000ms +truncate_request_timeout: 1000000ms +request_timeout: 1000000ms +slow_query_log_timeout: 500ms + +# Snitch settings endpoint_snitch: GossipingPropertyFileSnitch +dynamic_snitch_update_interval: 100ms +dynamic_snitch_reset_interval: 600000ms +dynamic_snitch_badness_threshold: 1.0 -# DC and RACK settings -dc: ${CASSANDRA_DC} -rack: ${CASSANDRA_RACK} +# Encryption options +server_encryption_options: + internode_encryption: none + legacy_ssl_storage_port_enabled: false + keystore: conf/.keystore + keystore_password: cassandra + require_client_auth: false + truststore: conf/.truststore + truststore_password: cassandra + require_endpoint_verification: false +client_encryption_options: + enabled: false + keystore: conf/.keystore + keystore_password: cassandra + require_client_auth: false + +# Inter-node communication settings +internode_compression: dc +inter_dc_tcp_nodelay: false + +# Trace settings +trace_type_query_ttl: 1d +trace_type_repair_ttl: 7d + +# User-defined functions +user_defined_functions_enabled: false +scripted_user_defined_functions_enabled: false + +# Transparent data encryption +transparent_data_encryption_options: + enabled: false + chunk_length_kb: 64 + cipher: AES/CBC/PKCS5Padding + key_alias: testing:1 + key_provider: + - class_name: org.apache.cassandra.security.JKSKeyProvider + parameters: + - keystore: conf/.keystore + keystore_password: cassandra + store_type: JCEKS + key_password: cassandra + +# Tombstone settings +tombstone_warn_threshold: 1000 +tombstone_failure_threshold: 100000 + +# Replica filtering protection +replica_filtering_protection: + cached_rows_warn_threshold: 2000 + cached_rows_fail_threshold: 32000 + +# Batch size settings +batch_size_warn_threshold: 5KiB +batch_size_fail_threshold: 50KiB +unlogged_batch_across_partitions_warn_threshold: 10 + +# Compaction settings +compaction_large_partition_warning_threshold: 100MiB +compaction_tombstone_warning_threshold: 100000 + +# Audit logging options +audit_logging_options: + enabled: false + logger: + - class_name: BinAuditLogger + +# Diagnostic events +diagnostic_events_enabled: false + +# Repaired data tracking +repaired_data_tracking_for_range_reads_enabled: false +repaired_data_tracking_for_partition_reads_enabled: false +report_unconfirmed_repaired_data_mismatches: false + +# Feature flags +materialized_views_enabled: false +sasi_indexes_enabled: false +transient_replication_enabled: false +drop_compact_storage_enabled: false diff --git a/docker-compose.yml b/docker-compose.yml index a8281a5..1b62272 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: '3.8' services: cassandra1: - image: cassandra:4.1.3 + build: . container_name: cassandra1 env_file: - .env @@ -21,14 +21,9 @@ services: - "9042:9042" networks: - cassandra-net - # healthcheck: - # test: ["CMD", "cqlsh", "-e", "DESCRIBE KEYSPACES"] - # interval: 30s - # timeout: 10s - # retries: 5 cassandra2: - image: cassandra:4.1.3 + build: . container_name: cassandra2 env_file: - .env @@ -45,17 +40,11 @@ services: - ./logs/node2:/var/log/cassandra networks: - cassandra-net - # depends_on: - # cassandra1: - # condition: service_healthy - # healthcheck: - # test: ["CMD", "cqlsh", "-e", "DESCRIBE KEYSPACES"] - # interval: 30s - # timeout: 10s - # retries: 5 + depends_on: + - cassandra1 cassandra3: - image: cassandra:4.1.3 + build: . container_name: cassandra3 env_file: - .env @@ -72,14 +61,8 @@ services: - ./logs/node3:/var/log/cassandra networks: - cassandra-net - # depends_on: - # cassandra2: - # condition: service_healthy - # healthcheck: - # test: ["CMD", "cqlsh", "-e", "DESCRIBE KEYSPACES"] - # interval: 30s - # timeout: 10s - # retries: 5 + depends_on: + - cassandra2 networks: cassandra-net: diff --git a/scripts/setup.sh b/scripts/setup.sh index a57e1f7..e90e7bb 100644 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -1,35 +1,64 @@ #!/bin/bash +echo -e "RUNNING SETUP" >&2 KEYSPACE="dev_keyspace" +DUMP_DIR="/dump" # Ensure DUMP_DIR is defined + +# Determine the IP address of the current node +IP_ADDRESS=$(hostname -I | awk '{print $1}') + +# Print the IP address for debugging +echo -e "Node IP Address: $IP_ADDRESS" >&2 + +# Wait for cassandra1 to be ready if this is not the primary node +if [ "$PRIMARY_NODE" != "true" ]; then + echo -e "Waiting for cassandra1 to be ready..." >&2 + /wait-for-it.sh cassandra1:9042 -t 60 -- echo "cassandra1 is ready" >&2 +fi # Start Cassandra in the background cassandra -R & # Wait for Cassandra to be ready -echo "Waiting for Cassandra to start..." -until cqlsh -e "SHOW HOST" > /dev/null 2>&1; do +echo -e "Waiting for Cassandra to start..." >&2 +until cqlsh $IP_ADDRESS -e "SHOW HOST" > /dev/null 2>&1; do sleep 2 done +# Print the value of PRIMARY_NODE for debugging +echo -e "PRIMARY_NODE is set to: $PRIMARY_NODE" >&2 + if [ "$PRIMARY_NODE" = "true" ]; then # Check if the keyspace exists - echo "Checking if keyspace $KEYSPACE exists..." - if ! cqlsh -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then - echo "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..." - cqlsh -f /docker-entrypoint-initdb.d/dev_keyspace_schema.cql + echo -e "Checking if keyspace $KEYSPACE exists..." >&2 + if ! cqlsh $IP_ADDRESS -e "DESCRIBE KEYSPACE $KEYSPACE;" > /dev/null 2>&1; then + echo -e "Keyspace $KEYSPACE does not exist. Creating keyspace and tables..." >&2 + cqlsh $IP_ADDRESS -f /docker-entrypoint-initdb.d/dev_keyspace_schema.cql else - echo "Keyspace $KEYSPACE already exists. Skipping creation." + echo -e "Keyspace $KEYSPACE already exists. Ensuring tables exist..." >&2 + + # Manually define the schema for legacy tables + cqlsh $IP_ADDRESS -e "CREATE TABLE IF NOT EXISTS dev_keyspace.hashtaggedposts ( + id UUID PRIMARY KEY, + -- other columns + );" + # Add similar statements for all other tables fi +else + echo -e "This is not the primary node. Skipping keyspace and table creation." >&2 fi # Copy snapshots to the Cassandra data directory -echo "Copying snapshots..." +echo -e "Copying snapshots..." >&2 cp -r $DUMP_DIR/* /var/lib/cassandra/data/ # Import snapshots into the Cassandra data directory -echo "Importing snapshots..." -for table in $(ls $DUMP_DIR); do - nodetool import $KEYSPACE $table +echo -e "Importing snapshots..." >&2 +for table_dir in $(ls $DUMP_DIR); do + table_name=$(echo $table_dir | sed 's/-[a-f0-9]\{32\}$//') + echo -e "Importing table: $table_name from directory: $table_dir" >&2 + echo -e "Command: nodetool import $KEYSPACE $table_name /var/lib/cassandra/data/$KEYSPACE/$table_dir" >&2 + nodetool import $KEYSPACE $table_name /var/lib/cassandra/data/$KEYSPACE/$table_dir done # Keep the container running