diff --git a/openstack-tf/common_setups/50-docker-swarm.tf b/openstack-tf/common_setups/50-docker-swarm.tf
deleted file mode 100644
index e69de29..0000000
diff --git a/openstack-tf/d4s-production/basic-infrastructure/README-docker-swarm.md b/openstack-tf/d4s-production/basic-infrastructure/README-docker-swarm.md
new file mode 100644
index 0000000..3ea3b3b
--- /dev/null
+++ b/openstack-tf/d4s-production/basic-infrastructure/README-docker-swarm.md
@@ -0,0 +1,9 @@
+# Docker Swarm
+
+Docker Swarm cluster behind an Octavia L4 load balancer
+
+* One Octavia load balancer
+* 3 VMs for the master nodes with their server group (anti-affinity)
+* 8 VMs for the worker nodes with their server group (soft anti-affinity)
+* 1 VM for the NFS service
+* 1 dedicated network for the traffic of the NFS exports
diff --git a/openstack-tf/d4s-production/basic-infrastructure/docker-swarm.tf b/openstack-tf/d4s-production/basic-infrastructure/docker-swarm.tf
new file mode 120000
index 0000000..f15e823
--- /dev/null
+++ b/openstack-tf/d4s-production/basic-infrastructure/docker-swarm.tf
@@ -0,0 +1 @@
+../../docker_swarm_setup/docker-swarm.tf
\ No newline at end of file
diff --git a/openstack-tf/d4s-production/basic-infrastructure/production-swarm.auto.tfvars b/openstack-tf/d4s-production/basic-infrastructure/production-swarm.auto.tfvars
new file mode 100644
index 0000000..666db9d
--- /dev/null
+++ b/openstack-tf/d4s-production/basic-infrastructure/production-swarm.auto.tfvars
@@ -0,0 +1,14 @@
+octavia_swarm_data = {
+  swarm_lb_name           = "d4s-production-cloud-swarm-l4"
+  swarm_lb_description    = "L4 balancer that serves the D4Science production Docker Swarm cluster"
+  octavia_flavor          = "octavia_amphora-mvcpu-ha"
+  octavia_flavor_id       = "394988b5-6603-4a1e-a939-8e177c6681c7"
+  swarm_lb_hostname       = "swarm-lb"
+  swarm_octavia_main_ip   = "10.1.40.30"
+  swarm_octavia_main_cidr = "10.1.40.30/32"
+  # The following values aren't known until the load balancer has been created, so we have to get them afterwards with the command
+  # openstack --os-cloud d4s-production port list -f value | grep octavia-lb-vrrp
+  # This means that the first execution will fail
+  octavia_vrrp_ip_1 = "10.1.43.97/32"
+  octavia_vrrp_ip_2 = "10.1.44.78/32"
+}
diff --git a/openstack-tf/d4s-production/basic-infrastructure/swarm-variables.tf b/openstack-tf/d4s-production/basic-infrastructure/swarm-variables.tf
new file mode 120000
index 0000000..150f70a
--- /dev/null
+++ b/openstack-tf/d4s-production/basic-infrastructure/swarm-variables.tf
@@ -0,0 +1 @@
+../../docker_swarm_setup/swarm-variables.tf
\ No newline at end of file
diff --git a/openstack-tf/docker_swarm_setup/docker-swarm.tf b/openstack-tf/docker_swarm_setup/docker-swarm.tf
new file mode 100644
index 0000000..c9b9032
--- /dev/null
+++ b/openstack-tf/docker_swarm_setup/docker-swarm.tf
@@ -0,0 +1,501 @@
+#
+# Server groups for both the masters and the workers
+#
+resource "openstack_compute_servergroup_v2" "swarm_masters" {
+  name     = "swarm_masters"
+  policies = ["anti-affinity"]
+}
+resource "openstack_compute_servergroup_v2" "swarm_workers" {
+  name     = "swarm_workers"
+  policies = ["soft-anti-affinity"]
+}
+#
+# Network for the NFS traffic
+#
+resource "openstack_networking_network_v2" "swarm_nfs_net" {
+  name                  = var.swarm_nfs_private_network.network_name
+  admin_state_up        = true
+  external              = false
+  description           = var.swarm_nfs_private_network.network_description
+  dns_domain            = var.dns_zone.zone_name
+  mtu                   = var.mtu_size
+  port_security_enabled = true
+  shared                = false
+  region                = var.main_region
+}
+
+# Subnet
+resource "openstack_networking_subnet_v2" "swarm_nfs_subnet" {
+  name            = "swarm-nfs-net"
+  description     = "Subnet used by the Swarm cluster and the NFS service"
+  network_id      = openstack_networking_network_v2.swarm_nfs_net.id
+  cidr            = var.swarm_nfs_private_network.network_cidr
+  dns_nameservers = var.resolvers_ip
+  ip_version      = 4
+  enable_dhcp     = true
+  no_gateway      = true
+  allocation_pool {
+    start = var.swarm_nfs_private_network.allocation_pool_start
+    end   = var.swarm_nfs_private_network.allocation_pool_end
+  }
+}
+
+#
+# Security groups
+#
+resource "openstack_networking_secgroup_v2" "swarm_internal_traffic" {
+  name                 = "swarm_internal_docker_traffic"
+  delete_default_rules = true
+  description          = "Traffic between the Docker Swarm nodes"
+}
+resource "openstack_networking_secgroup_rule_v2" "everything_udp" {
+  security_group_id = openstack_networking_secgroup_v2.swarm_internal_traffic.id
+  description       = "UDP traffic between Swarm nodes"
+  direction         = "ingress"
+  ethertype         = "IPv4"
+  protocol          = "udp"
+  remote_ip_prefix  = var.main_private_subnet.cidr
+}
+resource "openstack_networking_secgroup_rule_v2" "everything_tcp" {
+  security_group_id = openstack_networking_secgroup_v2.swarm_internal_traffic.id
+  description       = "TCP traffic between Swarm nodes"
+  direction         = "ingress"
+  ethertype         = "IPv4"
+  protocol          = "tcp"
+  remote_ip_prefix  = var.main_private_subnet.cidr
+}
+resource "openstack_networking_secgroup_v2" "swarm_nfs_traffic" {
+  name                 = "docker_swarm_nfs"
+  delete_default_rules = true
+  description          = "Traffic between Docker Swarm and the NFS service"
+}
+resource "openstack_networking_secgroup_rule_v2" "swarm_nfs_udp" {
+  security_group_id = openstack_networking_secgroup_v2.swarm_nfs_traffic.id
+  description       = "UDP traffic"
+  direction         = "ingress"
+  ethertype         = "IPv4"
+  protocol          = "udp"
+  remote_ip_prefix  = var.swarm_nfs_private_network.network_cidr
+}
+resource "openstack_networking_secgroup_rule_v2" "swarm_nfs_tcp" {
+  security_group_id = openstack_networking_secgroup_v2.swarm_nfs_traffic.id
+  description       = "TCP traffic"
+  direction         = "ingress"
+  ethertype         = "IPv4"
+  protocol          = "tcp"
+  remote_ip_prefix  = var.swarm_nfs_private_network.network_cidr
+}
+
+#
+# Swarm Manager VMs
+#
+# Instance
+resource "openstack_compute_instance_v2" "docker_swarm_managers" {
+  count                   = var.docker_swarm_data.mgr_count
+  name                    = format("%s-%02d", var.docker_swarm_data.mgr_name, count.index + 1)
+  availability_zone_hints = var.availability_zones_names.availability_zone_no_gpu
+  flavor_name             = var.docker_swarm_data.mgr_flavor
+  key_pair                = var.ssh_key_file.name
+  security_groups         = [openstack_networking_secgroup_v2.default.name, openstack_networking_secgroup_v2.swarm_internal_traffic.name]
+  scheduler_hints {
+    group = openstack_compute_servergroup_v2.swarm_masters.id
+  }
+  block_device {
+    uuid                  = var.ubuntu_2204.uuid
+    source_type           = "image"
+    volume_size           = 10
+    boot_index            = 0
+    destination_type      = "volume"
+    delete_on_termination = false
+  }
+
+  block_device {
+    source_type           = "blank"
+    volume_size           = var.docker_swarm_data.mgr_data_disk_size
+    boot_index            = -1
+    destination_type      = "volume"
+    delete_on_termination = false
+  }
+
+  network {
+    name        = var.main_private_network.name
+    fixed_ip_v4 = var.swarm_managers_ip[count.index]
+  }
+  network {
+    name = var.swarm_nfs_private_network.network_name
+  }
+
+  user_data  = file(var.ubuntu2204_data_file)
+  depends_on = [openstack_networking_subnet_v2.swarm_nfs_subnet]
+}
+
+# Swarm worker nodes
+resource "openstack_compute_instance_v2" "docker_swarm_workers" {
+  count                   = var.docker_swarm_data.worker_count
+  name                    = format("%s-%02d", var.docker_swarm_data.worker_name, count.index + 1)
+  availability_zone_hints = var.availability_zones_names.availability_zone_no_gpu
+  flavor_name             = var.docker_swarm_data.worker_flavor
+  key_pair                = var.ssh_key_file.name
+  security_groups         = [openstack_networking_secgroup_v2.default.name, openstack_networking_secgroup_v2.swarm_internal_traffic.name]
+  scheduler_hints {
+    group = openstack_compute_servergroup_v2.swarm_workers.id
+  }
+  block_device {
+    uuid                  = var.ubuntu_2204.uuid
+    source_type           = "image"
+    volume_size           = 10
+    boot_index            = 0
+    destination_type      = "volume"
+    delete_on_termination = false
+  }
+
+  block_device {
+    source_type           = "blank"
+    volume_size           = var.docker_swarm_data.worker_data_disk_size
+    boot_index            = -1
+    destination_type      = "volume"
+    delete_on_termination = false
+  }
+
+  network {
+    name = var.main_private_network.name
+  }
+  network {
+    name = var.swarm_nfs_private_network.network_name
+  }
+
+  user_data  = file(var.ubuntu2204_data_file)
+  depends_on = [openstack_networking_subnet_v2.swarm_nfs_subnet]
+}
+
+# NFS server
+# Block device
+resource "openstack_blockstorage_volume_v3" "swarm_nfs_data_vol" {
+  name = var.docker_swarm_data.nfs_server_data_disk_name
+  size = var.docker_swarm_data.nfs_server_data_disk_size
+}
+
+# Instance
+resource "openstack_compute_instance_v2" "swarm_nfs_server" {
+  name                    = var.docker_swarm_data.nfs_server_name
+  availability_zone_hints = var.availability_zones_names.availability_zone_no_gpu
+  flavor_name             = var.docker_swarm_data.nfs_server_flavor
+  key_pair                = var.ssh_key_file.name
+  security_groups         = [openstack_networking_secgroup_v2.default.name, openstack_networking_secgroup_v2.swarm_nfs_traffic.name]
+  block_device {
+    uuid                  = var.ubuntu_2204.uuid
+    source_type           = "image"
+    volume_size           = 10
+    boot_index            = 0
+    destination_type      = "volume"
+    delete_on_termination = false
+  }
+
+  network {
+    name = var.main_private_network.name
+  }
+  network {
+    name        = var.swarm_nfs_private_network.network_name
+    fixed_ip_v4 = var.swarm_nfs_private_network.server_ip
+  }
+
+  user_data  = file(var.ubuntu2204_data_file)
+  depends_on = [openstack_networking_subnet_v2.swarm_nfs_subnet]
+}
+
+# Attach the additional volume
+resource "openstack_compute_volume_attach_v2" "swarm_nfs_data_attach_vol" {
+  instance_id = openstack_compute_instance_v2.swarm_nfs_server.id
+  volume_id   = openstack_blockstorage_volume_v3.swarm_nfs_data_vol.id
+  device      = var.docker_swarm_data.nfs_server_data_disk_device
+  depends_on  = [openstack_compute_instance_v2.swarm_nfs_server]
+}
+
+#
+# Octavia
+#
+# Swarm load balancer (L4), backed by Octavia
+resource "openstack_lb_loadbalancer_v2" "swarm_lb" {
+  vip_subnet_id         = var.main_private_subnet_id
+  name                  = var.octavia_swarm_data.swarm_lb_name
+  description           = var.octavia_swarm_data.swarm_lb_description
+  flavor_id             = var.octavia_swarm_data.octavia_flavor_id
+  vip_address           = var.octavia_swarm_data.swarm_octavia_main_ip
+  loadbalancer_provider = "amphora"
+}
+
+# Allocate a floating IP
+resource "openstack_networking_floatingip_v2" "swarm_lb_ip" {
+  pool = var.floating_ip_pools.main_public_ip_pool
+  # The DNS association does not work because of a bug in the OpenStack API
+  # dns_name   = "main-lb"
+  # dns_domain = var.dns_zone.zone_name
+  description = var.octavia_swarm_data.swarm_lb_description
+}
+
+resource "openstack_networking_floatingip_associate_v2" "swarm_lb" {
+  floating_ip = openstack_networking_floatingip_v2.swarm_lb_ip.address
+  port_id     = openstack_lb_loadbalancer_v2.swarm_lb.vip_port_id
+}
+
+locals {
+  swarm_recordset_name     = "${var.octavia_swarm_data.swarm_lb_hostname}.${var.dns_zone.zone_name}"
+  ccp_recordset_name       = "ccp.${var.dns_zone.zone_name}"
+  cdn_recordset_name       = "cdn.${var.dns_zone.zone_name}"
+  conductor_recordset_name = "conductor.${var.dns_zone.zone_name}"
+}
+
+resource "openstack_dns_recordset_v2" "swarm_lb_dns_recordset" {
+  zone_id     = var.dns_zone_id
+  name        = local.swarm_recordset_name
+  description = "Public IP address of the load balancer in front of Docker Swarm"
+  ttl         = 8600
+  type        = "A"
+  records     = [openstack_networking_floatingip_v2.swarm_lb_ip.address]
+}
+
+resource "openstack_dns_recordset_v2" "ccp_dns_recordset" {
+  zone_id     = var.dns_zone_id
+  name        = local.ccp_recordset_name
+  description = "CCP hostname"
+  ttl         = 8600
+  type        = "CNAME"
+  records     = [local.swarm_recordset_name]
+}
+
+resource "openstack_dns_recordset_v2" "cdn_dns_recordset" {
+  zone_id     = var.dns_zone_id
+  name        = local.cdn_recordset_name
+  description = "CDN hostname"
+  ttl         = 8600
+  type        = "CNAME"
+  records     = [local.swarm_recordset_name]
+}
+
+resource "openstack_dns_recordset_v2" "conductor_dns_recordset" {
+  zone_id     = var.dns_zone_id
+  name        = local.conductor_recordset_name
+  description = "Conductor hostname"
+  ttl         = 8600
+  type        = "CNAME"
+  records     = [local.swarm_recordset_name]
+}
+
+# Swarm HAPROXY stats listener
+resource "openstack_lb_listener_v2" "swarm_haproxy_stats_listener" {
+  loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id
+  protocol        = "TCP"
+  protocol_port   = 8880
+  description     = "Listener for the stats of the Docker Swarm HAPROXY instances"
+  name            = "swarm_haproxy_stats_listener"
+  allowed_cidrs   = [var.ssh_sources.d4s_vpn_1_cidr, var.ssh_sources.d4s_vpn_2_cidr, var.ssh_sources.s2i2s_vpn_1_cidr, var.ssh_sources.s2i2s_vpn_2_cidr]
+
+}
+
+resource "openstack_lb_pool_v2" "swarm_haproxy_stats_pool" {
+  listener_id = openstack_lb_listener_v2.swarm_haproxy_stats_listener.id
+  protocol    = "TCP"
+  lb_method   = "LEAST_CONNECTIONS"
+  name        = "swarm-haproxy-lb-stats"
+  description = "Pool for the stats of the Docker Swarm HAPROXY instances"
+  persistence {
+    type = "SOURCE_IP"
+  }
+}
+
+resource "openstack_lb_members_v2" "swarm_haproxy_stats_pool_members" {
+  pool_id = openstack_lb_pool_v2.swarm_haproxy_stats_pool.id
+  member {
+    name          = "swarm mgr haproxy 1"
+    address       = var.docker_swarm_data.mgr1_ip
+    protocol_port = 8880
+  }
+  member {
+    name          = "swarm mgr haproxy 2"
+    address       = var.docker_swarm_data.mgr2_ip
+    protocol_port = 8880
+  }
+  member {
+    name          = "swarm mgr haproxy 3"
+    address       = var.docker_swarm_data.mgr3_ip
+    protocol_port = 8880
+  }
+}
+
"openstack_lb_monitor_v2" "swarm_haproxy_stats_monitor" { + pool_id = openstack_lb_pool_v2.swarm_haproxy_stats_pool.id + name = "swarm_haproxy_stats_monitor" + type = "TCP" + delay = 20 + timeout = 5 + max_retries = 3 + admin_state_up = true +} + +# HAPROXY HTTP +resource "openstack_lb_listener_v2" "swarm_haproxy_http_listener" { + loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id + protocol = "TCP" + protocol_port = 80 + description = "HTTP listener of the Docker Swarm HAPROXY instances" + name = "swarm_haproxy_http_listener" + admin_state_up = true +} + +resource "openstack_lb_pool_v2" "swarm_haproxy_http_pool" { + listener_id = openstack_lb_listener_v2.swarm_haproxy_http_listener.id + protocol = "PROXYV2" + lb_method = "LEAST_CONNECTIONS" + name = "swarm-haproxy-lb-http" + description = "Pool for the HTTP listener of the Docker Swarm HAPROXY instances" + persistence { + type = "SOURCE_IP" + } + admin_state_up = true +} + +resource "openstack_lb_members_v2" "swarm_haproxy_http_pool_members" { + pool_id = openstack_lb_pool_v2.swarm_haproxy_http_pool.id + member { + name = "swarm mgr haproxy 1" + address = var.docker_swarm_data.mgr1_ip + protocol_port = 80 + } + member { + name = "swarm mgr haproxy 2" + address = var.docker_swarm_data.mgr2_ip + protocol_port = 80 + } + member { + name = "swarm mgr haproxy 3" + address = var.docker_swarm_data.mgr3_ip + protocol_port = 80 + } +} + +resource "openstack_lb_monitor_v2" "swarm_haproxy_http_monitor" { + pool_id = openstack_lb_pool_v2.swarm_haproxy_http_pool.id + name = "swarm_haproxy_http_monitor" + type = "HTTP" + http_method = "GET" + url_path = "/_haproxy_health_check" + expected_codes = "200" + delay = 20 + timeout = 5 + max_retries = 3 + admin_state_up = true +} + +# HAPROXY HTTPS +resource "openstack_lb_listener_v2" "swarm_haproxy_https_listener" { + loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id + protocol = "TCP" + protocol_port = 443 + description = "HTTPS listener of the main HAPROXY instances" + name = "swarm_haproxy_https_listener" + admin_state_up = true +} + +resource "openstack_lb_pool_v2" "swarm_haproxy_https_pool" { + listener_id = openstack_lb_listener_v2.swarm_haproxy_https_listener.id + protocol = "PROXYV2" + lb_method = "LEAST_CONNECTIONS" + name = "swarm-haproxy-lb-https" + description = "Pool for the HTTPS listener of the Docker Swarm HAPROXY instances" + persistence { + type = "SOURCE_IP" + } + admin_state_up = true +} + +resource "openstack_lb_members_v2" "swarm_haproxy_https_pool_members" { + pool_id = openstack_lb_pool_v2.swarm_haproxy_https_pool.id + member { + name = "swarm mgr haproxy 1" + address = var.docker_swarm_data.mgr1_ip + protocol_port = 443 + } + member { + name = "swarm mgr haproxy 2" + address = var.docker_swarm_data.mgr2_ip + protocol_port = 443 + } + member { + name = "swarm mgr haproxy 3" + address = var.docker_swarm_data.mgr3_ip + protocol_port = 443 + } +} + +resource "openstack_lb_monitor_v2" "swarm_haproxy_https_monitor" { + pool_id = openstack_lb_pool_v2.swarm_haproxy_https_pool.id + name = "swarm_haproxy_https_monitor" + type = "HTTPS" + http_method = "GET" + url_path = "/_haproxy_health_check" + expected_codes = "200" + delay = 20 + timeout = 5 + max_retries = 3 + admin_state_up = true +} + +# HAPROXY HTTP on port 8080 +resource "openstack_lb_listener_v2" "swarm_haproxy_8080_listener" { + loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id + protocol = "TCP" + protocol_port = 8080 + description = "HTTP port 8080 listener of the Docker Swarm HAPROXY instances" + name = 
"swarm_haproxy_8080_listener" + admin_state_up = true +} + +resource "openstack_lb_pool_v2" "swarm_haproxy_8080_pool" { + listener_id = openstack_lb_listener_v2.swarm_haproxy_8080_listener.id + protocol = "PROXYV2" + lb_method = "LEAST_CONNECTIONS" + name = "swarm-haproxy-lb-http-8080" + description = "Pool for the HTTP port 8080 listener of the Docker Swarm HAPROXY instances" + persistence { + type = "SOURCE_IP" + } + admin_state_up = true +} + +resource "openstack_lb_members_v2" "swarm_haproxy_8080_pool_members" { + pool_id = openstack_lb_pool_v2.swarm_haproxy_8080_pool.id + member { + name = "swarm mgr haproxy 1" + address = var.docker_swarm_data.mgr1_ip + protocol_port = 8080 + } + member { + name = "swarm mgr haproxy 2" + address = var.docker_swarm_data.mgr2_ip + protocol_port = 8080 + } + member { + name = "swarm mgr haproxy 3" + address = var.docker_swarm_data.mgr3_ip + protocol_port = 8080 + } +} + +resource "openstack_lb_monitor_v2" "swarm_haproxy_8080_monitor" { + pool_id = openstack_lb_pool_v2.swarm_haproxy_8080_pool.id + name = "swarm_haproxy_8080_monitor" + type = "HTTP" + http_method = "GET" + url_path = "/_haproxy_health_check" + expected_codes = "200" + delay = 20 + timeout = 5 + max_retries = 3 + admin_state_up = true +} + +output "swarm_loadbalancer_ip" { + description = "Docker Swarm Load balancer IP address" + value = openstack_lb_loadbalancer_v2.swarm_lb.vip_address +} + diff --git a/openstack-tf/docker_swarm_setup/swarm-variables.tf b/openstack-tf/docker_swarm_setup/swarm-variables.tf new file mode 100644 index 0000000..a6e41cb --- /dev/null +++ b/openstack-tf/docker_swarm_setup/swarm-variables.tf @@ -0,0 +1,57 @@ +variable "docker_swarm_data" { + type = map(string) + default = { + mgr_name = "swarm-mgr" + mgr1_ip = "10.1.40.31" + mgr1_cidr = "10.1.40.31/32" + mgr2_ip = "10.1.40.32" + mgr2_cidr = "10.1.40.32/32" + mgr3_ip = "10.1.40.33" + mgr3_cidr = "10.1.40.33/32" + mgr_count = 3 + mgr_flavor = "m1.large" + mgr_data_disk_size = 100 + worker_name = "swarm-worker" + worker_count = 8 + worker_flavor = "m1.xxl" + worker_data_disk_size = 200 + nfs_server_name = "swarm-nfs-server" + nfs_server_flavor = "m1.medium" + nfs_server_data_disk_name = "Swarm NFS server data Disk" + nfs_server_data_disk_size = 200 + nfs_server_data_disk_device = "/dev/vdb" + } +} + +variable "swarm_managers_ip" { + type = list(string) + default = ["10.1.40.31", "10.1.40.32", "10.1.40.33"] + +} + +variable "octavia_swarm_data" { + type = map(string) + default = { + swarm_lb_name = "d4s-production-cloud-swarm-l4" + swarm_lb_description = "L4 balancer that serves the D4Science production Docker Swarm cluster" + swarm_lb_name = "d4s-production-cloud-swarm-l4" + octavia_flavor = "octavia_amphora-mvcpu-ha" + octavia_flavor_id = "394988b5-6603-4a1e-a939-8e177c6681c7" + swarm_lb_hostname = "swarm-lb" + swarm_octavia_main_ip = "10.1.40.30" + swarm_octavia_main_cidr = "10.1.40.30/32" + } +} + +variable "swarm_nfs_private_network" { + type = map(string) + default = { + network_name = "swarm-nfs-net" + network_description = "Network used by the swarm nodes and the NFS service" + network_cidr = "192.168.4.0/23" + allocation_pool_start = "192.168.4.100" + allocation_pool_end = "192.168.5.254" + server_ip = "192.168.4.10" + server_cidr = "192.168.4.5/23" + } +}