Add the Docker Swarm resources.

This commit is contained in:
Andrea Dell'Amico 2023-11-06 20:04:28 +01:00
parent 0124413fc1
commit 4482ef3920
Signed by untrusted user: andrea.dellamico
GPG Key ID: 147ABE6CEB9E20FF
7 changed files with 584 additions and 0 deletions

View File

@ -0,0 +1,9 @@
# Docker Swarm
Docker Swarm cluster behind an Octavia L4 load balancer
* One Octavia Load balancer
* 3 VMs for the master nodes with their server group (anti affinity)
* 8 VMs for the worker nodes with their server group (soft anti affinity)
* 1 VM for the NFS service
* 1 dedicated network, for the traffic of the NFS exports

View File

@ -0,0 +1 @@
../../docker_swarm_setup/docker-swarm.tf

View File

@ -0,0 +1,15 @@
# Values for the Octavia L4 load balancer in front of the Docker Swarm cluster.
# Each key is set exactly once (the repeated swarm_lb_name entry was removed).
octavia_swarm_data = {
  swarm_lb_name           = "d4s-production-cloud-swarm-l4"
  swarm_lb_description    = "L4 balancer that serves the D4Science production Docker Swarm cluster"
  octavia_flavor          = "octavia_amphora-mvcpu-ha"
  octavia_flavor_id       = "394988b5-6603-4a1e-a939-8e177c6681c7"
  swarm_lb_hostname       = "swarm-lb"
  swarm_octavia_main_ip   = "10.1.40.30"
  swarm_octavia_main_cidr = "10.1.40.30/32"
  # The following values aren't available when the module runs, so they have to be
  # retrieved afterwards with the command
  #   openstack --os-cloud d4s-pre port list -f value | grep octavia-lb-vrrp
  # This means that the execution will fail.
  # NOTE(review): presumably only the run made before these values are known fails — confirm.
  octavia_vrrp_ip_1 = "10.1.43.97/32"
  octavia_vrrp_ip_2 = "10.1.44.78/32"
}

View File

@ -0,0 +1 @@
../../docker_swarm_setup/swarm-variables.tf

View File

@ -0,0 +1,501 @@
#
# Server groups for both the masters and the workers
#
# Server group that the Swarm manager instances are scheduled into.
resource "openstack_compute_servergroup_v2" "swarm_masters" {
  policies = ["anti-affinity"]
  name     = "swarm_masters"
}
# Server group that the Swarm worker instances are scheduled into.
resource "openstack_compute_servergroup_v2" "swarm_workers" {
  policies = ["soft-anti-affinity"]
  name     = "swarm_workers"
}
#
# Network for the NFS traffic
#
# Dedicated, non-shared, non-external network that carries the NFS traffic.
# Name, description, DNS domain, MTU and region all come from input variables.
resource "openstack_networking_network_v2" "swarm_nfs_net" {
  name        = var.swarm_nfs_private_network.network_name
  description = var.swarm_nfs_private_network.network_description
  region      = var.main_region
  dns_domain  = var.dns_zone.zone_name
  mtu         = var.mtu_size

  # Real booleans instead of the quoted "true"/"false" strings, consistent
  # with port_security_enabled and shared below.
  admin_state_up        = true
  external              = false
  port_security_enabled = true
  shared                = false
}
# IPv4 subnet of the NFS network. DHCP is enabled and no gateway is configured;
# addresses are handed out from the allocation pool defined by the input variable.
resource "openstack_networking_subnet_v2" "swarm_nfs_subnet" {
  network_id  = openstack_networking_network_v2.swarm_nfs_net.id
  name        = "swarm-nfs-net"
  description = "Subnet used by the Swarm cluster and the NFS service"

  ip_version      = 4
  cidr            = var.swarm_nfs_private_network.network_cidr
  dns_nameservers = var.resolvers_ip
  enable_dhcp     = true
  no_gateway      = true

  allocation_pool {
    start = var.swarm_nfs_private_network.allocation_pool_start
    end   = var.swarm_nfs_private_network.allocation_pool_end
  }
}
#
# Security groups
#
# Security group attached to the Swarm manager and worker instances.
# It is created without the default rules; its rules are declared as separate resources.
resource "openstack_networking_secgroup_v2" "swarm_internal_traffic" {
  name        = "swarm_internal_docker_traffic"
  description = "Traffic between the Docker Swarm nodes"
  # Boolean literal instead of the quoted string "true".
  delete_default_rules = true
}
# Ingress IPv4 UDP rule for the Swarm security group. No port range is set;
# the source is restricted to the CIDR of the main private subnet.
resource "openstack_networking_secgroup_rule_v2" "everything_udp" {
  description       = "UDP traffic between Swarm nodes"
  security_group_id = openstack_networking_secgroup_v2.swarm_internal_traffic.id
  remote_ip_prefix  = var.main_private_subnet.cidr
  protocol          = "udp"
  ethertype         = "IPv4"
  direction         = "ingress"
}
# Ingress IPv4 TCP rule for the Swarm security group. No port range is set;
# the source is restricted to the CIDR of the main private subnet.
resource "openstack_networking_secgroup_rule_v2" "everything_tcp" {
  description       = "TCP traffic between Swarm nodes"
  security_group_id = openstack_networking_secgroup_v2.swarm_internal_traffic.id
  remote_ip_prefix  = var.main_private_subnet.cidr
  protocol          = "tcp"
  ethertype         = "IPv4"
  direction         = "ingress"
}
# Security group attached to the NFS server instance.
# It is created without the default rules; its rules are declared as separate resources.
resource "openstack_networking_secgroup_v2" "swarm_nfs_traffic" {
  name        = "docker_swarm_nfs"
  description = "Traffic between Docker Swarm and the NFS service"
  # Boolean literal instead of the quoted string "true".
  delete_default_rules = true
}
# Ingress IPv4 UDP rule for the NFS security group. No port range is set;
# the source is restricted to the CIDR of the NFS network.
resource "openstack_networking_secgroup_rule_v2" "swarm_nfs_udp" {
  description       = "UDP traffic"
  security_group_id = openstack_networking_secgroup_v2.swarm_nfs_traffic.id
  remote_ip_prefix  = var.swarm_nfs_private_network.network_cidr
  protocol          = "udp"
  ethertype         = "IPv4"
  direction         = "ingress"
}
# Ingress IPv4 TCP rule for the NFS security group. No port range is set;
# the source is restricted to the CIDR of the NFS network.
resource "openstack_networking_secgroup_rule_v2" "swarm_nfs_tcp" {
  description       = "TCP traffic"
  security_group_id = openstack_networking_secgroup_v2.swarm_nfs_traffic.id
  remote_ip_prefix  = var.swarm_nfs_private_network.network_cidr
  protocol          = "tcp"
  ethertype         = "IPv4"
  direction         = "ingress"
}
#
# Swarm Manager VMs
#
# Swarm manager instances.
# One instance per index up to docker_swarm_data.mgr_count, named "<mgr_name>-NN"
# (NN is the 1-based, zero-padded index). Each instance boots from a 10 GB volume
# built from the Ubuntu 22.04 image, gets an additional blank data volume, and is
# attached to both the main private network (fixed address) and the NFS network.
resource "openstack_compute_instance_v2" "docker_swarm_managers" {
  count                   = var.docker_swarm_data.mgr_count
  name                    = format("%s-%02d", var.docker_swarm_data.mgr_name, count.index + 1)
  availability_zone_hints = var.availability_zones_names.availability_zone_no_gpu
  flavor_name             = var.docker_swarm_data.mgr_flavor
  key_pair                = var.ssh_key_file.name
  security_groups = [
    openstack_networking_secgroup_v2.default.name,
    openstack_networking_secgroup_v2.swarm_internal_traffic.name,
  ]

  # Place the managers in their own server group.
  scheduler_hints {
    group = openstack_compute_servergroup_v2.swarm_masters.id
  }

  # Boot volume, kept when the instance is destroyed.
  block_device {
    uuid                  = var.ubuntu_2204.uuid
    source_type           = "image"
    volume_size           = 10
    boot_index            = 0
    destination_type      = "volume"
    delete_on_termination = false
  }

  # Blank data volume, kept when the instance is destroyed.
  block_device {
    source_type           = "blank"
    volume_size           = var.docker_swarm_data.mgr_data_disk_size
    boot_index            = -1
    destination_type      = "volume"
    delete_on_termination = false
  }

  network {
    name = var.main_private_network.name
    # Direct index; replaces the legacy splat form `var.swarm_managers_ip.*[count.index]`,
    # which selects the same element.
    fixed_ip_v4 = var.swarm_managers_ip[count.index]
  }

  network {
    name = var.swarm_nfs_private_network.network_name
  }

  # Direct function call; replaces the interpolation-only template string.
  user_data = file(var.ubuntu2204_data_file)

  # The NFS network is referenced by name above, so the dependency on its
  # subnet has to be declared explicitly.
  depends_on = [openstack_networking_subnet_v2.swarm_nfs_subnet]
}
# Swarm worker instances.
# One instance per index up to docker_swarm_data.worker_count, named "<worker_name>-NN"
# (NN is the 1-based, zero-padded index). Each instance boots from a 10 GB volume
# built from the Ubuntu 22.04 image, gets an additional blank data volume, and is
# attached to both the main private network and the NFS network.
resource "openstack_compute_instance_v2" "docker_swarm_workers" {
  count                   = var.docker_swarm_data.worker_count
  name                    = format("%s-%02d", var.docker_swarm_data.worker_name, count.index + 1)
  availability_zone_hints = var.availability_zones_names.availability_zone_no_gpu
  flavor_name             = var.docker_swarm_data.worker_flavor
  key_pair                = var.ssh_key_file.name
  security_groups = [
    openstack_networking_secgroup_v2.default.name,
    openstack_networking_secgroup_v2.swarm_internal_traffic.name,
  ]

  # Place the workers in their own server group.
  scheduler_hints {
    group = openstack_compute_servergroup_v2.swarm_workers.id
  }

  # Boot volume, kept when the instance is destroyed.
  block_device {
    uuid                  = var.ubuntu_2204.uuid
    source_type           = "image"
    volume_size           = 10
    boot_index            = 0
    destination_type      = "volume"
    delete_on_termination = false
  }

  # Blank data volume, kept when the instance is destroyed.
  block_device {
    source_type           = "blank"
    volume_size           = var.docker_swarm_data.worker_data_disk_size
    boot_index            = -1
    destination_type      = "volume"
    delete_on_termination = false
  }

  network {
    name = var.main_private_network.name
  }

  network {
    name = var.swarm_nfs_private_network.network_name
  }

  # Direct function call; replaces the interpolation-only template string.
  user_data = file(var.ubuntu2204_data_file)

  # The NFS network is referenced by name above, so the dependency on its
  # subnet has to be declared explicitly.
  depends_on = [openstack_networking_subnet_v2.swarm_nfs_subnet]
}
# NFS server
# Data volume for the NFS server; name and size come from docker_swarm_data.
resource "openstack_blockstorage_volume_v3" "swarm_nfs_data_vol" {
  size = var.docker_swarm_data.nfs_server_data_disk_size
  name = var.docker_swarm_data.nfs_server_data_disk_name
}
# NFS server instance.
# Boots from a 10 GB volume built from the Ubuntu 22.04 image and is attached to
# the main private network and, with a fixed address, to the NFS network.
resource "openstack_compute_instance_v2" "swarm_nfs_server" {
  name                    = var.docker_swarm_data.nfs_server_name
  availability_zone_hints = var.availability_zones_names.availability_zone_no_gpu
  flavor_name             = var.docker_swarm_data.nfs_server_flavor
  key_pair                = var.ssh_key_file.name
  security_groups = [
    openstack_networking_secgroup_v2.default.name,
    openstack_networking_secgroup_v2.swarm_nfs_traffic.name,
  ]

  # Boot volume, kept when the instance is destroyed.
  block_device {
    uuid                  = var.ubuntu_2204.uuid
    source_type           = "image"
    volume_size           = 10
    boot_index            = 0
    destination_type      = "volume"
    delete_on_termination = false
  }

  network {
    name = var.main_private_network.name
  }

  network {
    name        = var.swarm_nfs_private_network.network_name
    fixed_ip_v4 = var.swarm_nfs_private_network.server_ip
  }

  # Direct function call; replaces the interpolation-only template string.
  user_data = file(var.ubuntu2204_data_file)

  # The NFS network is referenced by name above, so the dependency on its
  # subnet has to be declared explicitly.
  depends_on = [openstack_networking_subnet_v2.swarm_nfs_subnet]
}
# Attaches the NFS data volume to the NFS server at the device path taken
# from docker_swarm_data.nfs_server_data_disk_device.
resource "openstack_compute_volume_attach_v2" "swarm_nfs_data_attach_vol" {
  volume_id   = openstack_blockstorage_volume_v3.swarm_nfs_data_vol.id
  instance_id = openstack_compute_instance_v2.swarm_nfs_server.id
  device      = var.docker_swarm_data.nfs_server_data_disk_device

  depends_on = [
    openstack_compute_instance_v2.swarm_nfs_server,
  ]
}
#
# Octavia
#
# Octavia (amphora provider) L4 load balancer for the Swarm cluster.
# The VIP is a fixed address on the main private subnet.
resource "openstack_lb_loadbalancer_v2" "swarm_lb" {
  name                  = var.octavia_swarm_data.swarm_lb_name
  description           = var.octavia_swarm_data.swarm_lb_description
  loadbalancer_provider = "amphora"
  flavor_id             = var.octavia_swarm_data.octavia_flavor_id
  vip_subnet_id         = var.main_private_subnet_id
  vip_address           = var.octavia_swarm_data.swarm_octavia_main_ip
}
# Floating IP for the load balancer, taken from the main public pool.
resource "openstack_networking_floatingip_v2" "swarm_lb_ip" {
  description = var.octavia_swarm_data.swarm_lb_description
  pool        = var.floating_ip_pools.main_public_ip_pool
  # The DNS association is disabled because of a bug in the OpenStack API:
  # dns_name = "main-lb"
  # dns_domain = var.dns_zone.zone_name
}
# Binds the floating IP to the VIP port of the load balancer.
resource "openstack_networking_floatingip_associate_v2" "swarm_lb" {
  port_id     = openstack_lb_loadbalancer_v2.swarm_lb.vip_port_id
  floating_ip = openstack_networking_floatingip_v2.swarm_lb_ip.address
}
# DNS names built by prefixing a host label to the zone name.
locals {
  swarm_recordset_name     = "${var.octavia_swarm_data.swarm_lb_hostname}.${var.dns_zone.zone_name}"
  conductor_recordset_name = "conductor.${var.dns_zone.zone_name}"
  cdn_recordset_name       = "cdn.${var.dns_zone.zone_name}"
  ccp_recordset_name       = "ccp.${var.dns_zone.zone_name}"
}
# A record that resolves the load balancer hostname to its floating IP.
resource "openstack_dns_recordset_v2" "swarm_lb_dns_recordset" {
  zone_id     = var.dns_zone_id
  type        = "A"
  name        = local.swarm_recordset_name
  records     = [openstack_networking_floatingip_v2.swarm_lb_ip.address]
  ttl         = 8600
  description = "Public IP address of the load balancer in front of Docker Swarm"
}
# CNAME that points the "ccp" name at the load balancer hostname.
resource "openstack_dns_recordset_v2" "ccp_dns_recordset" {
  zone_id     = var.dns_zone_id
  type        = "CNAME"
  name        = local.ccp_recordset_name
  records     = [local.swarm_recordset_name]
  ttl         = 8600
  description = "CCP hostname"
}
# CNAME that points the "cdn" name at the load balancer hostname.
resource "openstack_dns_recordset_v2" "cdn_dns_recordset" {
  zone_id     = var.dns_zone_id
  type        = "CNAME"
  name        = local.cdn_recordset_name
  records     = [local.swarm_recordset_name]
  ttl         = 8600
  description = "CDN hostname"
}
# CNAME that points the "conductor" name at the load balancer hostname.
resource "openstack_dns_recordset_v2" "conductor_dns_recordset" {
  zone_id     = var.dns_zone_id
  type        = "CNAME"
  name        = local.conductor_recordset_name
  records     = [local.swarm_recordset_name]
  ttl         = 8600
  description = "Conductor hostname"
}
# TCP listener on port 8880 for the HAPROXY stats.
# Access is limited to the four VPN CIDRs listed in allowed_cidrs.
resource "openstack_lb_listener_v2" "swarm_haproxy_stats_listener" {
  name            = "swarm_haproxy_stats_listener"
  description     = "Listener for the stats of the Docker Swarm HAPROXY instances"
  loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id
  protocol        = "TCP"
  protocol_port   = 8880
  allowed_cidrs = [
    var.ssh_sources.d4s_vpn_1_cidr,
    var.ssh_sources.d4s_vpn_2_cidr,
    var.ssh_sources.s2i2s_vpn_1_cidr,
    var.ssh_sources.s2i2s_vpn_2_cidr,
  ]
}
# TCP pool behind the stats listener, with least-connections balancing and
# source-IP session persistence.
resource "openstack_lb_pool_v2" "swarm_haproxy_stats_pool" {
  listener_id = openstack_lb_listener_v2.swarm_haproxy_stats_listener.id
  protocol    = "TCP"
  lb_method   = "LEAST_CONNECTIONS"
  name        = "swarm-haproxy-lb-stats"
  # Description corrected from "main HAPROXY instances" so that it matches the
  # stats listener and the other pools of this load balancer.
  description = "Pool for the stats of the Docker Swarm HAPROXY instances"

  persistence {
    type = "SOURCE_IP"
  }
}
# Members of the stats pool: the three manager addresses, port 8880.
resource "openstack_lb_members_v2" "swarm_haproxy_stats_pool_members" {
  pool_id = openstack_lb_pool_v2.swarm_haproxy_stats_pool.id

  member {
    protocol_port = 8880
    address       = var.docker_swarm_data.mgr1_ip
    name          = "swarm mgr haproxy 1"
  }

  member {
    protocol_port = 8880
    address       = var.docker_swarm_data.mgr2_ip
    name          = "swarm mgr haproxy 2"
  }

  member {
    protocol_port = 8880
    address       = var.docker_swarm_data.mgr3_ip
    name          = "swarm mgr haproxy 3"
  }
}
# TCP health monitor for the stats pool.
resource "openstack_lb_monitor_v2" "swarm_haproxy_stats_monitor" {
  name           = "swarm_haproxy_stats_monitor"
  pool_id        = openstack_lb_pool_v2.swarm_haproxy_stats_pool.id
  admin_state_up = true
  type           = "TCP"

  # Check timing
  delay       = 20
  timeout     = 5
  max_retries = 3
}
# TCP listener on port 80.
resource "openstack_lb_listener_v2" "swarm_haproxy_http_listener" {
  name            = "swarm_haproxy_http_listener"
  description     = "HTTP listener of the Docker Swarm HAPROXY instances"
  loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id
  admin_state_up  = true
  protocol        = "TCP"
  protocol_port   = 80
}
# PROXYV2 pool behind the port 80 listener, with least-connections balancing
# and source-IP session persistence.
resource "openstack_lb_pool_v2" "swarm_haproxy_http_pool" {
  name           = "swarm-haproxy-lb-http"
  description    = "Pool for the HTTP listener of the Docker Swarm HAPROXY instances"
  listener_id    = openstack_lb_listener_v2.swarm_haproxy_http_listener.id
  admin_state_up = true
  protocol       = "PROXYV2"
  lb_method      = "LEAST_CONNECTIONS"

  persistence {
    type = "SOURCE_IP"
  }
}
# Members of the HTTP pool: the three manager addresses, port 80.
resource "openstack_lb_members_v2" "swarm_haproxy_http_pool_members" {
  pool_id = openstack_lb_pool_v2.swarm_haproxy_http_pool.id

  member {
    protocol_port = 80
    address       = var.docker_swarm_data.mgr1_ip
    name          = "swarm mgr haproxy 1"
  }

  member {
    protocol_port = 80
    address       = var.docker_swarm_data.mgr2_ip
    name          = "swarm mgr haproxy 2"
  }

  member {
    protocol_port = 80
    address       = var.docker_swarm_data.mgr3_ip
    name          = "swarm mgr haproxy 3"
  }
}
# HTTP health monitor for the HTTP pool: GET on the health check path, expecting 200.
resource "openstack_lb_monitor_v2" "swarm_haproxy_http_monitor" {
  name           = "swarm_haproxy_http_monitor"
  pool_id        = openstack_lb_pool_v2.swarm_haproxy_http_pool.id
  admin_state_up = true

  # Probe definition
  type           = "HTTP"
  http_method    = "GET"
  url_path       = "/_haproxy_health_check"
  expected_codes = "200"

  # Check timing
  delay       = 20
  timeout     = 5
  max_retries = 3
}
# TCP listener on port 443.
resource "openstack_lb_listener_v2" "swarm_haproxy_https_listener" {
  loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id
  protocol        = "TCP"
  protocol_port   = 443
  # Description corrected from "main HAPROXY instances" so that it matches the
  # other listeners of this load balancer.
  description    = "HTTPS listener of the Docker Swarm HAPROXY instances"
  name           = "swarm_haproxy_https_listener"
  admin_state_up = true
}
# PROXYV2 pool behind the port 443 listener, with least-connections balancing
# and source-IP session persistence.
resource "openstack_lb_pool_v2" "swarm_haproxy_https_pool" {
  name           = "swarm-haproxy-lb-https"
  description    = "Pool for the HTTPS listener of the Docker Swarm HAPROXY instances"
  listener_id    = openstack_lb_listener_v2.swarm_haproxy_https_listener.id
  admin_state_up = true
  protocol       = "PROXYV2"
  lb_method      = "LEAST_CONNECTIONS"

  persistence {
    type = "SOURCE_IP"
  }
}
# Members of the HTTPS pool: the three manager addresses, port 443.
resource "openstack_lb_members_v2" "swarm_haproxy_https_pool_members" {
  pool_id = openstack_lb_pool_v2.swarm_haproxy_https_pool.id

  member {
    protocol_port = 443
    address       = var.docker_swarm_data.mgr1_ip
    name          = "swarm mgr haproxy 1"
  }

  member {
    protocol_port = 443
    address       = var.docker_swarm_data.mgr2_ip
    name          = "swarm mgr haproxy 2"
  }

  member {
    protocol_port = 443
    address       = var.docker_swarm_data.mgr3_ip
    name          = "swarm mgr haproxy 3"
  }
}
# HTTPS health monitor for the HTTPS pool: GET on the health check path, expecting 200.
resource "openstack_lb_monitor_v2" "swarm_haproxy_https_monitor" {
  name           = "swarm_haproxy_https_monitor"
  pool_id        = openstack_lb_pool_v2.swarm_haproxy_https_pool.id
  admin_state_up = true

  # Probe definition
  type           = "HTTPS"
  http_method    = "GET"
  url_path       = "/_haproxy_health_check"
  expected_codes = "200"

  # Check timing
  delay       = 20
  timeout     = 5
  max_retries = 3
}
# TCP listener on port 8080.
resource "openstack_lb_listener_v2" "swarm_haproxy_8080_listener" {
  name            = "swarm_haproxy_8080_listener"
  description     = "HTTP port 8080 listener of the Docker Swarm HAPROXY instances"
  loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id
  admin_state_up  = true
  protocol        = "TCP"
  protocol_port   = 8080
}
# PROXYV2 pool behind the port 8080 listener, with least-connections balancing
# and source-IP session persistence.
resource "openstack_lb_pool_v2" "swarm_haproxy_8080_pool" {
  name           = "swarm-haproxy-lb-http-8080"
  description    = "Pool for the HTTP port 8080 listener of the Docker Swarm HAPROXY instances"
  listener_id    = openstack_lb_listener_v2.swarm_haproxy_8080_listener.id
  admin_state_up = true
  protocol       = "PROXYV2"
  lb_method      = "LEAST_CONNECTIONS"

  persistence {
    type = "SOURCE_IP"
  }
}
# Members of the port 8080 pool: the three manager addresses, port 8080.
resource "openstack_lb_members_v2" "swarm_haproxy_8080_pool_members" {
  pool_id = openstack_lb_pool_v2.swarm_haproxy_8080_pool.id

  member {
    protocol_port = 8080
    address       = var.docker_swarm_data.mgr1_ip
    name          = "swarm mgr haproxy 1"
  }

  member {
    protocol_port = 8080
    address       = var.docker_swarm_data.mgr2_ip
    name          = "swarm mgr haproxy 2"
  }

  member {
    protocol_port = 8080
    address       = var.docker_swarm_data.mgr3_ip
    name          = "swarm mgr haproxy 3"
  }
}
# HTTP health monitor for the port 8080 pool: GET on the health check path, expecting 200.
resource "openstack_lb_monitor_v2" "swarm_haproxy_8080_monitor" {
  name           = "swarm_haproxy_8080_monitor"
  pool_id        = openstack_lb_pool_v2.swarm_haproxy_8080_pool.id
  admin_state_up = true

  # Probe definition
  type           = "HTTP"
  http_method    = "GET"
  url_path       = "/_haproxy_health_check"
  expected_codes = "200"

  # Check timing
  delay       = 20
  timeout     = 5
  max_retries = 3
}
# Exposes the VIP address of the Swarm load balancer.
output "swarm_loadbalancer_ip" {
  value       = openstack_lb_loadbalancer_v2.swarm_lb.vip_address
  description = "Docker Swarm Load balancer IP address"
}

View File

@ -0,0 +1,57 @@
# Sizing, naming and addressing of the Swarm managers, the workers and the NFS server.
# The type is map(string), so the numeric defaults below are stored as strings.
variable "docker_swarm_data" {
  type = map(string)
  default = {
    # Managers
    mgr_name           = "swarm-mgr"
    mgr_count          = 3
    mgr_flavor         = "m1.large"
    mgr_data_disk_size = 100
    mgr1_ip            = "10.1.40.31"
    mgr1_cidr          = "10.1.40.31/32"
    mgr2_ip            = "10.1.40.32"
    mgr2_cidr          = "10.1.40.32/32"
    mgr3_ip            = "10.1.40.33"
    mgr3_cidr          = "10.1.40.33/32"

    # Workers
    worker_name           = "swarm-worker"
    worker_count          = 8
    worker_flavor         = "m1.xxl"
    worker_data_disk_size = 200

    # NFS server
    nfs_server_name             = "swarm-nfs-server"
    nfs_server_flavor           = "m1.medium"
    nfs_server_data_disk_name   = "Swarm NFS server data Disk"
    nfs_server_data_disk_size   = 200
    nfs_server_data_disk_device = "/dev/vdb"
  }
}
# Fixed addresses of the Swarm managers, one entry per manager.
variable "swarm_managers_ip" {
  type = list(string)
  default = [
    "10.1.40.31",
    "10.1.40.32",
    "10.1.40.33",
  ]
}
# Default settings for the Octavia L4 load balancer in front of the Swarm cluster.
# Each key is declared exactly once (the repeated swarm_lb_name entry was removed).
variable "octavia_swarm_data" {
  type = map(string)
  default = {
    swarm_lb_name           = "d4s-production-cloud-swarm-l4"
    swarm_lb_description    = "L4 balancer that serves the D4Science production Docker Swarm cluster"
    octavia_flavor          = "octavia_amphora-mvcpu-ha"
    octavia_flavor_id       = "394988b5-6603-4a1e-a939-8e177c6681c7"
    swarm_lb_hostname       = "swarm-lb"
    swarm_octavia_main_ip   = "10.1.40.30"
    swarm_octavia_main_cidr = "10.1.40.30/32"
  }
}
# Definition of the private network dedicated to the NFS traffic.
variable "swarm_nfs_private_network" {
  type = map(string)
  default = {
    network_name        = "swarm-nfs-net"
    network_description = "Network used by the swarm nodes and the NFS service"
    network_cidr        = "192.168.4.0/23"

    # DHCP allocation range
    allocation_pool_start = "192.168.4.100"
    allocation_pool_end   = "192.168.5.254"

    # NFS server address.
    # NOTE(review): server_cidr names a different host (192.168.4.5) than
    # server_ip (192.168.4.10) — confirm which one is intended.
    server_ip   = "192.168.4.10"
    server_cidr = "192.168.4.5/23"
  }
}