Docker swarm in preprod.

This commit is contained in:
Andrea Dell'Amico 2024-11-06 17:59:18 +01:00
parent c890ac9cf7
commit 439944a380
Signed by: andrea.dellamico
GPG Key ID: 147ABE6CEB9E20FF
6 changed files with 4486 additions and 0 deletions

View File

@ -0,0 +1,3 @@
# Docker Swarm setup in preprod. Manager nodes only
It's expected to use Manila for NFS, and an OVN load balancer.

View File

@ -0,0 +1,448 @@
#
# Server groups for both the manager nodes
#
# Soft anti-affinity: spread the manager VMs across different hypervisors
# on a best-effort basis (placement still succeeds if they can't be spread).
resource "openstack_compute_servergroup_v2" "swarm_masters" {
  name     = "swarm_masters"
  policies = ["soft-anti-affinity"]
}
#
# Security groups
#
# Dedicated group for unrestricted node-to-node traffic inside the swarm.
# Default egress rules are dropped so only the rules below apply.
resource "openstack_networking_secgroup_v2" "swarm_internal_traffic" {
  name                 = "swarm_internal_docker_traffic"
  description          = "Traffic between the Docker Swarm nodes"
  delete_default_rules = "true"
}

# Allow all UDP from the main private subnet (overlay/gossip traffic)
resource "openstack_networking_secgroup_rule_v2" "everything_udp" {
  security_group_id = openstack_networking_secgroup_v2.swarm_internal_traffic.id
  description       = "UDP traffic between Swarm nodes"
  direction         = "ingress"
  ethertype         = "IPv4"
  protocol          = "udp"
  remote_ip_prefix  = data.terraform_remote_state.privnet_dns_router.outputs.main_private_subnet.cidr
}

# Allow all TCP from the main private subnet
resource "openstack_networking_secgroup_rule_v2" "everything_tcp" {
  security_group_id = openstack_networking_secgroup_v2.swarm_internal_traffic.id
  description       = "TCP traffic between Swarm nodes"
  direction         = "ingress"
  ethertype         = "IPv4"
  protocol          = "tcp"
  remote_ip_prefix  = data.terraform_remote_state.privnet_dns_router.outputs.main_private_subnet.cidr
}
#
# Swarm Manager VMs
#
# One instance per manager, named <mgr_name>-01, -02, ...
resource "openstack_compute_instance_v2" "docker_swarm_managers" {
  count = var.docker_swarm_data.mgr_count
  name  = format("%s-%02d", var.docker_swarm_data.mgr_name, count.index + 1)
  # availability_zone_hints = var.availability_zones_names.availability_zone_no_gpu
  flavor_name = var.docker_swarm_data.mgr_flavor
  key_pair    = module.ssh_settings.ssh_key_name
  # NOTE(review): the remote-state default security group and the literal
  # "default" entry may refer to the same group — confirm to avoid a duplicate.
  security_groups = [data.terraform_remote_state.privnet_dns_router.outputs.default_security_group_name, openstack_networking_secgroup_v2.swarm_internal_traffic.name, "default", "nfs_share_no_ingress"]
  scheduler_hints {
    group = openstack_compute_servergroup_v2.swarm_masters.id
  }
  # Boot disk: volume created from the Ubuntu 22.04 image, kept on destroy
  block_device {
    uuid                  = module.common_variables.ubuntu_2204.uuid
    source_type           = "image"
    volume_size           = 10
    boot_index            = 0
    destination_type      = "volume"
    delete_on_termination = false
  }
  # Data disk: blank volume for Docker data, kept on destroy
  block_device {
    source_type           = "blank"
    volume_size           = var.docker_swarm_data.mgr_data_disk_size
    boot_index            = -1
    destination_type      = "volume"
    delete_on_termination = false
  }
  network {
    name = data.terraform_remote_state.privnet_dns_router.outputs.main_private_network.name
    # Direct index access instead of the legacy `.* [count.index]`
    # splat-then-index form (same element, clearer intent).
    fixed_ip_v4 = var.swarm_managers_ip[count.index]
  }
  network {
    name = module.common_variables.networks_list.shared_postgresql
  }
  user_data = file("${module.common_variables.ubuntu2204_data_file}")
  # Do not replace the instance when the ssh key, user_data or the
  # network blocks change after creation.
  lifecycle {
    ignore_changes = [
      key_pair, user_data, network
    ]
  }
}
#
# Manila NFS Share
#
# Managers
# One dedicated port per manager on the NFS storage network; the address
# is allocated from the storage NFS subnet.
resource "openstack_networking_port_v2" "swarm_mgr_nfs_port" {
  count          = var.docker_swarm_data.mgr_count
  name           = format("%s-%02d", var.docker_swarm_data.mgr_name, count.index + 1)
  network_id     = data.terraform_remote_state.privnet_dns_router.outputs.storage_nfs_network_id
  admin_state_up = "true"
  fixed_ip {
    subnet_id = data.terraform_remote_state.privnet_dns_router.outputs.storage_nfs_subnet_id
  }
}

# Lock down each NFS port with the no-ingress share security group
resource "openstack_networking_port_secgroup_associate_v2" "swarm_mgr_nfs_port_secgroup" {
  count              = var.docker_swarm_data.mgr_count
  port_id            = openstack_networking_port_v2.swarm_mgr_nfs_port[count.index].id
  security_group_ids = [data.terraform_remote_state.privnet_dns_router.outputs.nfs_share_no_ingress_secgroup_id]
}

# Attach each NFS port to the matching manager instance
resource "openstack_compute_interface_attach_v2" "nfs_port_to_swarm_mgr" {
  count       = var.docker_swarm_data.mgr_count
  instance_id = openstack_compute_instance_v2.docker_swarm_managers[count.index].id
  port_id     = openstack_networking_port_v2.swarm_mgr_nfs_port[count.index].id
}
#
# Octavia
#
# Swarm load balancer. L4, backed by Octavia
# Uses the OVN provider (amphora is commented out below); the VIP gets a
# fixed address on the main private subnet.
resource "openstack_lb_loadbalancer_v2" "swarm_lb" {
vip_subnet_id = data.terraform_remote_state.privnet_dns_router.outputs.main_subnet_network_id
name = var.octavia_swarm_data.swarm_lb_name
description = var.octavia_swarm_data.swarm_lb_description
# flavor_id = var.octavia_swarm_data.octavia_flavor_id
vip_address = var.octavia_swarm_data.swarm_octavia_main_ip
# availability_zone = var.availability_zones_names.availability_zone_no_gpu
# loadbalancer_provider = "amphora"
loadbalancer_provider = "ovn"
}
# Allocate a floating IP
resource "openstack_networking_floatingip_v2" "swarm_lb_ip" {
pool = data.terraform_remote_state.privnet_dns_router.outputs.floating_ip_pools.main_public_ip_pool
# The DNS association does not work because of a bug in the OpenStack API
# dns_name = "main-lb"
# dns_domain = data.terraform_remote_state.privnet_dns_router.outputs.dns_zone.zone_name
description = var.octavia_swarm_data.swarm_lb_description
}
# Bind the floating IP to the load balancer's VIP port
resource "openstack_networking_floatingip_associate_v2" "swarm_lb" {
floating_ip = openstack_networking_floatingip_v2.swarm_lb_ip.address
port_id = openstack_lb_loadbalancer_v2.swarm_lb.vip_port_id
}
# Fully-qualified DNS names assembled from the project's DNS zone name
locals {
  swarm_recordset_name     = "${var.octavia_swarm_data.swarm_lb_hostname}.${data.terraform_remote_state.privnet_dns_router.outputs.dns_zone.zone_name}"
  portainer_recordset_name = "portainer.${data.terraform_remote_state.privnet_dns_router.outputs.dns_zone.zone_name}"
  conductor_recordset_name = "conductor.${data.terraform_remote_state.privnet_dns_router.outputs.dns_zone.zone_name}"
}
# A record pointing the swarm hostname at the LB's floating IP
resource "openstack_dns_recordset_v2" "swarm_lb_dns_recordset" {
zone_id = data.terraform_remote_state.privnet_dns_router.outputs.dns_zone_id
name = local.swarm_recordset_name
description = "Public IP address of the load balancer in front of Docker Swarm"
# NOTE(review): ttl 8600 looks like a typo for 3600 or 86400 — confirm
ttl = 8600
type = "A"
records = [openstack_networking_floatingip_v2.swarm_lb_ip.address]
}
# CNAME for Portainer, aliased to the swarm LB record
resource "openstack_dns_recordset_v2" "swarm_portainer_dns_recordset" {
zone_id = data.terraform_remote_state.privnet_dns_router.outputs.dns_zone_id
name = local.portainer_recordset_name
description = "Portainer hostname"
# NOTE(review): same suspect ttl value as above — confirm
ttl = 8600
type = "CNAME"
records = [local.swarm_recordset_name]
}
# CNAME for Conductor, aliased to the swarm LB record
resource "openstack_dns_recordset_v2" "conductor_dns_recordset" {
zone_id = data.terraform_remote_state.privnet_dns_router.outputs.dns_zone_id
name = local.conductor_recordset_name
description = "Conductor hostname"
# NOTE(review): same suspect ttl value as above — confirm
ttl = 8600
type = "CNAME"
records = [local.swarm_recordset_name]
}
# Main HAPROXY stats listener
# TCP listener on 8880 for the HAPROXY statistics endpoint.
resource "openstack_lb_listener_v2" "swarm_haproxy_stats_listener" {
  name            = "swarm_haproxy_stats_listener"
  description     = "Listener for the stats of the Docker Swarm HAPROXY instances"
  loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id
  protocol        = "TCP"
  protocol_port   = 8880
  # allowed_cidrs = [data.terraform_remote_state.infrastructure_setup.outputs.ssh_sources.d4s_vpn_1_cidr, data.terraform_remote_state.infrastructure_setup.outputs.ssh_sources.d4s_vpn_2_cidr, data.terraform_remote_state.infrastructure_setup.outputs.ssh_sources.s2i2s_vpn_1_cidr, data.terraform_remote_state.infrastructure_setup.outputs.ssh_sources.s2i2s_vpn_2_cidr]
}

# Backend pool with source-IP stickiness
resource "openstack_lb_pool_v2" "swarm_haproxy_stats_pool" {
  name        = "swarm-haproxy-lb-stats"
  description = "Pool for the stats of the main HAPROXY instances"
  listener_id = openstack_lb_listener_v2.swarm_haproxy_stats_listener.id
  protocol    = "TCP"
  lb_method   = "SOURCE_IP_PORT"
  persistence {
    type = "SOURCE_IP"
  }
}

# The three manager nodes as pool members
resource "openstack_lb_members_v2" "swarm_haproxy_stats_pool_members" {
  pool_id = openstack_lb_pool_v2.swarm_haproxy_stats_pool.id
  member {
    name          = "swarm mgr haproxy 1"
    address       = var.docker_swarm_data.mgr1_ip
    protocol_port = 8880
  }
  member {
    name          = "swarm mgr haproxy 2"
    address       = var.docker_swarm_data.mgr2_ip
    protocol_port = 8880
  }
  member {
    name          = "swarm mgr haproxy 3"
    address       = var.docker_swarm_data.mgr3_ip
    protocol_port = 8880
  }
}

# Plain TCP health check on the stats port
resource "openstack_lb_monitor_v2" "swarm_haproxy_stats_monitor" {
  pool_id        = openstack_lb_pool_v2.swarm_haproxy_stats_pool.id
  name           = "swarm_haproxy_stats_monitor"
  type           = "TCP"
  delay          = 20
  timeout        = 5
  max_retries    = 3
  admin_state_up = true
}
# HAPROXY HTTP
# TCP pass-through on port 80; TLS/HTTP handling is done by HAPROXY itself.
resource "openstack_lb_listener_v2" "swarm_haproxy_http_listener" {
  name            = "swarm_haproxy_http_listener"
  description     = "HTTP listener of the Docker Swarm HAPROXY instances"
  loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id
  protocol        = "TCP"
  protocol_port   = 80
  admin_state_up  = true
}

# Backend pool with source-IP stickiness
resource "openstack_lb_pool_v2" "swarm_haproxy_http_pool" {
  name           = "swarm-haproxy-lb-http"
  description    = "Pool for the HTTP listener of the Docker Swarm HAPROXY instances"
  listener_id    = openstack_lb_listener_v2.swarm_haproxy_http_listener.id
  protocol       = "TCP"
  lb_method      = "SOURCE_IP_PORT"
  admin_state_up = true
  persistence {
    type = "SOURCE_IP"
  }
}

# The three manager nodes as pool members
resource "openstack_lb_members_v2" "swarm_haproxy_http_pool_members" {
  pool_id = openstack_lb_pool_v2.swarm_haproxy_http_pool.id
  member {
    name          = "swarm mgr haproxy 1"
    address       = var.docker_swarm_data.mgr1_ip
    protocol_port = 80
  }
  member {
    name          = "swarm mgr haproxy 2"
    address       = var.docker_swarm_data.mgr2_ip
    protocol_port = 80
  }
  member {
    name          = "swarm mgr haproxy 3"
    address       = var.docker_swarm_data.mgr3_ip
    protocol_port = 80
  }
}

# Plain TCP health check on port 80
resource "openstack_lb_monitor_v2" "swarm_haproxy_http_monitor" {
  pool_id        = openstack_lb_pool_v2.swarm_haproxy_http_pool.id
  name           = "swarm_haproxy_http_monitor"
  type           = "TCP"
  delay          = 20
  timeout        = 5
  max_retries    = 3
  admin_state_up = true
}
# HAPROXY HTTPS
# TCP pass-through on port 443 with long timeouts for long-lived connections.
resource "openstack_lb_listener_v2" "swarm_haproxy_https_listener" {
  loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id
  protocol        = "TCP"
  protocol_port   = 443
  # Consistency fix: the description said "main HAPROXY instances" while
  # every sibling listener refers to the Docker Swarm HAPROXY instances.
  description            = "HTTPS listener of the Docker Swarm HAPROXY instances"
  name                   = "swarm_haproxy_https_listener"
  timeout_client_data    = 3600000
  timeout_member_connect = 10000
  timeout_member_data    = 7200000
  admin_state_up         = true
}

# Backend pool with source-IP stickiness
resource "openstack_lb_pool_v2" "swarm_haproxy_https_pool" {
  listener_id = openstack_lb_listener_v2.swarm_haproxy_https_listener.id
  protocol    = "TCP"
  lb_method   = "SOURCE_IP_PORT"
  name        = "swarm-haproxy-lb-https"
  description = "Pool for the HTTPS listener of the Docker Swarm HAPROXY instances"
  persistence {
    type = "SOURCE_IP"
  }
  admin_state_up = true
}

# The three manager nodes as pool members
resource "openstack_lb_members_v2" "swarm_haproxy_https_pool_members" {
  pool_id = openstack_lb_pool_v2.swarm_haproxy_https_pool.id
  member {
    name          = "swarm mgr haproxy 1"
    address       = var.docker_swarm_data.mgr1_ip
    protocol_port = 443
  }
  member {
    name          = "swarm mgr haproxy 2"
    address       = var.docker_swarm_data.mgr2_ip
    protocol_port = 443
  }
  member {
    name          = "swarm mgr haproxy 3"
    address       = var.docker_swarm_data.mgr3_ip
    protocol_port = 443
  }
}

# Plain TCP health check on port 443
resource "openstack_lb_monitor_v2" "swarm_haproxy_https_monitor" {
  pool_id        = openstack_lb_pool_v2.swarm_haproxy_https_pool.id
  name           = "swarm_haproxy_https_monitor"
  type           = "TCP"
  delay          = 20
  timeout        = 5
  max_retries    = 3
  admin_state_up = true
}
# HTTP/3 (QUIC)
# UDP listener on 443 for QUIC traffic, alongside the TCP/443 HTTPS listener.
resource "openstack_lb_listener_v2" "swarm_haproxy_quic_listener" {
  loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id
  protocol        = "UDP"
  protocol_port   = 443
  # Copy-paste fix: this is the HTTP/3 (QUIC) listener, not the HTTPS one.
  description            = "HTTP/3 (QUIC) listener of the Docker Swarm HAPROXY instances"
  name                   = "swarm_haproxy_quic_listener"
  timeout_client_data    = 3600000
  timeout_member_connect = 10000
  timeout_member_data    = 7200000
  admin_state_up         = true
}

# Backend pool with source-IP stickiness
resource "openstack_lb_pool_v2" "swarm_haproxy_quic_pool" {
  listener_id = openstack_lb_listener_v2.swarm_haproxy_quic_listener.id
  protocol    = "UDP"
  lb_method   = "SOURCE_IP_PORT"
  name        = "swarm-haproxy-lb-quic"
  description = "Pool for the HTTP/3 (QUIC) listener of the Docker Swarm HAPROXY instances"
  persistence {
    type = "SOURCE_IP"
  }
  admin_state_up = true
}

# The three manager nodes as pool members
resource "openstack_lb_members_v2" "swarm_haproxy_quic_pool_members" {
  pool_id = openstack_lb_pool_v2.swarm_haproxy_quic_pool.id
  member {
    name          = "swarm mgr haproxy 1"
    address       = var.docker_swarm_data.mgr1_ip
    protocol_port = 443
  }
  member {
    name          = "swarm mgr haproxy 2"
    address       = var.docker_swarm_data.mgr2_ip
    protocol_port = 443
  }
  member {
    name          = "swarm mgr haproxy 3"
    address       = var.docker_swarm_data.mgr3_ip
    protocol_port = 443
  }
}

# UDP health check (UDP-CONNECT) on port 443
resource "openstack_lb_monitor_v2" "swarm_haproxy_quic_monitor" {
  pool_id        = openstack_lb_pool_v2.swarm_haproxy_quic_pool.id
  name           = "swarm_haproxy_quic_monitor"
  type           = "UDP-CONNECT"
  delay          = 20
  timeout        = 5
  max_retries    = 3
  admin_state_up = true
}
# HAPROXY HTTP on port 8080
# TCP pass-through on the alternate HTTP port.
resource "openstack_lb_listener_v2" "swarm_haproxy_8080_listener" {
  name            = "swarm_haproxy_8080_listener"
  description     = "HTTP port 8080 listener of the Docker Swarm HAPROXY instances"
  loadbalancer_id = openstack_lb_loadbalancer_v2.swarm_lb.id
  protocol        = "TCP"
  protocol_port   = 8080
  admin_state_up  = true
}

# Backend pool with source-IP stickiness
resource "openstack_lb_pool_v2" "swarm_haproxy_8080_pool" {
  name           = "swarm-haproxy-lb-http-8080"
  description    = "Pool for the HTTP port 8080 listener of the Docker Swarm HAPROXY instances"
  listener_id    = openstack_lb_listener_v2.swarm_haproxy_8080_listener.id
  protocol       = "TCP"
  lb_method      = "SOURCE_IP_PORT"
  admin_state_up = true
  persistence {
    type = "SOURCE_IP"
  }
}

# The three manager nodes as pool members
resource "openstack_lb_members_v2" "swarm_haproxy_8080_pool_members" {
  pool_id = openstack_lb_pool_v2.swarm_haproxy_8080_pool.id
  member {
    name          = "swarm mgr haproxy 1"
    address       = var.docker_swarm_data.mgr1_ip
    protocol_port = 8080
  }
  member {
    name          = "swarm mgr haproxy 2"
    address       = var.docker_swarm_data.mgr2_ip
    protocol_port = 8080
  }
  member {
    name          = "swarm mgr haproxy 3"
    address       = var.docker_swarm_data.mgr3_ip
    protocol_port = 8080
  }
}

# Plain TCP health check on port 8080
resource "openstack_lb_monitor_v2" "swarm_haproxy_8080_monitor" {
  pool_id        = openstack_lb_pool_v2.swarm_haproxy_8080_pool.id
  name           = "swarm_haproxy_8080_monitor"
  type           = "TCP"
  delay          = 20
  timeout        = 5
  max_retries    = 3
  admin_state_up = true
}
# VIP address of the Octavia load balancer
output "swarm_loadbalancer_ip" {
  description = "Docker Swarm Load balancer IP address"
  value       = openstack_lb_loadbalancer_v2.swarm_lb.vip_address
}

# Full manager instance objects; marked sensitive (the instances carry
# user_data, which may contain secrets — TODO confirm)
output "swarm_manager_nodes" {
  description = "Docker Swarm Manager nodes data"
  value       = openstack_compute_instance_v2.docker_swarm_managers
  sensitive   = true
}

# NFS-network port objects of the manager nodes
output "swarm_managers_nfs_ip_ports" {
  # Typo fix: "share" -> "shared"
  description = "IP addresses in the shared NFS network"
  value       = openstack_networking_port_v2.swarm_mgr_nfs_port
}

View File

@ -0,0 +1,38 @@
# Define required providers
# Pins the OpenStack provider (>= 2.0.0) and a minimum Terraform version.
terraform {
required_version = ">= 0.14.0"
required_providers {
openstack = {
source = "terraform-provider-openstack/openstack"
version = ">= 2.0.0"
}
}
}
# Remote state of the private network / DNS / router project
# (local backend: the state file lives in a sibling directory)
data "terraform_remote_state" "privnet_dns_router" {
backend = "local"
config = {
path = "../project-setup/terraform.tfstate"
}
}
# Remote state of the basic infrastructure project
data "terraform_remote_state" "infrastructure_setup" {
backend = "local"
config = {
path = "../basic-infrastructure/terraform.tfstate"
}
}
#
# Uses common_variables as module
#
module "common_variables" {
source = "../../modules/common_variables"
}
# Module used
# Provides the SSH key pair name referenced by the compute instances
module "ssh_settings" {
source = "../../modules/ssh-key-ref"
}

View File

@ -0,0 +1,3 @@
# OpenStack provider: credentials come from the "d4s-pre" entry in
# clouds.yaml (preprod cloud)
provider "openstack" {
cloud = "d4s-pre"
}

View File

@ -0,0 +1,33 @@
# Naming, addressing and sizing of the swarm manager nodes.
# NOTE(review): declared as map(string) but mgr_count and
# mgr_data_disk_size are numeric — this relies on implicit
# string/number conversion; consider an object type. Confirm before changing.
variable "docker_swarm_data" {
type = map(string)
default = {
mgr_name = "swarm-mgr"
mgr1_ip = "10.1.32.31"
mgr1_cidr = "10.1.32.31/32"
mgr2_ip = "10.1.32.32"
mgr2_cidr = "10.1.32.32/32"
mgr3_ip = "10.1.32.33"
mgr3_cidr = "10.1.32.33/32"
mgr_count = 3
mgr_flavor = "m1.large"
mgr_data_disk_size = 100
}
}
# Fixed private addresses of the manager nodes, indexed by count.index
# in the instance resource.
variable "swarm_managers_ip" {
  type = list(string)
  default = [
    "10.1.32.31",
    "10.1.32.32",
    "10.1.32.33",
  ]
}
# Octavia load balancer settings for the swarm cluster.
variable "octavia_swarm_data" {
  type = map(string)
  default = {
    # Fix: the original default repeated the swarm_lb_name key with the
    # same value; the duplicate entry has been removed.
    swarm_lb_name           = "d4s-pre-cloud-swarm-l4"
    swarm_lb_description    = "L4 balancer that serves the D4Science pre Docker Swarm cluster"
    swarm_lb_hostname       = "swarm-lb"
    swarm_octavia_main_ip   = "10.1.32.30"
    swarm_octavia_main_cidr = "10.1.32.30/32"
  }
}

File diff suppressed because it is too large Load Diff