Update spark operator version

This commit is contained in:
Giambattista Bloisi 2024-10-21 09:36:55 +02:00
parent 0a2956d81f
commit fa90a9dbe0
6 changed files with 96 additions and 88 deletions

View File

@@ -0,0 +1,7 @@
# docker build -t spark-operator:2.0.2 . && kind load docker-image -n dnet-data-platform spark-operator:2.0.2
# Extends the upstream Kubeflow Spark operator image with the Hadoop S3A
# connector and the matching AWS SDK bundle so Spark jobs can use s3:// paths.
FROM kubeflow/spark-operator:2.0.2

# key=value form; legacy space-separated ENV is deprecated.
ENV SPARK_HOME=/opt/spark

# root is needed to write into ${SPARK_HOME}/jars.
USER root

# -f makes curl fail on HTTP errors instead of silently saving an error page
# as a .jar; -sSL stays quiet, surfaces real errors, and follows redirects.
# Both jars are fetched in one RUN so they land in a single layer.
RUN curl -fsSL https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar \
        -o ${SPARK_HOME}/jars/hadoop-aws-3.3.4.jar \
 && curl -fsSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar \
        -o ${SPARK_HOME}/jars/aws-java-sdk-bundle-1.12.262.jar
# NOTE(review): image is left running as root — confirm whether the upstream
# image's non-root default user should be restored after the jar install.

View File

@@ -22,15 +22,17 @@ resource "kubernetes_role" "airflow_spark_role" {
rule { rule {
api_groups = ["sparkoperator.k8s.io"] api_groups = ["sparkoperator.k8s.io"]
resources = ["sparkapplications", "sparkapplications/status", resources = [
"scheduledsparkapplications", "scheduledsparkapplications/status"] "sparkapplications", "sparkapplications/status",
verbs = ["*"] "scheduledsparkapplications", "scheduledsparkapplications/status"
]
verbs = ["*"]
} }
rule { rule {
api_groups = [""] api_groups = [""]
resources = ["pods/log"] resources = ["pods", "pods/log"]
verbs = ["*"] verbs = ["*"]
} }
} }
@@ -55,49 +57,27 @@ resource "kubernetes_role_binding_v1" "airflow_spark_role_binding" {
} }
resource "kubernetes_role_binding_v1" "airflow_spark_role_binding2" { resource "kubernetes_role_binding_v1" "airflow_spark_role_binding2" {
depends_on = [kubernetes_namespace.spark_jobs_namespace] depends_on = [kubernetes_namespace.spark_jobs_namespace]
metadata { metadata {
name = "airflow-spark-role-binding2" name = "airflow-spark-role-binding2"
namespace = "${var.namespace_prefix}spark-jobs" namespace = "${var.namespace_prefix}spark-jobs"
} }
subject { subject {
kind = "ServiceAccount" kind = "ServiceAccount"
name = "airflow-worker" name = "airflow-worker"
namespace = "${var.namespace_prefix}airflow" namespace = "${var.namespace_prefix}airflow"
} }
role_ref { role_ref {
api_group = "rbac.authorization.k8s.io" api_group = "rbac.authorization.k8s.io"
kind = "Role" kind = "Role"
name = "spark-role" name = "spark-role"
} }
} }
#
#
# resource "kubernetes_role_binding_v1" "spark_role_binding" {
# depends_on = [kubernetes_namespace.spark_jobs_namespace]
# metadata {
# name = "spark-role-binding"
# namespace = "${var.namespace_prefix}spark-jobs"
# }
#
# subject {
# kind = "ServiceAccount"
# name = "spark"
# namespace = "${var.namespace_prefix}spark-jobs"
# }
#
# role_ref {
# api_group = "rbac.authorization.k8s.io"
# kind = "Role"
# name = "spark-role"
# }
# }
#
resource "helm_release" "gcp_spark_operator" { resource "helm_release" "gcp_spark_operator" {
depends_on = [kubernetes_namespace.spark_jobs_namespace] depends_on = [kubernetes_namespace.spark_jobs_namespace]
name = "gcp-spark-operator" name = "gcp-spark-operator"
chart = "spark-operator" chart = "spark-operator"
repository = "https://kubeflow.github.io/spark-operator" repository = "https://kubeflow.github.io/spark-operator"
@@ -106,23 +86,38 @@ resource "helm_release" "gcp_spark_operator" {
dependency_update = "true" dependency_update = "true"
version = "2.0.2" version = "2.0.2"
# set { set {
# name = "image.repository" name = "image.repository"
# value = "kubeflow/spark-operator" value = "spark-operator"
# } }
# set {
# name = "image.tag"
# value = "v1beta2-1.4.5-3.5.0"
# }
set { set {
name = "sparkJobNamespaces" name = "image.tag"
value = "2.0.2"
}
set {
name = "spark.jobNamespaces"
value = "{${var.namespace_prefix}spark-jobs}" value = "{${var.namespace_prefix}spark-jobs}"
} }
set { set {
name = "serviceAccounts.spark.name" name = "spark.serviceAccount.create"
value = "true"
}
set {
name = "spark.serviceAccount.name"
value = "spark"
}
set {
name = "controller.serviceAccount.create"
value = "true"
}
set {
name = "controller.serviceAccount.name"
value = "spark" value = "spark"
} }
@@ -147,13 +142,13 @@ resource "kubernetes_namespace" "airflow" {
resource "kubernetes_secret" "s3_conn_secrets" { resource "kubernetes_secret" "s3_conn_secrets" {
depends_on = [kubernetes_namespace.airflow] depends_on = [kubernetes_namespace.airflow]
metadata { metadata {
name = "s3-conn-secrets" name = "s3-conn-secrets"
namespace = "${var.namespace_prefix}airflow" namespace = "${var.namespace_prefix}airflow"
} }
data = { data = {
username = var.s3_key username = var.s3_key
password = var.s3_secret password = var.s3_secret
AIRFLOW_CONN_S3_CONN = <<EOT AIRFLOW_CONN_S3_CONN = <<EOT
{ {
"conn_type": "aws", "conn_type": "aws",
@@ -171,7 +166,6 @@ EOT
} }
resource "helm_release" "airflow" { resource "helm_release" "airflow" {
depends_on = [kubernetes_secret.s3_conn_secrets] depends_on = [kubernetes_secret.s3_conn_secrets]
@@ -197,7 +191,7 @@ resource "helm_release" "airflow" {
} }
set { set {
name = "spec.values.env" name = "spec.values.env"
value = yamlencode([ value = yamlencode([
{ {
name = "AIRFLOW__WEBSERVER__BASE_URL", name = "AIRFLOW__WEBSERVER__BASE_URL",
@@ -211,17 +205,17 @@ resource "helm_release" "airflow" {
} }
set { set {
name ="dags.gitSync.repo" name = "dags.gitSync.repo"
value = var.repo_url value = var.repo_url
} }
set { set {
name ="dags.gitSync.branch" name = "dags.gitSync.branch"
value = var.branch_name value = var.branch_name
} }
set { set {
name ="dags.gitSync.subPath" name = "dags.gitSync.subPath"
value = var.dag_path value = var.dag_path
} }
@@ -230,10 +224,10 @@ resource "helm_release" "airflow" {
# value = "gbloisi/airflow" # value = "gbloisi/airflow"
# } # }
# set { set {
# name = "images.airflow.tag" name = "images.airflow.tag"
# value = "2.8.3rc1-python3.11" value = "2.9.3-python3.11"
# } }
set { set {
name = "ingress.web.host" name = "ingress.web.host"

View File

@@ -1,12 +1,9 @@
provider "helm" { terraform {
# Several Kubernetes authentication methods are possible: https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs#authentication required_providers {
kubernetes { helm = {
config_path = pathexpand(var.kube_config) }
config_context = var.kube_context
kubernetes = {
}
} }
} }
provider "kubernetes" {
config_path = pathexpand(var.kube_config)
config_context = var.kube_context
}

View File

@@ -1,12 +1,9 @@
provider "helm" { terraform {
# Several Kubernetes authentication methods are possible: https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs#authentication required_providers {
kubernetes { helm = {
config_path = pathexpand(var.kube_config) }
config_context = var.kube_context
kubernetes = {
}
} }
} }
provider "kubernetes" {
config_path = pathexpand(var.kube_config)
config_context = var.kube_context
}

12
providers.tf Normal file
View File

@@ -0,0 +1,12 @@
# Helm provider: reaches the cluster through the embedded "kubernetes" block.
# Several Kubernetes authentication methods are possible:
# https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs#authentication
provider "helm" {
  kubernetes {
    config_context = var.kube_context
    config_path    = pathexpand(var.kube_config)
  }
}

# Kubernetes provider: uses the same kubeconfig and context as the Helm
# provider so both tools target the identical cluster.
provider "kubernetes" {
  config_context = var.kube_context
  config_path    = pathexpand(var.kube_config)
}

View File

@@ -1,6 +1,7 @@
FROM spark:3.5.1-scala2.12-java17-ubuntu # docker build -t dnet-spark:1.0.0 . && kind load docker-image -n dnet-data-platform dnet-spark:1.0.0
FROM spark:3.5.3-scala2.12-java17-ubuntu
user root USER root
RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar -o ${SPARK_HOME}/jars/hadoop-aws-3.3.4.jar RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar -o ${SPARK_HOME}/jars/hadoop-aws-3.3.4.jar
RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar -o ${SPARK_HOME}/jars/aws-java-sdk-bundle-1.12.262.jar RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar -o ${SPARK_HOME}/jars/aws-java-sdk-bundle-1.12.262.jar