Update spark operator version

This commit is contained in:
Giambattista Bloisi 2024-10-21 09:36:55 +02:00
parent 0a2956d81f
commit fa90a9dbe0
6 changed files with 96 additions and 88 deletions

View File

@ -0,0 +1,7 @@
# Spark operator image extended with the hadoop-aws and AWS SDK bundle jars.
# Build and load into the local kind cluster:
# docker build -t spark-operator:2.0.2 . && kind load docker-image -n dnet-data-platform spark-operator:2.0.2
FROM kubeflow/spark-operator:2.0.2
ENV SPARK_HOME=/opt/spark
# Presumably root is required to write into ${SPARK_HOME}/jars.
# NOTE(review): the image stays on root after this point; consider switching
# back to the base image's unprivileged user once its UID is confirmed.
USER root
# -f: fail the build on HTTP errors instead of saving the error page as a jar.
# -sS: no progress meter, but still print errors. -L: follow redirects.
RUN curl -fsSL https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar -o "${SPARK_HOME}/jars/hadoop-aws-3.3.4.jar" \
 && curl -fsSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar -o "${SPARK_HOME}/jars/aws-java-sdk-bundle-1.12.262.jar"

View File

@ -22,15 +22,17 @@ resource "kubernetes_role" "airflow_spark_role" {
rule {
api_groups = ["sparkoperator.k8s.io"]
resources = ["sparkapplications", "sparkapplications/status",
"scheduledsparkapplications", "scheduledsparkapplications/status"]
verbs = ["*"]
resources = [
"sparkapplications", "sparkapplications/status",
"scheduledsparkapplications", "scheduledsparkapplications/status"
]
verbs = ["*"]
}
rule {
api_groups = [""]
resources = ["pods/log"]
verbs = ["*"]
resources = ["pods", "pods/log"]
verbs = ["*"]
}
}
@ -55,49 +57,27 @@ resource "kubernetes_role_binding_v1" "airflow_spark_role_binding" {
}
resource "kubernetes_role_binding_v1" "airflow_spark_role_binding2" {
depends_on = [kubernetes_namespace.spark_jobs_namespace]
metadata {
name = "airflow-spark-role-binding2"
namespace = "${var.namespace_prefix}spark-jobs"
}
depends_on = [kubernetes_namespace.spark_jobs_namespace]
metadata {
name = "airflow-spark-role-binding2"
namespace = "${var.namespace_prefix}spark-jobs"
}
subject {
kind = "ServiceAccount"
name = "airflow-worker"
namespace = "${var.namespace_prefix}airflow"
}
subject {
kind = "ServiceAccount"
name = "airflow-worker"
namespace = "${var.namespace_prefix}airflow"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "Role"
name = "spark-role"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "Role"
name = "spark-role"
}
}
#
#
# resource "kubernetes_role_binding_v1" "spark_role_binding" {
# depends_on = [kubernetes_namespace.spark_jobs_namespace]
# metadata {
# name = "spark-role-binding"
# namespace = "${var.namespace_prefix}spark-jobs"
# }
#
# subject {
# kind = "ServiceAccount"
# name = "spark"
# namespace = "${var.namespace_prefix}spark-jobs"
# }
#
# role_ref {
# api_group = "rbac.authorization.k8s.io"
# kind = "Role"
# name = "spark-role"
# }
# }
#
resource "helm_release" "gcp_spark_operator" {
depends_on = [kubernetes_namespace.spark_jobs_namespace]
depends_on = [kubernetes_namespace.spark_jobs_namespace]
name = "gcp-spark-operator"
chart = "spark-operator"
repository = "https://kubeflow.github.io/spark-operator"
@ -106,23 +86,38 @@ resource "helm_release" "gcp_spark_operator" {
dependency_update = "true"
version = "2.0.2"
# set {
# name = "image.repository"
# value = "kubeflow/spark-operator"
# }
# set {
# name = "image.tag"
# value = "v1beta2-1.4.5-3.5.0"
# }
set {
name = "image.repository"
value = "spark-operator"
}
set {
name = "sparkJobNamespaces"
name = "image.tag"
value = "2.0.2"
}
set {
name = "spark.jobNamespaces"
value = "{${var.namespace_prefix}spark-jobs}"
}
set {
name = "serviceAccounts.spark.name"
name = "spark.serviceAccount.create"
value = "true"
}
set {
name = "spark.serviceAccount.name"
value = "spark"
}
set {
name = "controller.serviceAccount.create"
value = "true"
}
set {
name = "controller.serviceAccount.name"
value = "spark"
}
@ -147,13 +142,13 @@ resource "kubernetes_namespace" "airflow" {
resource "kubernetes_secret" "s3_conn_secrets" {
depends_on = [kubernetes_namespace.airflow]
metadata {
name = "s3-conn-secrets"
name = "s3-conn-secrets"
namespace = "${var.namespace_prefix}airflow"
}
data = {
username = var.s3_key
password = var.s3_secret
username = var.s3_key
password = var.s3_secret
AIRFLOW_CONN_S3_CONN = <<EOT
{
"conn_type": "aws",
@ -171,7 +166,6 @@ EOT
}
resource "helm_release" "airflow" {
depends_on = [kubernetes_secret.s3_conn_secrets]
@ -197,7 +191,7 @@ resource "helm_release" "airflow" {
}
set {
name = "spec.values.env"
name = "spec.values.env"
value = yamlencode([
{
name = "AIRFLOW__WEBSERVER__BASE_URL",
@ -211,17 +205,17 @@ resource "helm_release" "airflow" {
}
set {
name ="dags.gitSync.repo"
name = "dags.gitSync.repo"
value = var.repo_url
}
set {
name ="dags.gitSync.branch"
name = "dags.gitSync.branch"
value = var.branch_name
}
set {
name ="dags.gitSync.subPath"
name = "dags.gitSync.subPath"
value = var.dag_path
}
@ -230,10 +224,10 @@ resource "helm_release" "airflow" {
# value = "gbloisi/airflow"
# }
# set {
# name = "images.airflow.tag"
# value = "2.8.3rc1-python3.11"
# }
set {
name = "images.airflow.tag"
value = "2.9.3-python3.11"
}
set {
name = "ingress.web.host"

View File

@ -1,12 +1,9 @@
provider "helm" {
# Several Kubernetes authentication methods are possible: https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs#authentication
kubernetes {
config_path = pathexpand(var.kube_config)
config_context = var.kube_context
terraform {
required_providers {
helm = {
}
kubernetes = {
}
}
}
provider "kubernetes" {
config_path = pathexpand(var.kube_config)
config_context = var.kube_context
}

View File

@ -1,12 +1,9 @@
provider "helm" {
# Several Kubernetes authentication methods are possible: https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs#authentication
kubernetes {
config_path = pathexpand(var.kube_config)
config_context = var.kube_context
terraform {
required_providers {
helm = {
}
kubernetes = {
}
}
}
provider "kubernetes" {
config_path = pathexpand(var.kube_config)
config_context = var.kube_context
}

12
providers.tf Normal file
View File

@ -0,0 +1,12 @@
# Helm provider: talks to the cluster selected by var.kube_config / var.kube_context.
provider "helm" {
  # Several Kubernetes authentication methods are possible: https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs#authentication
  kubernetes {
    config_context = var.kube_context
    # pathexpand() resolves a leading "~" in the kubeconfig path.
    config_path    = pathexpand(var.kube_config)
  }
}
# Kubernetes provider: reads its kubeconfig path and context from
# var.kube_config and var.kube_context.
provider "kubernetes" {
  config_context = var.kube_context
  # pathexpand() resolves a leading "~" in the kubeconfig path.
  config_path    = pathexpand(var.kube_config)
}

View File

@ -1,6 +1,7 @@
FROM spark:3.5.1-scala2.12-java17-ubuntu
# docker build -t dnet-spark:1.0.0 . && kind load docker-image -n dnet-data-platform dnet-spark:1.0.0
FROM spark:3.5.3-scala2.12-java17-ubuntu
user root
USER root
# Download the hadoop-aws and AWS SDK bundle jars into Spark's jar directory.
# -f: fail the build on HTTP errors instead of saving the error page as a jar.
# -sS: no progress meter, but still print errors. -L: follow redirects.
RUN curl -fsSL https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar -o "${SPARK_HOME}/jars/hadoop-aws-3.3.4.jar" \
 && curl -fsSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar -o "${SPARK_HOME}/jars/aws-java-sdk-bundle-1.12.262.jar"