diff --git a/docker-images/spark-operator/Dockerfile b/docker-images/spark-operator/Dockerfile
new file mode 100644
index 0000000..4e58fa3
--- /dev/null
+++ b/docker-images/spark-operator/Dockerfile
@@ -0,0 +1,7 @@
+# docker build -t spark-operator:2.0.2 . && kind load docker-image -n dnet-data-platform spark-operator:2.0.2
+FROM kubeflow/spark-operator:2.0.2
+
+ENV SPARK_HOME /opt/spark
+USER root
+RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar -o ${SPARK_HOME}/jars/hadoop-aws-3.3.4.jar
+RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar -o ${SPARK_HOME}/jars/aws-java-sdk-bundle-1.12.262.jar
diff --git a/modules/airflow/airflow.tf b/modules/airflow/airflow.tf
index b5000d4..6b0c267 100644
--- a/modules/airflow/airflow.tf
+++ b/modules/airflow/airflow.tf
@@ -22,15 +22,17 @@ resource "kubernetes_role" "airflow_spark_role" {

   rule {
     api_groups = ["sparkoperator.k8s.io"]
-    resources = ["sparkapplications", "sparkapplications/status",
-      "scheduledsparkapplications", "scheduledsparkapplications/status"]
-    verbs = ["*"]
+    resources = [
+      "sparkapplications", "sparkapplications/status",
+      "scheduledsparkapplications", "scheduledsparkapplications/status"
+    ]
+    verbs = ["*"]
   }

   rule {
     api_groups = [""]
-    resources = ["pods/log"]
-    verbs = ["*"]
+    resources = ["pods", "pods/log"]
+    verbs     = ["*"]
   }
 }
@@ -55,49 +57,27 @@ resource "kubernetes_role_binding_v1" "airflow_spark_role_binding" {
 }

 resource "kubernetes_role_binding_v1" "airflow_spark_role_binding2" {
-  depends_on = [kubernetes_namespace.spark_jobs_namespace]
-  metadata {
-    name = "airflow-spark-role-binding2"
-    namespace = "${var.namespace_prefix}spark-jobs"
-  }
+  depends_on = [kubernetes_namespace.spark_jobs_namespace]
+  metadata {
+    name      = "airflow-spark-role-binding2"
+    namespace = "${var.namespace_prefix}spark-jobs"
+  }

-  subject {
-    kind = "ServiceAccount"
-    name = "airflow-worker"
-    namespace = "${var.namespace_prefix}airflow"
-  }
+  subject {
+    kind      = "ServiceAccount"
+    name      = "airflow-worker"
+    namespace = "${var.namespace_prefix}airflow"
+  }

-  role_ref {
-    api_group = "rbac.authorization.k8s.io"
-    kind = "Role"
-    name = "spark-role"
-  }
+  role_ref {
+    api_group = "rbac.authorization.k8s.io"
+    kind      = "Role"
+    name      = "spark-role"
+  }
 }
-#
-#
-# resource "kubernetes_role_binding_v1" "spark_role_binding" {
-#   depends_on = [kubernetes_namespace.spark_jobs_namespace]
-#   metadata {
-#     name = "spark-role-binding"
-#     namespace = "${var.namespace_prefix}spark-jobs"
-#   }
-#
-#   subject {
-#     kind = "ServiceAccount"
-#     name = "spark"
-#     namespace = "${var.namespace_prefix}spark-jobs"
-#   }
-#
-#   role_ref {
-#     api_group = "rbac.authorization.k8s.io"
-#     kind = "Role"
-#     name = "spark-role"
-#   }
-# }
-#

 resource "helm_release" "gcp_spark_operator" {
-  depends_on = [kubernetes_namespace.spark_jobs_namespace]
+  depends_on        = [kubernetes_namespace.spark_jobs_namespace]
   name              = "gcp-spark-operator"
   chart             = "spark-operator"
   repository        = "https://kubeflow.github.io/spark-operator"
@@ -106,23 +86,38 @@ resource "helm_release" "gcp_spark_operator" {
   dependency_update = "true"
   version           = "2.0.2"

-  # set {
-  #   name = "image.repository"
-  #   value = "kubeflow/spark-operator"
-  # }
-
-  # set {
-  #   name = "image.tag"
-  #   value = "v1beta2-1.4.5-3.5.0"
-  # }
+  set {
+    name  = "image.repository"
+    value = "spark-operator"
+  }

   set {
-    name = "sparkJobNamespaces"
+    name  = "image.tag"
+    value = "2.0.2"
+  }
+
+  set {
+    name  = "spark.jobNamespaces"
     value = "{${var.namespace_prefix}spark-jobs}"
   }

   set {
-    name = "serviceAccounts.spark.name"
+    name  = "spark.serviceAccount.create"
+    value = "true"
+  }
+
+  set {
+    name  = "spark.serviceAccount.name"
+    value = "spark"
+  }
+
+  set {
+    name  = "controller.serviceAccount.create"
+    value = "true"
+  }
+
+  set {
+    name  = "controller.serviceAccount.name"
     value = "spark"
   }
@@ -147,13 +142,13 @@ resource "kubernetes_namespace" "airflow" {

 resource "kubernetes_secret" "s3_conn_secrets" {
   depends_on = [kubernetes_namespace.airflow]
   metadata {
-    name = "s3-conn-secrets"
+    name      = "s3-conn-secrets"
     namespace = "${var.namespace_prefix}airflow"
   }

   data = {
-    username = var.s3_key
-    password = var.s3_secret
+    username             = var.s3_key
+    password             = var.s3_secret
     AIRFLOW_CONN_S3_CONN = <