diff --git a/modules/airflow/airflow.tf b/modules/airflow/airflow.tf index e1ba509..0bc369e 100644 --- a/modules/airflow/airflow.tf +++ b/modules/airflow/airflow.tf @@ -108,12 +108,12 @@ resource "helm_release" "gcp_spark_operator" { set { name = "image.repository" - value = "gbloisi/spark-operator" + value = "kubeflow/spark-operator" } set { name = "image.tag" - value = "v1beta2-1.4.3-3.5.1" + value = "v1beta2-1.4.5-3.5.0" } set { diff --git a/spark-run.yaml b/spark-run.yaml index 1f7d95f..b23669e 100644 --- a/spark-run.yaml +++ b/spark-run.yaml @@ -9,11 +9,10 @@ spec: image: "dnet-spark:1.0.0" imagePullPolicy: IfNotPresent mainClass: eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump - mainApplicationFile: "s3a://lib/dhp-shade-package-1.2.5-SNAPSHOT.jar" + mainApplicationFile: "s3a://deps/dhp-shade-package-1.2.5-SNAPSHOT.jar" arguments: [ - "--sourcePath", "s3a://raw-graph", - "--targetPath", "s3a://scholix", - "--master", "local[*]" ] + "--sourcePath", "s3a://raw-graph/01", + "--targetPath", "s3a://scholix"] sparkVersion: "3.5.1" sparkConf: spark.driver.extraJavaOptions: "-Divy.cache.dir=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true" @@ -40,11 +39,6 @@ spec: - name: "test-volume" persistentVolumeClaim: claimName: my-spark-pvc-tmp - dynamicAllocation: - enabled: true - initialExecutors: 2 - minExecutors: 2 - maxExecutors: 16 driver: javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true" cores: 1 @@ -58,8 +52,10 @@ spec: mountPath: "/tmp" executor: javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true" - cores: 1 - memory: "2G" + cores: 10 + memoryOverhead: "3G" + memory: "4G" + instances: 1 labels: version: 3.5.1 volumeMounts: