# Source: code-infrastructure-lab/spark-run.yaml
# File-viewer metadata at capture time: 63 lines, 2.4 KiB, YAML

---
# SparkApplication (Spark Operator, v1beta2) that runs the
# SparkCreateScholexplorerDump main class in cluster mode, passing
# --sourcePath s3a://raw-graph/01 and --targetPath s3a://scholix.
apiVersion: "sparkoperator.k8s.io/v1beta2"
kind: SparkApplication
metadata:
  name: spark-scholix
  namespace: dnet-spark-jobs
spec:
  type: Scala
  mode: cluster
  image: "dnet-spark:1.0.0"
  imagePullPolicy: IfNotPresent
  mainClass: eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump
  mainApplicationFile: "s3a://deps/dhp-shade-package-1.2.5-SNAPSHOT.jar"
  # Command-line arguments handed to mainClass, in order.
  arguments:
    - "--sourcePath"
    - "s3a://raw-graph/01"
    - "--targetPath"
    - "s3a://scholix"
  sparkVersion: "3.5.1"
  sparkConf:
    # Ivy cache is pointed at /tmp, which is backed by the "test-volume" PVC
    # mounted in both the driver and the executor below.
    spark.driver.extraJavaOptions: >-
      -Divy.cache.dir=/tmp
      -Dcom.amazonaws.sdk.disableCertChecking=true
      -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true
    spark.executor.extraJavaOptions: >-
      -Divy.cache.dir=/tmp
      -Dcom.amazonaws.sdk.disableCertChecking=true
      -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true
    spark.hadoop.fs.defaultFS: "s3a://scholix"
    # NOTE(review): object-store credentials are committed in plain text;
    # consider sourcing them from a Kubernetes Secret instead.
    spark.hadoop.fs.s3a.access.key: "minio"
    spark.hadoop.fs.s3a.secret.key: "minio123"
    # NOTE(review): the endpoint uses https:// while the ssl.enabled flags
    # below are "false" - confirm which transport is actually intended.
    spark.hadoop.fs.s3a.endpoint: "https://minio.dnet-minio-tenant.svc.cluster.local"
    spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem"
    spark.hadoop.fs.s3a.path.style.access: "true"
    spark.hadoop.fs.s3a.attempts.maximum: "1"
    spark.hadoop.fs.s3a.connection.establish.timeout: "5000"
    spark.hadoop.fs.s3a.connection.timeout: "10001"
    spark.hadoop.fs.s3a.connection.ssl.enabled: "false"
    # NOTE(review): the next five keys lack the "spark." / "spark.hadoop."
    # prefix used everywhere else in this map; presumably spark-submit
    # ignores them - confirm and either prefix or remove.
    com.amazonaws.sdk.disableCertChecking: "true"
    com.cloudera.com.amazonaws.sdk.disableCertChecking: "true"
    fs.s3a.connection.ssl.strictverify: "false"
    fs.s3a.connection.ssl.enabled: "false"
    fs.s3a.ssl.enabled: "false"
    spark.hadoop.fs.s3a.ssl.enabled: "false"
  # A failed or finished run is not resubmitted.
  restartPolicy:
    type: Never
  # PVC-backed scratch space shared by name with the volumeMounts below.
  volumes:
    - name: "test-volume"
      persistentVolumeClaim:
        claimName: my-spark-pvc-tmp
  driver:
    # NOTE(review): sparkConf above also sets spark.driver.extraJavaOptions
    # (including -Divy.cache.dir=/tmp, which is absent here); confirm which
    # of the two the operator ends up applying.
    javaOptions: >-
      -Dcom.amazonaws.sdk.disableCertChecking=true
      -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true
    cores: 1
    coreLimit: "1200m"
    memory: "2G"
    labels:
      # Quoted so the label value is unambiguously a string.
      version: "3.5.1"
    serviceAccount: spark
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"
  executor:
    # NOTE(review): same overlap with spark.executor.extraJavaOptions in
    # sparkConf as described for the driver.
    javaOptions: >-
      -Dcom.amazonaws.sdk.disableCertChecking=true
      -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true
    cores: 10
    memoryOverhead: "3G"
    memory: "4G"
    instances: 1
    labels:
      version: "3.5.1"
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"