---
# SparkApplication (Spark operator, sparkoperator.k8s.io/v1beta2) that submits
# eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump in cluster mode from the
# shaded dhp jar stored on S3A, passing --sourcePath s3a://raw-graph and
# --targetPath s3a://scholix to the application.
apiVersion: "sparkoperator.k8s.io/v1beta2"
kind: SparkApplication
metadata:
  name: spark-scholix
  namespace: dnet-spark-jobs
spec:
  type: Scala
  mode: cluster
  image: "dnet-spark:1.0.0"
  imagePullPolicy: IfNotPresent
  mainClass: eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump
  mainApplicationFile: "s3a://lib/dhp-shade-package-1.2.5-SNAPSHOT.jar"
  # Command-line arguments handed to mainClass, as flag/value pairs.
  # NOTE(review): "--master local[*]" is passed while mode is "cluster". If the
  # application applies this value as the Spark master, the executors and
  # dynamicAllocation configured below would go unused - confirm against the job.
  arguments:
    - "--sourcePath"
    - "s3a://raw-graph"
    - "--targetPath"
    - "s3a://scholix"
    - "--master"
    - "local[*]"
  sparkVersion: "3.5.1"
  # Extra Spark properties; every value is kept as a quoted string.
  # JVM options are intentionally NOT set here - see driver.javaOptions and
  # executor.javaOptions, which are the single source for them.
  sparkConf:
    spark.hadoop.fs.defaultFS: "s3a://scholix"
    # NOTE(review): credentials are committed in plain text. Prefer sourcing
    # them from a Kubernetes Secret instead of the manifest.
    spark.hadoop.fs.s3a.access.key: "minio"
    spark.hadoop.fs.s3a.secret.key: "minio123"
    # NOTE(review): the endpoint uses https:// while the ssl.enabled keys below
    # are "false" - confirm which of the two is intended.
    spark.hadoop.fs.s3a.endpoint: "https://minio.dnet-minio-tenant.svc.cluster.local"
    spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem"
    spark.hadoop.fs.s3a.path.style.access: "true"
    spark.hadoop.fs.s3a.attempts.maximum: "1"
    spark.hadoop.fs.s3a.connection.establish.timeout: "5000"
    spark.hadoop.fs.s3a.connection.timeout: "10001"
    spark.hadoop.fs.s3a.connection.ssl.enabled: "false"
    # NOTE(review): the next five keys lack the "spark." prefix. spark-submit is
    # documented to ignore non-Spark properties, so these are presumably no-ops;
    # they are kept unchanged until that is confirmed.
    com.amazonaws.sdk.disableCertChecking: "true"
    com.cloudera.com.amazonaws.sdk.disableCertChecking: "true"
    fs.s3a.connection.ssl.strictverify: "false"
    fs.s3a.connection.ssl.enabled: "false"
    fs.s3a.ssl.enabled: "false"
    spark.hadoop.fs.s3a.ssl.enabled: "false"
  restartPolicy:
    type: Never
  # PVC-backed volume mounted at /tmp by both the driver and the executors.
  volumes:
    - name: "test-volume"
      persistentVolumeClaim:
        claimName: my-spark-pvc-tmp
  dynamicAllocation:
    enabled: true
    initialExecutors: 2
    minExecutors: 2
    maxExecutors: 16
  driver:
    # Single source of driver JVM options. The operator translates javaOptions
    # into spark.driver.extraJavaOptions, so do not also set that property under
    # sparkConf: two differing copies make it unclear which one takes effect.
    # Folded scalar (>-): the lines below are joined with single spaces.
    javaOptions: >-
      -Divy.cache.dir=/tmp
      -Dcom.amazonaws.sdk.disableCertChecking=true
      -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true
    cores: 1
    coreLimit: "1200m"
    memory: "2G"
    labels:
      # Quoted so the label value is always read as a string.
      version: "3.5.1"
    serviceAccount: spark
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"
  executor:
    # Single source of executor JVM options (same rule as driver.javaOptions:
    # do not duplicate under sparkConf as spark.executor.extraJavaOptions).
    javaOptions: >-
      -Divy.cache.dir=/tmp
      -Dcom.amazonaws.sdk.disableCertChecking=true
      -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true
    cores: 1
    memory: "2G"
    labels:
      # Quoted so the label value is always read as a string.
      version: "3.5.1"
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"