# SparkApplication (Spark operator, v1beta2) that runs
# eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump from the shaded dhp jar,
# reading from s3a://raw-graph/01 and writing to s3a://scholix.
apiVersion: "sparkoperator.k8s.io/v1beta2"
kind: SparkApplication
metadata:
  name: spark-scholix
  namespace: dnet-spark-jobs
spec:
  type: Scala
  mode: cluster
  image: "dnet-spark:1.0.0"
  imagePullPolicy: IfNotPresent
  mainClass: eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump
  mainApplicationFile: "s3a://deps/dhp-shade-package-1.2.5-SNAPSHOT.jar"
  # Command-line arguments handed to mainClass, in order.
  arguments:
    - "--sourcePath"
    - "s3a://raw-graph/01"
    - "--targetPath"
    - "s3a://scholix"
  sparkVersion: "3.5.1"
  sparkConf:
    # Ivy cache is redirected to /tmp, which is where the PVC is mounted below.
    spark.driver.extraJavaOptions: "-Divy.cache.dir=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true"
    spark.executor.extraJavaOptions: "-Divy.cache.dir=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true"
    spark.hadoop.fs.defaultFS: "s3a://scholix"
    # NOTE(review): S3 credentials are stored in plain text in this manifest;
    # consider sourcing them from a Kubernetes Secret instead.
    spark.hadoop.fs.s3a.access.key: "minio"
    spark.hadoop.fs.s3a.secret.key: "minio123"
    # NOTE(review): the endpoint uses https:// while the ssl.enabled settings
    # below are "false" - confirm which one is intended.
    spark.hadoop.fs.s3a.endpoint: "https://minio.dnet-minio-tenant.svc.cluster.local"
    spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem"
    spark.hadoop.fs.s3a.path.style.access: "true"
    spark.hadoop.fs.s3a.attempts.maximum: "1"
    spark.hadoop.fs.s3a.connection.establish.timeout: "5000"
    spark.hadoop.fs.s3a.connection.timeout: "10001"
    spark.hadoop.fs.s3a.connection.ssl.enabled: "false"
    # NOTE(review): the next five keys do not start with "spark."; spark-submit
    # presumably ignores such properties - confirm and either drop them or
    # rename them with the "spark.hadoop." prefix.
    com.amazonaws.sdk.disableCertChecking: "true"
    com.cloudera.com.amazonaws.sdk.disableCertChecking: "true"
    fs.s3a.connection.ssl.strictverify: "false"
    fs.s3a.connection.ssl.enabled: "false"
    fs.s3a.ssl.enabled: "false"
    spark.hadoop.fs.s3a.ssl.enabled: "false"
  restartPolicy:
    type: Never
  # Single PVC-backed volume, mounted at /tmp by both driver and executor.
  volumes:
    - name: "test-volume"
      persistentVolumeClaim:
        claimName: my-spark-pvc-tmp
  driver:
    # NOTE(review): sparkConf above also sets spark.driver.extraJavaOptions;
    # confirm which of the two the operator ends up applying.
    javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true"
    cores: 1
    coreLimit: "1200m"
    memory: "2G"
    labels:
      version: "3.5.1"
    serviceAccount: spark
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"
  executor:
    # NOTE(review): sparkConf above also sets spark.executor.extraJavaOptions;
    # confirm which of the two the operator ends up applying.
    javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true"
    cores: 10
    memoryOverhead: "3G"
    memory: "4G"
    instances: 1
    labels:
      version: "3.5.1"
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"