Added Spark driver image
This commit is contained in:
parent 0863c9b2e9
commit 32e8e86aa7
@@ -1,6 +1,6 @@
 kind: Cluster
 apiVersion: kind.x-k8s.io/v1alpha4
-name: openaire-data-platform
+name: dnet-data-platform
 
 nodes:
 - role: control-plane
@@ -1,5 +1,5 @@
 env = "local"
-kube_context= "kind-openaire-data-platform"
+kube_context= "kind-dnet-data-platform"
 domain = "local-dataplatform"
 admin_user = "admin"
 admin_password = "admin"
@@ -0,0 +1,8 @@
+FROM spark:3.5.1-scala2.12-java17-ubuntu
+
+USER root
+RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar -o ${SPARK_HOME}/jars/hadoop-aws-3.3.4.jar
+RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar -o ${SPARK_HOME}/jars/aws-java-sdk-bundle-1.12.262.jar
+
+
+USER spark
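The jar pair is pinned deliberately: hadoop-aws 3.3.4 matches the Hadoop line bundled with Spark 3.5.1, and aws-java-sdk-bundle 1.12.262 is the SDK version hadoop-aws 3.3.4 was built against; mismatched pairs are a common source of NoSuchMethodError at runtime. As a sanity check that the connector actually loads in this image, a minimal Scala sketch along these lines could be run (the endpoint, bucket, and credentials are assumptions copied from the SparkApplication manifest below, not values baked into the image):

import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Minimal smoke test: confirm the S3A connector is on the classpath and can
// list a bucket. Fails fast with ClassNotFoundException if hadoop-aws is
// missing, or NoSuchMethodError if the SDK bundle version does not match.
object S3ASmokeTest {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    // Assumed values, taken from the manifest in this commit:
    conf.set("fs.s3a.endpoint", "https://minio.dnet-minio-tenant.svc.cluster.local")
    conf.set("fs.s3a.access.key", "minio")
    conf.set("fs.s3a.secret.key", "minio123")
    conf.set("fs.s3a.path.style.access", "true")
    conf.set("fs.s3a.connection.ssl.enabled", "false")

    val fs = FileSystem.get(new URI("s3a://lib"), conf)
    fs.listStatus(new Path("s3a://lib/")).foreach(status => println(status.getPath))
  }
}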
@@ -0,0 +1,67 @@
+apiVersion: "sparkoperator.k8s.io/v1beta2"
+kind: SparkApplication
+metadata:
+  name: spark-scholix
+  namespace: dnet-spark-jobs
+spec:
+  type: Scala
+  mode: cluster
+  image: "dnet-spark:1.0.0"
+  imagePullPolicy: IfNotPresent
+  mainClass: eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump
+  mainApplicationFile: "s3a://lib/dhp-shade-package-1.2.5-SNAPSHOT.jar"
+  arguments: [
+    "--sourcePath", "s3a://raw-graph",
+    "--targetPath", "s3a://scholix",
+    "--master", "local[*]" ]
+  sparkVersion: "3.5.1"
+  sparkConf:
+    spark.driver.extraJavaOptions: "-Divy.cache.dir=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true"
+    spark.executor.extraJavaOptions: "-Divy.cache.dir=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true"
+    spark.hadoop.fs.defaultFS: "s3a://scholix"
+    spark.hadoop.fs.s3a.access.key: "minio"
+    spark.hadoop.fs.s3a.secret.key: "minio123"
+    spark.hadoop.fs.s3a.endpoint: "https://minio.dnet-minio-tenant.svc.cluster.local"
+    spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem"
+    spark.hadoop.fs.s3a.path.style.access: "true"
+    spark.hadoop.fs.s3a.attempts.maximum: "1"
+    spark.hadoop.fs.s3a.connection.establish.timeout: "5000"
+    spark.hadoop.fs.s3a.connection.timeout: "10001"
+    spark.hadoop.fs.s3a.connection.ssl.enabled: "false"
+    com.amazonaws.sdk.disableCertChecking: "true"
+    com.cloudera.com.amazonaws.sdk.disableCertChecking: "true"
+    fs.s3a.connection.ssl.strictverify: "false"
+    fs.s3a.connection.ssl.enabled: "false"
+    fs.s3a.ssl.enabled: "false"
+    spark.hadoop.fs.s3a.ssl.enabled: "false"
+  restartPolicy:
+    type: Never
+  volumes:
+    - name: "test-volume"
+      persistentVolumeClaim:
+        claimName: my-spark-pvc-tmp
+  dynamicAllocation:
+    enabled: true
+    initialExecutors: 2
+    minExecutors: 2
+    maxExecutors: 16
+  driver:
+    javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true"
+    cores: 1
+    coreLimit: "1200m"
+    memory: "2G"
+    labels:
+      version: 3.5.1
+    serviceAccount: spark
+    volumeMounts:
+      - name: "test-volume"
+        mountPath: "/tmp"
+  executor:
+    javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true"
+    cores: 1
+    memory: "2G"
+    labels:
+      version: 3.5.1
+    volumeMounts:
+      - name: "test-volume"
+        mountPath: "/tmp"
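The mainClass above ships inside the shaded dhp jar pulled from the lib bucket. For orientation only, here is a hedged Scala sketch of the driver-side shape such a job might take — read the raw graph from --sourcePath and write the Scholix dump to --targetPath over S3A. This is not the actual eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump implementation, and the S3A settings are deliberately omitted because the operator injects the sparkConf block above into the session:

import org.apache.spark.sql.SparkSession

// Hypothetical sketch: mimics the flag-style argument handling implied by the
// manifest ("--sourcePath", "--targetPath"); the real dnet class will differ.
object ScholixDumpSketch {
  def main(args: Array[String]): Unit = {
    // Pair up "--flag value" arguments into a lookup map.
    val argMap = args.sliding(2, 2).collect { case Array(k, v) => k -> v }.toMap
    val sourcePath = argMap("--sourcePath") // e.g. s3a://raw-graph
    val targetPath = argMap("--targetPath") // e.g. s3a://scholix

    val spark = SparkSession.builder()
      .appName("spark-scholix")
      .getOrCreate() // S3A endpoint/credentials arrive via sparkConf

    // Placeholder transformation: copy JSON records from source to target.
    spark.read.json(sourcePath).write.mode("overwrite").json(targetPath)
    spark.stop()
  }
}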