oct-update #1

@@ -27,7 +27,7 @@ https://github.com/GoogleCloudPlatform/spark-on-k8s-operator
 
 from os import path
 from datetime import timedelta, datetime
-# from workflow.dnet.spark_configurator import SparkConfigurator
+from spark_configurator import SparkConfigurator
 
 # [START import_module]
 # The DAG object; we'll need this to instantiate a DAG
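
The hunk above swaps the inline class for an import from a standalone spark_configurator module (the class removal follows in the next hunk). A minimal sketch of how the DAG might then consume it with the cncf.kubernetes provider's SparkKubernetesOperator; the job name, main class, jar path and arguments are hypothetical, and passing the rendered spec via application_file assumes a provider version that accepts a YAML string:

import yaml
from airflow.providers.cncf.kubernetes.operators.spark_kubernetes import SparkKubernetesOperator
from spark_configurator import SparkConfigurator

# Build the SparkApplication spec from the shared helper (hypothetical job values).
spec = SparkConfigurator(
    name="dnet-example-job",
    mainClass="eu.dnetlib.dhp.ExampleJob",
    jarLocation="s3a://spark/jars/example.jar",
    arguments=["--input", "s3a://spark/input"],
).get_configuration()

# Inside a `with DAG(...)` block, submit the job to the spark-on-k8s-operator.
submit_job = SparkKubernetesOperator(
    task_id="submit_example_job",
    namespace="dnet-spark-jobs",
    application_file=yaml.dump(spec),  # spec rendered as a SparkApplication manifest
)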

@@ -40,128 +40,6 @@ from airflow.utils.dates import days_ago
 
 # [END import_module]
 
-class SparkConfigurator:
-    def __init__(self,
-                 name,
-                 mainClass,
-                 jarLocation: str,
-                 arguments,
-                 apiVersion=None,
-                 namespace="dnet-spark-jobs",
-                 image="dnet-spark:1.0.0",
-                 driver_cores=1,
-                 driver_memory='1G',
-                 executor_cores=1,
-                 executor_memory="1G",
-                 executor_memoryOverhead="1G",
-                 executor_instances=1
-                 ) -> None:
-        if apiVersion:
-            self.apiVersion = apiVersion
-        else:
-            self.apiVersion = "sparkoperator.k8s.io/v1beta2"
-        self.namespace = namespace
-        self.name = name
-        self.image = image
-        self.mainClass = mainClass
-        self.jarLocation = jarLocation
-        self.arguments = arguments
-        self.s3Configuration = {
-            "spark.driver.extraJavaOptions": "-Divy.cache.dir=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true",
-            "spark.executor.extraJavaOptions": "-Divy.cache.dir=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true",
-            "spark.hadoop.fs.defaultFS": "s3a://spark",
-            "spark.hadoop.fs.s3a.access.key": "minio",
-            "spark.hadoop.fs.s3a.secret.key": "minio123",
-            "spark.hadoop.fs.s3a.endpoint": "https://minio.dnet-minio-tenant.svc.cluster.local",
-            "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
-            "spark.hadoop.fs.s3a.path.style.access": "true",
-            "spark.hadoop.fs.s3a.attempts.maximum": "1",
-            "spark.hadoop.fs.s3a.connection.establish.timeout": "5000",
-            "spark.hadoop.fs.s3a.connection.timeout": "10001",
-            "spark.hadoop.fs.s3a.connection.ssl.enabled": "false",
-            "com.amazonaws.sdk.disableCertChecking": "true",
-            "com.cloudera.com.amazonaws.sdk.disableCertChecking": "true",
-            "fs.s3a.connection.ssl.strictverify": "false",
-            "fs.s3a.connection.ssl.enabled": "false",
-            "fs.s3a.ssl.enabled": "false",
-            "spark.hadoop.fs.s3a.ssl.enabled": "false"
-        }
-        self.sparkResoruceConf = {
-            'driver_cores': driver_cores,
-            'driver_memory': driver_memory,
-            'executor_cores': executor_cores,
-            'executor_memory': executor_memory,
-            'executor_instances': executor_instances,
-            'memoryOverhead': executor_memoryOverhead
-        }
-
-    def get_configuration(self) -> dict:
-        return {
-            "apiVersion": self.apiVersion,
-            "kind": "SparkApplication",
-            "metadata": {
-                "name": self.name,
-                "namespace": self.namespace
-            },
-            "spec": {
-                "type": "Scala",
-                "mode": "cluster",
-                "image": self.image,
-                "imagePullPolicy": "IfNotPresent",
-                "mainClass": self.mainClass,
-                "mainApplicationFile": self.jarLocation,
-                "arguments": self.arguments,
-                "sparkVersion": "3.5.1",
-                "sparkConf": self.s3Configuration,
-                "restartPolicy": {
-                    "type": "Never"
-                },
-                "volumes": [
-                    {
-                        "name": "test-volume",
-                        "persistentVolumeClaim": {
-                            "claimName": "my-spark-pvc-tmp"
-                        }
-                    }
-                ],
-                "driver": {
-                    "javaOptions": "-Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true",
-                    "cores": self.sparkResoruceConf['driver_cores'],
-                    "coreLimit": "1200m",
-                    "memory": self.sparkResoruceConf['driver_memory'],
-                    "labels": {
-                        "version": "3.5.1"
-                    },
-                    "serviceAccount": "spark",
-                    "volumeMounts": [
-                        {
-                            "name": "test-volume",
-                            "mountPath": "/tmp"
-                        }
-                    ]
-                },
-                "executor": {
-                    "javaOptions": "-Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true",
-                    "cores": self.sparkResoruceConf['executor_cores'],
-                    "memoryOverhead": self.sparkResoruceConf['memoryOverhead'],
-                    "memory": self.sparkResoruceConf['executor_memory'],
-                    "instances": self.sparkResoruceConf['executor_instances'],
-                    "labels": {
-                        "version": "3.5.1"
-                    },
-                    "volumeMounts": [
-                        {
-                            "name": "test-volume",
-                            "mountPath": "/tmp"
-                        }
-                    ]
-                }
-            }
-        }
-
 
 # [START default_args]
 # These args will get passed on to each operator
 # You can override them on a per-task basis during operator initialization
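
For context on the default_args comments above, a minimal sketch of the kind of dictionary they refer to; the values are placeholders, not this DAG's actual settings:

# Placeholder defaults applied to every task unless overridden per operator.
default_args = {
    "owner": "airflow",
    "depends_on_past": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),  # timedelta is imported at the top of the DAG file
}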
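
Finally, a quick sketch of what the removed builder yields with its defaults, useful for checking that the extracted spark_configurator module behaves the same; the job name, main class and jar path are hypothetical:

from spark_configurator import SparkConfigurator

spec = SparkConfigurator(
    name="example",
    mainClass="eu.dnetlib.dhp.ExampleJob",
    jarLocation="s3a://spark/jars/example.jar",
    arguments=[],
).get_configuration()

# Defaults taken from the removed __init__ above.
assert spec["metadata"]["namespace"] == "dnet-spark-jobs"
assert spec["spec"]["image"] == "dnet-spark:1.0.0"
assert spec["spec"]["driver"]["memory"] == "1G"
assert spec["spec"]["executor"]["instances"] == 1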