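Minor fix: use jarLocation as mainApplicationFile, remove the stray trailing comma that turned jarLocation into a tuple, and raise executor memory to 4G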

This commit is contained in:
Sandro La Bruzzo 2024-05-02 14:23:45 +02:00
parent ef104b10e6
commit 3666871e4d
2 changed files with 11 additions and 12 deletions

View File

@ -58,15 +58,14 @@ default_args = {
}
spec =SparkConfigurator(
name="spark-scholix",
mainClass="eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump",
jarLocation="s3a://deps/dhp-shade-package-1.2.5-SNAPSHOT.jar",
arguments =[ "--sourcePath", "s3a://raw-graph/01", "--targetPath", "s3a://scholix"],
executor_cores=10,
executor_memory="3G",
executor_instances=1,
executor_memoryOverhead="3G"
).get_configuration()
name="spark-scholix", \
mainClass="eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump", \
jarLocation = 's3a://deps/dhp-shade-package-1.2.5-SNAPSHOT.jar'\
,arguments =[ "--sourcePath", "s3a://raw-graph/01", "--targetPath", "s3a://scholix"],\
executor_cores=10,\
executor_memory="4G", \
executor_instances=1, \
executor_memoryOverhead="3G").get_configuration()
logger.info("found configuration")

View File

@ -2,7 +2,7 @@ class SparkConfigurator:
def __init__(self,
name,
mainClass,
jarLocation,
jarLocation:str,
arguments,
apiVersion=None,
namespace="dnet-spark-jobs",
@ -22,7 +22,7 @@ class SparkConfigurator:
self.name = name
self.image= image
self.mainClass = mainClass
self.jarLocation = jarLocation,
self.jarLocation = jarLocation
self.arguments= arguments
self.s3Configuration = {
"spark.driver.extraJavaOptions": "-Divy.cache.dir=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true -Dcom.cloudera.com.amazonaws.sdk.disableCertChecking=true",
@ -68,7 +68,7 @@ class SparkConfigurator:
"image":self.image,
"imagePullPolicy": "IfNotPresent",
"mainClass": self.mainClass,
"mainApplicationFile": self.mainClass,
"mainApplicationFile": self.jarLocation,
"arguments": self.arguments,
"sparkVersion": "3.5.1",
"sparkConf": self.s3Configuration,