Removed dataset caching, as it appears to have too large an impact on the memory footprint.

This commit is contained in:
Claudio Atzori 2025-01-08 15:44:58 +01:00
parent 6c31fddd03
commit 3549b6976d
2 changed files with 1 addition and 1 deletion

View File

@ -639,6 +639,7 @@
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=8000
</spark-opts>
<arg>--master</arg><arg>yarn</arg>
<arg>--hdfsPath</arg><arg>${workingDir}/graph_raw</arg>

View File

@ -62,7 +62,6 @@ object CopyHdfsOafSparkApplication {
val oaf = spark.read
.textFile(validPaths: _*)
.map(v => (getOafType(v), v))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
.cache()
try {
ModelSupport.oafTypes