Removed dataset caching, as it seems to increase the memory footprint too much
This commit is contained in:
parent
6c31fddd03
commit
3549b6976d
|
@ -639,6 +639,7 @@
|
|||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=8000
|
||||
</spark-opts>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
<arg>--hdfsPath</arg><arg>${workingDir}/graph_raw</arg>
|
||||
|
|
|
@ -62,7 +62,6 @@ object CopyHdfsOafSparkApplication {
|
|||
val oaf = spark.read
|
||||
.textFile(validPaths: _*)
|
||||
.map(v => (getOafType(v), v))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
||||
.cache()
|
||||
|
||||
try {
|
||||
ModelSupport.oafTypes
|
||||
|
|
Loading…
Reference in New Issue