Removed dataset caching, as it seems to increase the memory footprint too much
This commit is contained in:
parent
6c31fddd03
commit
3549b6976d
|
@ -639,6 +639,7 @@
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=8000
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--master</arg><arg>yarn</arg>
|
<arg>--master</arg><arg>yarn</arg>
|
||||||
<arg>--hdfsPath</arg><arg>${workingDir}/graph_raw</arg>
|
<arg>--hdfsPath</arg><arg>${workingDir}/graph_raw</arg>
|
||||||
|
|
|
@ -62,7 +62,6 @@ object CopyHdfsOafSparkApplication {
|
||||||
val oaf = spark.read
|
val oaf = spark.read
|
||||||
.textFile(validPaths: _*)
|
.textFile(validPaths: _*)
|
||||||
.map(v => (getOafType(v), v))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
.map(v => (getOafType(v), v))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
||||||
.cache()
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
ModelSupport.oafTypes
|
ModelSupport.oafTypes
|
||||||
|
|
Loading…
Reference in New Issue