forked from D-Net/dnet-hadoop
increased number of partitions produced by the join_all_entities phase as well as spark.sql.shuffle.partitions in adjancency_lists phase
This commit is contained in:
parent
fdd54bad1c
commit
5dea155a87
|
@ -362,7 +362,7 @@
|
||||||
<arg>--inputGraphRootPath</arg><arg>${inputGraphRootPath}</arg>
|
<arg>--inputGraphRootPath</arg><arg>${inputGraphRootPath}</arg>
|
||||||
<arg>--inputRelatedEntitiesPath</arg><arg>${workingDir}/join_partial</arg>
|
<arg>--inputRelatedEntitiesPath</arg><arg>${workingDir}/join_partial</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/join_entities</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/join_entities</arg>
|
||||||
<arg>--numPartitions</arg><arg>24000</arg>
|
<arg>--numPartitions</arg><arg>35000</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="adjancency_lists"/>
|
<ok to="adjancency_lists"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -383,7 +383,7 @@
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=7680
|
--conf spark.sql.shuffle.partitions=15000
|
||||||
--conf spark.network.timeout=${sparkNetworkTimeout}
|
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputPath</arg><arg>${workingDir}/join_entities</arg>
|
<arg>--inputPath</arg><arg>${workingDir}/join_entities</arg>
|
||||||
|
|
Loading…
Reference in New Issue