Repartition the join_entities output into 24k files (numPartitions 12000 -> 24000)

This commit is contained in:
Claudio Atzori 2020-05-27 12:44:01 +02:00
parent 2f1a623d09
commit cfd753217c
1 changed file with 2 additions and 2 deletions

View File

@@ -362,7 +362,7 @@
<arg>--inputGraphRootPath</arg><arg>${inputGraphRootPath}</arg>
<arg>--inputRelatedEntitiesPath</arg><arg>${workingDir}/join_partial</arg>
<arg>--outputPath</arg><arg>${workingDir}/join_entities</arg>
<arg>--numPartitions</arg><arg>12000</arg>
<arg>--numPartitions</arg><arg>24000</arg>
</spark>
<ok to="adjancency_lists"/>
<error to="Kill"/>
@@ -386,7 +386,7 @@
--conf spark.sql.shuffle.partitions=7680
--conf spark.network.timeout=${sparkNetworkTimeout}
</spark-opts>
<arg>--inputPath</arg> <arg>${workingDir}/join_entities</arg>
<arg>--inputPath</arg><arg>${workingDir}/join_entities</arg>
<arg>--outputPath</arg><arg>${workingDir}/joined</arg>
</spark>
<ok to="convert_to_xml"/>