forked from D-Net/dnet-hadoop
Repartition the join_entities output into 24k files (numPartitions 12000 → 24000)
This commit is contained in:
parent
2f1a623d09
commit
cfd753217c
|
@ -362,7 +362,7 @@
|
|||
<arg>--inputGraphRootPath</arg><arg>${inputGraphRootPath}</arg>
|
||||
<arg>--inputRelatedEntitiesPath</arg><arg>${workingDir}/join_partial</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/join_entities</arg>
|
||||
<arg>--numPartitions</arg><arg>12000</arg>
|
||||
<arg>--numPartitions</arg><arg>24000</arg>
|
||||
</spark>
|
||||
<ok to="adjancency_lists"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -386,7 +386,7 @@
|
|||
--conf spark.sql.shuffle.partitions=7680
|
||||
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg> <arg>${workingDir}/join_entities</arg>
|
||||
<arg>--inputPath</arg><arg>${workingDir}/join_entities</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/joined</arg>
|
||||
</spark>
|
||||
<ok to="convert_to_xml"/>
|
||||
|
|
Loading…
Reference in New Issue