forked from D-Net/dnet-hadoop
WIP SparkCreateMergeRels distinct relations
This commit is contained in:
parent
7dd91edf43
commit
c8284bab06
|
@ -36,11 +36,11 @@ public class RelationAggregator extends Aggregator<Relation, Relation, Relation>
|
|||
|
||||
@Override
|
||||
public Encoder<Relation> bufferEncoder() {
|
||||
return Encoders.bean(Relation.class);
|
||||
return Encoders.kryo(Relation.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Encoder<Relation> outputEncoder() {
|
||||
return Encoders.bean(Relation.class);
|
||||
return Encoders.kryo(Relation.class);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -98,10 +98,10 @@ public class SparkPropagateRelation extends AbstractSparkAction {
|
|||
getDeletedFn());
|
||||
|
||||
save(
|
||||
newRels
|
||||
distinctRelations(newRels
|
||||
.union(updated)
|
||||
.union(mergeRels)
|
||||
.map((MapFunction<Relation, Relation>) r -> r, Encoders.kryo(Relation.class)),
|
||||
.map((MapFunction<Relation, Relation>) r -> r, Encoders.kryo(Relation.class))),
|
||||
outputRelationPath, SaveMode.Overwrite);
|
||||
}
|
||||
|
||||
|
|
|
@ -77,11 +77,13 @@ public class SparkDedupTest implements Serializable {
|
|||
FileUtils.deleteDirectory(new File(testOutputBasePath));
|
||||
FileUtils.deleteDirectory(new File(testDedupGraphBasePath));
|
||||
|
||||
final SparkConf conf = new SparkConf();
|
||||
conf.set("spark.sql.shuffle.partitions", "200");
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(SparkDedupTest.class.getSimpleName())
|
||||
.master("local[*]")
|
||||
.config(new SparkConf())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
|
||||
jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
|
Loading…
Reference in New Issue