forked from D-Net/dnet-hadoop
spark dedup test fixed
This commit is contained in:
parent
d772d967aa
commit
5c9ef08a8e
|
@ -272,7 +272,17 @@ public class SparkDedupTest implements Serializable {
|
||||||
.distinct()
|
.distinct()
|
||||||
.count();
|
.count();
|
||||||
|
|
||||||
assertEquals(831, publications);
|
long mergedSw =
|
||||||
|
spark.read()
|
||||||
|
.load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")
|
||||||
|
.as(Encoders.bean(Relation.class))
|
||||||
|
.where("relClass=='merges'")
|
||||||
|
.javaRDD()
|
||||||
|
.map(Relation::getTarget)
|
||||||
|
.distinct()
|
||||||
|
.count();
|
||||||
|
|
||||||
|
assertEquals(897, publications);
|
||||||
assertEquals(835, organizations);
|
assertEquals(835, organizations);
|
||||||
assertEquals(100, projects);
|
assertEquals(100, projects);
|
||||||
assertEquals(100, datasource);
|
assertEquals(100, datasource);
|
||||||
|
@ -288,8 +298,14 @@ public class SparkDedupTest implements Serializable {
|
||||||
.filter(this::isDeletedByInference)
|
.filter(this::isDeletedByInference)
|
||||||
.count();
|
.count();
|
||||||
|
|
||||||
|
long deletedSw =
|
||||||
|
jsc.textFile(testDedupGraphBasePath + "/software")
|
||||||
|
.filter(this::isDeletedByInference)
|
||||||
|
.count();
|
||||||
|
|
||||||
assertEquals(mergedOrgs, deletedOrgs);
|
assertEquals(mergedOrgs, deletedOrgs);
|
||||||
assertEquals(mergedPubs, deletedPubs);
|
assertEquals(mergedPubs, deletedPubs);
|
||||||
|
assertEquals(mergedSw, deletedSw);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue