From 5c9ef08a8e0afc0f6b99ece47930e3d8e9e332ed Mon Sep 17 00:00:00 2001
From: miconis
Date: Tue, 21 Apr 2020 10:19:04 +0200
Subject: [PATCH] spark dedup test fixed

---
 .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
index 8fd7a0ec9..695498a6e 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java
@@ -272,7 +272,17 @@ public class SparkDedupTest implements Serializable {
                         .distinct()
                         .count();
 
-        assertEquals(831, publications);
+        long mergedSw =
+                spark.read()
+                        .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")
+                        .as(Encoders.bean(Relation.class))
+                        .where("relClass=='merges'")
+                        .javaRDD()
+                        .map(Relation::getTarget)
+                        .distinct()
+                        .count();
+
+        assertEquals(897, publications);
         assertEquals(835, organizations);
         assertEquals(100, projects);
         assertEquals(100, datasource);
@@ -288,8 +298,14 @@ public class SparkDedupTest implements Serializable {
                         .filter(this::isDeletedByInference)
                         .count();
 
+        long deletedSw =
+                jsc.textFile(testDedupGraphBasePath + "/software")
+                        .filter(this::isDeletedByInference)
+                        .count();
+
         assertEquals(mergedOrgs, deletedOrgs);
         assertEquals(mergedPubs, deletedPubs);
+        assertEquals(mergedSw, deletedSw);
     }
 
     @Test