From d9f80488cc471bcb28b294de523e7ccac9bd571b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 2 Dec 2021 14:15:19 +0100 Subject: [PATCH] [GRAPH DUMP] Add one more test to check the filtering of the relations --- .../graph/dump/complete/DumpRelationTest.java | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java index 3dbf2b09d..8a2eb585e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java @@ -260,4 +260,54 @@ public class DumpRelationTest { .count()); } + @Test + public void test4() throws Exception {// + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") + .getPath(); + + SparkDumpRelationJob.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath, + "-removeSet", "isParticipant;isAuthorInstitutionOf" + }); + + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + verificationDataset.createOrReplaceTempView("table"); + + verificationDataset + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + Dataset check = spark + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); + + Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); + Assertions + .assertEquals( + 22, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); + + Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); + + + Assertions.assertEquals(0, check.filter("name = 'isAuthorInstitutionOf'").count()); + + } + }