From 88562c0930dd65eef55e026ae2a299b56eeddeb5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Apr 2022 18:35:03 +0200 Subject: [PATCH] [EOSC TAG] added test for galaxy for title and description criterias --- .../dnetlib/dhp/bulktag/EOSCTagJobTest.java | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java index 82ac9f751..2c2334a6c 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/EOSCTagJobTest.java @@ -166,4 +166,84 @@ public class EOSCTagJobTest { } + @Test + void galaxyUpdatesTest() throws Exception { + spark.read().textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/software").getPath()) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, Software.class), Encoders.bean(Software.class)) + .write() + .option("compression","gzip") + .json(workingDir.toString() + "/input/software"); + + spark.read().textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/dataset").getPath()) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, Dataset.class), Encoders.bean(Dataset.class)) + .write() + .option("compression","gzip") + .json(workingDir.toString() + "/input/dataset"); + + spark.read().textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct").getPath()) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, OtherResearchProduct.class), Encoders.bean(OtherResearchProduct.class)) + .write() + .option("compression","gzip") + .json(workingDir.toString() + "/input/otherresearchproduct"); + + SparkEoscTag + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", + workingDir.toString() + "/input", + "-workingPath", workingDir.toString() + "/working" + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/input/software") + .map(item -> OBJECT_MAPPER.readValue(item, Software.class)); + + Assertions.assertEquals(10, tmp.count()); + + Assertions.assertEquals(2, tmp.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow"))).count()); + + Assertions.assertEquals(2, tmp.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4")).collect() + .get(0).getSubject().size()); + Assertions.assertTrue(tmp.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4")).collect() + .get(0).getSubject().stream().anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow"))); + + + Assertions.assertEquals(6, tmp.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11")).collect() + .get(0).getSubject().size()); + Assertions.assertTrue(tmp.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11")).collect() + .get(0).getSubject().stream().anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow"))); + + Assertions.assertEquals(8, tmp.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56")).collect() + .get(0).getSubject().size()); + Assertions.assertFalse(tmp.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56")).collect() + .get(0).getSubject().stream().anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow"))); + + JavaRDD orp = sc.textFile(workingDir.toString() + "/input/otherresearchproduct").map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class)); + + Assertions.assertEquals(10, orp.count()); + + Assertions.assertEquals(2, orp.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow"))).count()); + + + Assertions.assertEquals(3, orp.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07")).collect() + .get(0).getSubject().size()); + Assertions.assertTrue(orp.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07")).collect() + .get(0).getSubject().stream().anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow"))); + + Assertions.assertEquals(2, orp.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5")).collect() + .get(0).getSubject().size()); + Assertions.assertFalse(orp.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5")).collect() + .get(0).getSubject().stream().anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow"))); + + Assertions.assertEquals(3, orp.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72")).collect() + .get(0).getSubject().size()); + Assertions.assertTrue(orp.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72")).collect() + .get(0).getSubject().stream().anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow"))); + + } + }