2022-01-26 16:02:50 +01:00
1 changed files with 78 additions and 26 deletions
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/group/GroupEntitiesSparkJobTest.java
@ -1,6 +1,8 @@
 package eu.dnetlib.dhp.oa.graph.group;
 import static org.junit.jupiter.api.Assertions.*;
 import java.io.IOException;
 import java.net.URISyntaxException;
 import java.nio.file.Files;
@ -19,22 +21,34 @@ import org.junit.jupiter.api.*;
 import com.fasterxml.jackson.databind.DeserializationFeature;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.Lists;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob;
 import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.utils.DHPUtils;
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
 public class GroupEntitiesSparkJobTest {
 	private static SparkSession spark;
-	private Path workingDir;
+	private static ObjectMapper mapper = new ObjectMapper()
-	private Path graphInputPath;
+		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
-	private Path outputPath;
+	private static Path workingDir;
 	private Path dataInputPath;
 	private Path groupEntityPath;
 	private Path dispatchEntityPath;
 	@BeforeAll
-	public static void beforeAll() {
+	public static void beforeAll() throws IOException {
 		workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName());
 		SparkConf conf = new SparkConf();
 		conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName());
 		conf.setMaster("local");
@ -45,41 +59,36 @@ public class GroupEntitiesSparkJobTest {
 	@BeforeEach
 	public void beforeEach() throws IOException, URISyntaxException {
-		workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName());
+		dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI());
-		graphInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI());
+		groupEntityPath = workingDir.resolve("grouped_entity");
-		outputPath = workingDir.resolve("output");
+		dispatchEntityPath = workingDir.resolve("dispatched_entity");
 	}
 	@AfterEach
 	public void afterEach() throws IOException {
 		FileUtils.deleteDirectory(workingDir.toFile());
 	}
 	@AfterAll
-	public static void afterAll() {
+	public static void afterAll() throws IOException {
 		spark.stop();
 		FileUtils.deleteDirectory(workingDir.toFile());
 	}
 	@Test
 	@Order(1)
 	void testGroupEntities() throws Exception {
 		GroupEntitiesSparkJob.main(new String[] {
 			"-isSparkSessionManaged",
 			Boolean.FALSE.toString(),
 			"-graphInputPath",
-			graphInputPath.toString(),
+			dataInputPath.toString(),
 			"-outputPath",
-			outputPath.toString()
+			groupEntityPath.toString()
 		});
 		ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
 		Dataset<Result> output = spark
 			.read()
-			.textFile(outputPath.toString())
+			.textFile(groupEntityPath.toString())
 			.map((MapFunction<String, String>) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING())
 			.map((MapFunction<String, Result>) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class));
-		Assertions
+		assertEquals(
 			.assertEquals(
 			1,
 			output
 				.filter(
@ -89,4 +98,47 @@ public class GroupEntitiesSparkJobTest {
 				.count());
 	}
 	@Test
 	@Order(2)
 	void testDispatchEntities() throws Exception {
 		for (String type : Lists
 			.newArrayList(
 				Publication.class.getCanonicalName(), eu.dnetlib.dhp.schema.oaf.Dataset.class.getCanonicalName())) {
 			String directory = StringUtils.substringAfterLast(type, ".").toLowerCase();
 			DispatchEntitiesSparkJob.main(new String[] {
 				"-isSparkSessionManaged",
 				Boolean.FALSE.toString(),
 				"-inputPath",
 				groupEntityPath.toString(),
 				"-outputPath",
 				dispatchEntityPath.resolve(directory).toString(),
 				"-graphTableClassName",
 				type
 			});
 		}
 		Dataset<Result> output = spark
 			.read()
 			.textFile(
 				DHPUtils
 					.toSeq(
 						HdfsSupport
 							.listFiles(dispatchEntityPath.toString(), spark.sparkContext().hadoopConfiguration())))
 			.map((MapFunction<String, Result>) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class));
 		assertEquals(3, output.count());
 		assertEquals(
 			2,
 			output
 				.map((MapFunction<Result, String>) r -> r.getResulttype().getClassid(), Encoders.STRING())
 				.filter((FilterFunction<String>) s -> s.equals("publication"))
 				.count());
 		assertEquals(
 			1,
 			output
 				.map((MapFunction<Result, String>) r -> r.getResulttype().getClassid(), Encoders.STRING())
 				.filter((FilterFunction<String>) s -> s.equals("dataset"))
 				.count());
 	}
 }