forked from D-Net/dnet-hadoop
Added unit test for the DispatchEntitiesSparkJob
This commit is contained in:
parent
abfa9c6045
commit
3b9020c1b7
|
@ -1,6 +1,8 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.group;
|
package eu.dnetlib.dhp.oa.graph.group;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
|
@ -19,22 +21,34 @@ import org.junit.jupiter.api.*;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob;
|
||||||
import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob;
|
import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
|
||||||
public class GroupEntitiesSparkJobTest {
|
public class GroupEntitiesSparkJobTest {
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private Path workingDir;
|
private static ObjectMapper mapper = new ObjectMapper()
|
||||||
private Path graphInputPath;
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
|
||||||
private Path outputPath;
|
private static Path workingDir;
|
||||||
|
private Path dataInputPath;
|
||||||
|
|
||||||
|
private Path groupEntityPath;
|
||||||
|
private Path dispatchEntityPath;
|
||||||
|
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
public static void beforeAll() {
|
public static void beforeAll() throws IOException {
|
||||||
|
workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName());
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName());
|
conf.setAppName(GroupEntitiesSparkJob.class.getSimpleName());
|
||||||
conf.setMaster("local");
|
conf.setMaster("local");
|
||||||
|
@ -45,48 +59,86 @@ public class GroupEntitiesSparkJobTest {
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void beforeEach() throws IOException, URISyntaxException {
|
public void beforeEach() throws IOException, URISyntaxException {
|
||||||
workingDir = Files.createTempDirectory(GroupEntitiesSparkJob.class.getSimpleName());
|
dataInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI());
|
||||||
graphInputPath = Paths.get(ClassLoader.getSystemResource("eu/dnetlib/dhp/oa/graph/group").toURI());
|
groupEntityPath = workingDir.resolve("grouped_entity");
|
||||||
outputPath = workingDir.resolve("output");
|
dispatchEntityPath = workingDir.resolve("dispatched_entity");
|
||||||
}
|
|
||||||
|
|
||||||
@AfterEach
|
|
||||||
public void afterEach() throws IOException {
|
|
||||||
FileUtils.deleteDirectory(workingDir.toFile());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterAll
|
@AfterAll
|
||||||
public static void afterAll() {
|
public static void afterAll() throws IOException {
|
||||||
spark.stop();
|
spark.stop();
|
||||||
|
FileUtils.deleteDirectory(workingDir.toFile());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@Order(1)
|
||||||
void testGroupEntities() throws Exception {
|
void testGroupEntities() throws Exception {
|
||||||
GroupEntitiesSparkJob.main(new String[] {
|
GroupEntitiesSparkJob.main(new String[] {
|
||||||
"-isSparkSessionManaged",
|
"-isSparkSessionManaged",
|
||||||
Boolean.FALSE.toString(),
|
Boolean.FALSE.toString(),
|
||||||
"-graphInputPath",
|
"-graphInputPath",
|
||||||
graphInputPath.toString(),
|
dataInputPath.toString(),
|
||||||
"-outputPath",
|
"-outputPath",
|
||||||
outputPath.toString()
|
groupEntityPath.toString()
|
||||||
});
|
});
|
||||||
|
|
||||||
ObjectMapper mapper = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
|
||||||
Dataset<Result> output = spark
|
Dataset<Result> output = spark
|
||||||
.read()
|
.read()
|
||||||
.textFile(outputPath.toString())
|
.textFile(groupEntityPath.toString())
|
||||||
.map((MapFunction<String, String>) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING())
|
.map((MapFunction<String, String>) s -> StringUtils.substringAfter(s, "|"), Encoders.STRING())
|
||||||
.map((MapFunction<String, Result>) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class));
|
.map((MapFunction<String, Result>) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class));
|
||||||
|
|
||||||
Assertions
|
assertEquals(
|
||||||
.assertEquals(
|
1,
|
||||||
1,
|
output
|
||||||
output
|
.filter(
|
||||||
.filter(
|
(FilterFunction<Result>) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9"
|
||||||
(FilterFunction<Result>) r -> "50|doi_________::09821844208a5cd6300b2bfb13bca1b9"
|
.equals(r.getId()) &&
|
||||||
.equals(r.getId()) &&
|
r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo")))
|
||||||
r.getCollectedfrom().stream().anyMatch(kv -> kv.getValue().equalsIgnoreCase("zenodo")))
|
.count());
|
||||||
.count());
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Order(2)
|
||||||
|
void testDispatchEntities() throws Exception {
|
||||||
|
for (String type : Lists
|
||||||
|
.newArrayList(
|
||||||
|
Publication.class.getCanonicalName(), eu.dnetlib.dhp.schema.oaf.Dataset.class.getCanonicalName())) {
|
||||||
|
String directory = StringUtils.substringAfterLast(type, ".").toLowerCase();
|
||||||
|
DispatchEntitiesSparkJob.main(new String[] {
|
||||||
|
"-isSparkSessionManaged",
|
||||||
|
Boolean.FALSE.toString(),
|
||||||
|
"-inputPath",
|
||||||
|
groupEntityPath.toString(),
|
||||||
|
"-outputPath",
|
||||||
|
dispatchEntityPath.resolve(directory).toString(),
|
||||||
|
"-graphTableClassName",
|
||||||
|
type
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Dataset<Result> output = spark
|
||||||
|
.read()
|
||||||
|
.textFile(
|
||||||
|
DHPUtils
|
||||||
|
.toSeq(
|
||||||
|
HdfsSupport
|
||||||
|
.listFiles(dispatchEntityPath.toString(), spark.sparkContext().hadoopConfiguration())))
|
||||||
|
.map((MapFunction<String, Result>) s -> mapper.readValue(s, Result.class), Encoders.bean(Result.class));
|
||||||
|
|
||||||
|
assertEquals(3, output.count());
|
||||||
|
assertEquals(
|
||||||
|
2,
|
||||||
|
output
|
||||||
|
.map((MapFunction<Result, String>) r -> r.getResulttype().getClassid(), Encoders.STRING())
|
||||||
|
.filter((FilterFunction<String>) s -> s.equals("publication"))
|
||||||
|
.count());
|
||||||
|
assertEquals(
|
||||||
|
1,
|
||||||
|
output
|
||||||
|
.map((MapFunction<Result, String>) r -> r.getResulttype().getClassid(), Encoders.STRING())
|
||||||
|
.filter((FilterFunction<String>) s -> s.equals("dataset"))
|
||||||
|
.count());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue