dhp-graph-mapper workflow tests upgraded to junit5

Claudio Atzori 2020-03-25 18:25:12 +01:00
parent cd7dc3e1ae
commit 9dff4adbc3
3 changed files with 36 additions and 53 deletions

eu/dnetlib/dhp/graph/SparkGraphImportCounterTest.java (deleted)

@@ -1,31 +0,0 @@
-package eu.dnetlib.dhp.graph;
-
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SparkSession;
-import scala.Tuple2;
-
-import java.util.List;
-import java.util.stream.Collectors;
-
-public class SparkGraphImportCounterTest {
-
-    public static List<Tuple2<String, Long>> countEntities(final String inputPath) throws Exception {
-        final SparkSession spark = SparkSession
-                .builder()
-                .appName(SparkGraphImportCounterTest.class.getSimpleName())
-                .master("local[*]")
-                .getOrCreate();
-        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
-
-        return GraphMappingUtils.types.entrySet()
-                .stream()
-                .map(entry -> {
-                    final Long count = spark.read().load(inputPath + "/" + entry.getKey()).as(Encoders.bean(entry.getValue())).count();
-                    return new Tuple2<String, Long>(entry.getKey(), count);
-                })
-                .collect(Collectors.toList());
-    }
-}
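The countEntities helper deleted above is not lost: it moves, essentially verbatim, into SparkGraphImporterJobTest below. For context, a minimal sketch of how the old test class consumed it; the output path here is hypothetical, standing in for the import job's output directory:

// Hypothetical caller of the deleted helper, as the old test did;
// "/tmp/graph-out" stands in for the mapper job's output directory.
final List<Tuple2<String, Long>> counts =
        SparkGraphImportCounterTest.countEntities("/tmp/graph-out");
counts.forEach(t -> System.out.println(t._1() + " -> " + t._2()));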

eu/dnetlib/dhp/graph/SparkGraphImporterJobTest.java

@@ -1,38 +1,52 @@
 package eu.dnetlib.dhp.graph;
 
-import org.apache.commons.io.FileUtils;
-import org.junit.*;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import scala.Tuple2;
 
-import java.io.IOException;
-import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.List;
+import java.util.stream.Collectors;
 
 public class SparkGraphImporterJobTest {
 
     private static final long MAX = 1000L;
-    private Path testDir;
-
-    @BeforeEach
-    public void setup() throws IOException {
-        testDir = Files.createTempDirectory(getClass().getSimpleName());
-    }
-
-    @AfterEach
-    public void tearDown() throws IOException {
-        FileUtils.deleteDirectory(testDir.toFile());
-    }
 
     @Test
-    @Ignore
-    public void testImport() throws Exception {
+    @Disabled("must be parametrized to run locally")
+    public void testImport(@TempDir Path outPath) throws Exception {
         SparkGraphImporterJob.main(new String[] {
                 "-mt", "local[*]",
-                "-i", getClass().getResource("/eu/dnetlib/dhp/dhp-sample/part-m-00010").getPath(),
-                "-o", testDir.toString()});
+                "-s", getClass().getResource("/eu/dnetlib/dhp/dhp-sample/publication_10001.json.gz").getPath(),
+                "-h", "",
+                "-db", "test"
+        });
 
-        SparkGraphImportCounterTest.countEntities(testDir.toString()).forEach(t -> {
+        countEntities(outPath.toString()).forEach(t -> {
             System.out.println(t);
-            //Assert.assertEquals(String.format("mapped %s must be %s", t._1(), MAX), MAX, t._2().longValue());
+            Assertions.assertEquals(MAX, t._2().longValue(), String.format("mapped %s must be %s", t._1(), MAX));
         });
     }
+
+    public static List<Tuple2<String, Long>> countEntities(final String inputPath) {
+        final SparkSession spark = SparkSession
+                .builder()
+                .appName(SparkGraphImporterJobTest.class.getSimpleName())
+                .master("local[*]")
+                .getOrCreate();
+        //final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+        return GraphMappingUtils.types.entrySet()
+                .stream()
+                .map(entry -> {
+                    final Long count = spark.read().load(inputPath + "/" + entry.getKey()).as(Encoders.bean(entry.getValue())).count();
+                    return new Tuple2<String, Long>(entry.getKey(), count);
+                })
+                .collect(Collectors.toList());
+    }
 }
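For readers tracking the migration, the JUnit 4 constructs dropped by this commit map onto their JUnit 5 (Jupiter) counterparts as in the sketch below; this is a reference for the changes above, not part of the diff:

// @Ignore          -> @Disabled("reason")
// @Before / @After -> @BeforeEach / @AfterEach (made redundant here by @TempDir)
// Assert           -> Assertions, with the failure message moved to the last argument:
Assert.assertEquals("mapped count", 1000L, actual);      // JUnit 4: message first
Assertions.assertEquals(1000L, actual, "mapped count");  // JUnit 5: message last

// @TempDir (available since JUnit 5.4) injects a temporary directory that the
// framework creates before the test and deletes afterwards, replacing the
// manual setup()/tearDown() pair removed above:
@Test
void testImport(@TempDir Path outPath) throws Exception {
    // outPath already exists here and is cleaned up automatically after the test
}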