forked from antonis.lempesis/dnet-hadoop
dhp-graph-mapper workflow tests upgraded to junit5
This commit is contained in:
parent
cd7dc3e1ae
commit
9dff4adbc3
|
@ -1,31 +0,0 @@
|
||||||
package eu.dnetlib.dhp.graph;
|
|
||||||
|
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
public class SparkGraphImportCounterTest {
|
|
||||||
|
|
||||||
public static List<Tuple2<String, Long>> countEntities(final String inputPath) throws Exception {
|
|
||||||
|
|
||||||
final SparkSession spark = SparkSession
|
|
||||||
.builder()
|
|
||||||
.appName(SparkGraphImportCounterTest.class.getSimpleName())
|
|
||||||
.master("local[*]")
|
|
||||||
.getOrCreate();
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
|
||||||
|
|
||||||
return GraphMappingUtils.types.entrySet()
|
|
||||||
.stream()
|
|
||||||
.map(entry -> {
|
|
||||||
final Long count = spark.read().load(inputPath + "/" + entry.getKey()).as(Encoders.bean(entry.getValue())).count();
|
|
||||||
return new Tuple2<String, Long>(entry.getKey(), count);
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,38 +1,52 @@
|
||||||
package eu.dnetlib.dhp.graph;
|
package eu.dnetlib.dhp.graph;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.junit.*;
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Disabled;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
public class SparkGraphImporterJobTest {
|
public class SparkGraphImporterJobTest {
|
||||||
|
|
||||||
private static final long MAX = 1000L;
|
private static final long MAX = 1000L;
|
||||||
private Path testDir;
|
|
||||||
|
|
||||||
@BeforeEach
|
@Disabled("must be parametrized to run locally")
|
||||||
public void setup() throws IOException {
|
public void testImport(@TempDir Path outPath) throws Exception {
|
||||||
testDir = Files.createTempDirectory(getClass().getSimpleName());
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterEach
|
|
||||||
public void tearDown() throws IOException {
|
|
||||||
FileUtils.deleteDirectory(testDir.toFile());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
@Ignore
|
|
||||||
public void testImport() throws Exception {
|
|
||||||
SparkGraphImporterJob.main(new String[] {
|
SparkGraphImporterJob.main(new String[] {
|
||||||
"-mt", "local[*]",
|
"-mt", "local[*]",
|
||||||
"-i", getClass().getResource("/eu/dnetlib/dhp/dhp-sample/part-m-00010").getPath(),
|
"-s", getClass().getResource("/eu/dnetlib/dhp/dhp-sample/publication_10001.json.gz").getPath(),
|
||||||
"-o", testDir.toString()});
|
"-h", "",
|
||||||
|
"-db", "test"
|
||||||
|
});
|
||||||
|
|
||||||
SparkGraphImportCounterTest.countEntities(testDir.toString()).forEach(t -> {
|
countEntities(outPath.toString()).forEach(t -> {
|
||||||
System.out.println(t);
|
System.out.println(t);
|
||||||
//Assert.assertEquals(String.format("mapped %s must be %s", t._1(), MAX), MAX, t._2().longValue());
|
Assertions.assertEquals(MAX, t._2().longValue(), String.format("mapped %s must be %s", t._1(), MAX));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static List<Tuple2<String, Long>> countEntities(final String inputPath) {
|
||||||
|
|
||||||
|
final SparkSession spark = SparkSession
|
||||||
|
.builder()
|
||||||
|
.appName(SparkGraphImporterJobTest.class.getSimpleName())
|
||||||
|
.master("local[*]")
|
||||||
|
.getOrCreate();
|
||||||
|
//final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
return GraphMappingUtils.types.entrySet()
|
||||||
|
.stream()
|
||||||
|
.map(entry -> {
|
||||||
|
final Long count = spark.read().load(inputPath + "/" + entry.getKey()).as(Encoders.bean(entry.getValue())).count();
|
||||||
|
return new Tuple2<String, Long>(entry.getKey(), count);
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue