diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/SparkGraphImportCounterTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/SparkGraphImportCounterTest.java deleted file mode 100644 index a8e810d4f..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/SparkGraphImportCounterTest.java +++ /dev/null @@ -1,31 +0,0 @@ -package eu.dnetlib.dhp.graph; - -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SparkSession; -import scala.Tuple2; - -import java.util.List; -import java.util.stream.Collectors; - -public class SparkGraphImportCounterTest { - - public static List> countEntities(final String inputPath) throws Exception { - - final SparkSession spark = SparkSession - .builder() - .appName(SparkGraphImportCounterTest.class.getSimpleName()) - .master("local[*]") - .getOrCreate(); - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - - return GraphMappingUtils.types.entrySet() - .stream() - .map(entry -> { - final Long count = spark.read().load(inputPath + "/" + entry.getKey()).as(Encoders.bean(entry.getValue())).count(); - return new Tuple2(entry.getKey(), count); - }) - .collect(Collectors.toList()); - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/SparkGraphImporterJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/SparkGraphImporterJobTest.java index cb659d52d..cca666e21 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/SparkGraphImporterJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/SparkGraphImporterJobTest.java @@ -1,38 +1,52 @@ package eu.dnetlib.dhp.graph; -import org.apache.commons.io.FileUtils; -import org.junit.*; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import scala.Tuple2; -import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; +import java.util.List; +import java.util.stream.Collectors; public class SparkGraphImporterJobTest { private static final long MAX = 1000L; - private Path testDir; - @BeforeEach - public void setup() throws IOException { - testDir = Files.createTempDirectory(getClass().getSimpleName()); - } - - @AfterEach - public void tearDown() throws IOException { - FileUtils.deleteDirectory(testDir.toFile()); - } - - @Test - @Ignore - public void testImport() throws Exception { + @Disabled("must be parametrized to run locally") + public void testImport(@TempDir Path outPath) throws Exception { SparkGraphImporterJob.main(new String[] { "-mt", "local[*]", - "-i", getClass().getResource("/eu/dnetlib/dhp/dhp-sample/part-m-00010").getPath(), - "-o", testDir.toString()}); + "-s", getClass().getResource("/eu/dnetlib/dhp/dhp-sample/publication_10001.json.gz").getPath(), + "-h", "", + "-db", "test" + }); - SparkGraphImportCounterTest.countEntities(testDir.toString()).forEach(t -> { + countEntities(outPath.toString()).forEach(t -> { System.out.println(t); - //Assert.assertEquals(String.format("mapped %s must be %s", t._1(), MAX), MAX, t._2().longValue()); + Assertions.assertEquals(MAX, t._2().longValue(), String.format("mapped %s must be %s", t._1(), MAX)); }); } + + public static List> countEntities(final String inputPath) { + + final SparkSession spark = SparkSession + .builder() + .appName(SparkGraphImporterJobTest.class.getSimpleName()) + .master("local[*]") + .getOrCreate(); + //final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + return GraphMappingUtils.types.entrySet() + .stream() + .map(entry -> { + final Long count = spark.read().load(inputPath + "/" + entry.getKey()).as(Encoders.bean(entry.getValue())).count(); + return new Tuple2(entry.getKey(), count); + }) + .collect(Collectors.toList()); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/dhp-sample/publication_10001.json.gz b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/dhp-sample/publication_10001.json.gz new file mode 100644 index 000000000..8d2635fbb Binary files /dev/null and b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/dhp-sample/publication_10001.json.gz differ