From ef1d8aef178f956902532133456a5c7702a5cecd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 29 Jul 2020 18:27:46 +0200 Subject: [PATCH] added one test to verify the dump for the datasources --- ...umpOrganizationProjectDatasourceTest.java} | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) rename dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/{DumpOrganizationProjectTest.java => DumpOrganizationProjectDatasourceTest.java} (72%) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpOrganizationProjectTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpOrganizationProjectDatasourceTest.java similarity index 72% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpOrganizationProjectTest.java rename to dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpOrganizationProjectDatasourceTest.java index 053b5285c..53353a2e2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpOrganizationProjectTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpOrganizationProjectDatasourceTest.java @@ -6,6 +6,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; +import eu.dnetlib.dhp.schema.oaf.Datasource; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -24,7 +25,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Project; -public class DumpOrganizationProjectTest { +public class DumpOrganizationProjectDatasourceTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); @@ -33,18 +34,18 @@ public class DumpOrganizationProjectTest { private static Path workingDir; private static final Logger log = LoggerFactory - .getLogger(DumpOrganizationProjectTest.class); + .getLogger(DumpOrganizationProjectDatasourceTest.class); private static HashMap map = new HashMap<>(); @BeforeAll public static void beforeAll() throws IOException { workingDir = Files - .createTempDirectory(DumpOrganizationProjectTest.class.getSimpleName()); + .createTempDirectory(DumpOrganizationProjectDatasourceTest.class.getSimpleName()); log.info("using work dir {}", workingDir); SparkConf conf = new SparkConf(); - conf.setAppName(DumpOrganizationProjectTest.class.getSimpleName()); + conf.setAppName(DumpOrganizationProjectDatasourceTest.class.getSimpleName()); conf.setMaster("local[*]"); conf.set("spark.driver.host", "localhost"); @@ -55,7 +56,7 @@ public class DumpOrganizationProjectTest { spark = SparkSession .builder() - .appName(DumpOrganizationProjectTest.class.getSimpleName()) + .appName(DumpOrganizationProjectDatasourceTest.class.getSimpleName()) .config(conf) .getOrCreate(); } @@ -118,4 +119,28 @@ public class DumpOrganizationProjectTest { } + @Test + public void dumpDatasourceTest(){ + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/graph/datasource") + .getPath(); + + DumpGraphEntities dg = new DumpGraphEntities(); + + dg.run(false, sourcePath, workingDir.toString() + "/dump", Datasource.class, null); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/dump") + .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource.class)); + + Assertions.assertEquals(5, verificationDataset.count()); + + verificationDataset.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o))); + } + }