From 457293ccc0c86f1cb5e51b179772302102b18c81 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 19 May 2020 18:43:42 +0200 Subject: [PATCH] test for the variuos steps of project update with programme --- .../actionmanager/project/CSVParserTest.java | 50 +++++----- .../project/PrepareProgrammeTest.java | 90 ++++++++++++++++++ .../project/SparkUpdateProjectTest.java | 92 ++++++++++++++++++- .../httpconnector/HttpConnectorTest.java | 35 +++++++ 4 files changed, 240 insertions(+), 27 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java index d344f3118..17fdd4511 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java @@ -1,43 +1,41 @@ + package eu.dnetlib.dhp.actionmanager.project; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVParser; -public class ReadCSVTest { +public class CSVParserTest { - private static Path workingDir; + private static Path workingDir; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(eu.dnetlib.dhp.actionmanager.project.ReadCSVTest.class.getSimpleName()); + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(CSVParserTest.class.getSimpleName()); + } - } - @Test - public void readProgrammeTest() throws Exception { + @Test + public void readProgrammeTest() throws Exception { - String programmecsv = IOUtils.toString(getClass() - .getClassLoader().getResourceAsStream("eu/dnetlib/dhp/actionmanager/project/programme.csv")); - ReadCSV - .main( - new String[] { - "-fileURL", - "http://cordis.europa.eu/data/reference/cordisref-H2020programmes.csv", - "-outputPath", - workingDir.toString() + "/project", - "-hdfsPath", - getClass().getResource("/eu/dnetlib/dhp/blacklist/blacklist").getPath(), - "-mergesPath", - getClass().getResource("/eu/dnetlib/dhp/blacklist/mergesRelOneMerge").getPath(), - }); + String programmecsv = IOUtils + .toString( + getClass() + .getClassLoader() + .getResourceAsStream("eu/dnetlib/dhp/actionmanager/project/programme.csv")); + CSVParser csvParser = new CSVParser(); + List pl = csvParser.parse(programmecsv, "eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme"); + System.out.println(pl.size()); - } + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java index b22e6bd6d..50804f75e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java @@ -1,4 +1,94 @@ + package eu.dnetlib.dhp.actionmanager.project; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; + public class PrepareProgrammeTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static final ClassLoader cl = eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class + .getClassLoader(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareProgrammeTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void numberDistinctProgrammeTest() throws Exception { + PrepareProgramme + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-programmePath", + getClass().getResource("/eu/dnetlib/dhp/actionmanager/whole_programme.json").getPath(), + "-outputPath", + workingDir.toString() + "/preparedProgramme" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/preparedProgramme") + .map(item -> OBJECT_MAPPER.readValue(item, CSVProgramme.class)); + + Assertions.assertEquals(277, tmp.count()); + + Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); + + Assertions.assertEquals(0, verificationDataset.filter("shortTitle =''").count()); + } + } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index f7d271fe0..d48884842 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -1,4 +1,94 @@ + package eu.dnetlib.dhp.actionmanager.project; -public class SparkUpdateProjectSet { +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Project; + +public class SparkUpdateProjectTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static final ClassLoader cl = eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class + .getClassLoader(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(SparkUpdateProjectTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void numberDistinctProgrammeTest() throws Exception { + SparkAtomicActionJob + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-programmePath", + getClass().getResource("/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json").getPath(), + "-projectPath", + getClass().getResource("/eu/dnetlib/dhp/actionmanager/projects_subset.json").getPath(), + "-outputPath", + workingDir.toString() + "/actionSet" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/actionSet") + .map(item -> OBJECT_MAPPER.readValue(item, Project.class)); + + Assertions.assertEquals(14, tmp.count()); + +// Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); +// +// Assertions.assertEquals(0, verificationDataset.filter("shortTitle =''").count()); + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java index 29e9a6cce..51a7019ca 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java @@ -1,4 +1,39 @@ + package eu.dnetlib.dhp.actionmanager.project.httpconnector; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.ssl.SSLContextBuilder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + public class HttpConnectorTest { + + private static final Log log = LogFactory.getLog(HttpConnectorTest.class); + private static HttpConnector connector; + + private static final String URL = "http://cordis.europa.eu/data/reference/cordisref-H2020programmes.csv"; + private static final String URL_MISCONFIGURED_SERVER = "https://www.alexandria.unisg.ch/cgi/oai2?verb=Identify"; + private static final String URL_GOODSNI_SERVER = "https://air.unimi.it/oai/openaire?verb=Identify"; + + private static final SSLContextBuilder sslContextBuilder = new SSLContextBuilder(); + private static SSLConnectionSocketFactory sslSocketFactory; + + @BeforeAll + public static void setUp() { + connector = new HttpConnector(); + } + + @Test + + public void testGetInputSource() throws CollectorServiceException { + System.out.println(connector.getInputSource(URL)); + } + + @Test + public void testGoodServers() throws CollectorServiceException { + System.out.println(connector.getInputSource(URL_GOODSNI_SERVER)); + } + }