package eu.dnetlib.jobs.featureextraction;

import eu.dnetlib.jobs.AbstractSparkJob;
import eu.dnetlib.jobs.SparkTokenizer;
import eu.dnetlib.support.ArgumentApplicationParser;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.*;

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;

@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
public class FeatureExtractionJobTest {

    static SparkSession spark;
    static JavaSparkContext context;
    final static String workingPath = "/tmp/working_dir";

    final String inputDataPath = Paths
            .get(getClass().getResource("/eu/dnetlib/jobs/examples/publications.subset.json").toURI())
            .toFile()
            .getAbsolutePath();

    final String ldaTopicsPath = Paths
            .get(getClass().getResource("/eu/dnetlib/jobs/examples/publications_lda_topics_subset").toURI())
            .toFile()
            .getAbsolutePath();

    public FeatureExtractionJobTest() throws URISyntaxException {}

    public static void cleanup() throws IOException {
        // remove directories and clean the workspace
        FileUtils.deleteDirectory(new File(workingPath));
    }

    @BeforeAll
    public void setup() throws IOException {
        cleanup();

        spark = SparkSession
                .builder()
                .appName("Testing")
                .master("local[*]")
                .getOrCreate();
        context = JavaSparkContext.fromSparkContext(spark.sparkContext());
    }

    @AfterAll
    public static void finalCleanUp() throws IOException {
        cleanup();
    }

    @Test
    @Order(1)
    public void publicationFeatureExtractionTest() throws Exception {
        // extract publication features from the example subset into the working directory
        ArgumentApplicationParser parser = new ArgumentApplicationParser(
                readResource("/jobs/parameters/publicationFeatureExtractor_parameters.json", SparkTokenizer.class));

        parser.parseArgument(
                new String[] {
                        "-p", inputDataPath,
                        "-w", workingPath,
                        "-np", "20"
                });

        new SparkPublicationFeatureExtractor(
                parser,
                spark
        ).run();
    }

    @Test
    @Order(2)
    public void authorExtractionTest() throws Exception {
        // combine the publication features produced in the previous test with the LDA topics
        // and extract per-author records into the output directory
        ArgumentApplicationParser parser = new ArgumentApplicationParser(
                readResource("/jobs/parameters/authorExtractor_parameters.json", SparkAuthorExtractor.class));

        parser.parseArgument(
                new String[] {
                        "-p", inputDataPath,
                        "-w", workingPath,
                        "-np", "20",
                        "-t", ldaTopicsPath,
                        "-f", workingPath + "/publication_features",
                        "-o", workingPath + "/authors"
                });

        new SparkAuthorExtractor(
                parser,
                spark
        ).run();
    }

    public static String readResource(String path, Class<?> clazz) throws IOException {
        return IOUtils.toString(clazz.getResourceAsStream(path), StandardCharsets.UTF_8);
    }
}