// Source: dnet-and/dnet-and-test/src/test/java/eu/dnetlib/jobs/featureextraction/FeatureExtractionJobTest.java
package eu.dnetlib.jobs.featureextraction;
import eu.dnetlib.jobs.AbstractSparkJob;
import eu.dnetlib.jobs.SparkTokenizer;
import eu.dnetlib.support.ArgumentApplicationParser;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.*;

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
/**
 * Integration tests for the feature-extraction Spark jobs.
 *
 * <p>The tests are ordered: {@code publicationFeatureExtractionTest} writes
 * publication features under {@code workingPath}, which
 * {@code authorExtractionTest} then reads as its {@code -f} input.
 * {@code PER_CLASS} lifecycle lets the non-static {@code @BeforeAll} setup
 * initialize the shared Spark session once for the whole class.
 */
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
public class FeatureExtractionJobTest {

    // Shared local Spark session/context used by both ordered tests.
    static SparkSession spark;
    static JavaSparkContext context;

    // Scratch directory where the jobs under test write their outputs.
    final static String workingPath = "/tmp/working_dir";

    // Absolute path of the sample publications resource bundled with the tests.
    final String inputDataPath = Paths
            .get(getClass().getResource("/eu/dnetlib/jobs/examples/publications.subset.json").toURI())
            .toFile()
            .getAbsolutePath();

    // Absolute path of the precomputed LDA topics resource for those publications.
    final String ldaTopicsPath = Paths
            .get(getClass().getResource("/eu/dnetlib/jobs/examples/publications_lda_topics_subset").toURI())
            .toFile()
            .getAbsolutePath();

    // Constructor only declares the URISyntaxException thrown by the field initializers above.
    public FeatureExtractionJobTest() throws URISyntaxException {}

    /**
     * Removes the working directory so every run starts from a clean workspace.
     *
     * @throws IOException if the directory exists but cannot be deleted
     */
    public static void cleanup() throws IOException {
        // deleteDirectory is a no-op when the directory does not exist
        FileUtils.deleteDirectory(new File(workingPath));
    }

    /** Cleans the workspace and starts a local Spark session shared by all tests. */
    @BeforeAll
    public void setup() throws IOException {
        cleanup();
        spark = SparkSession
                .builder()
                .appName("Testing")
                .master("local[*]")
                .getOrCreate();
        context = JavaSparkContext.fromSparkContext(spark.sparkContext());
    }

    /** Stops the Spark session and removes the workspace after all tests have run. */
    @AfterAll
    public static void finalCleanUp() throws IOException {
        // Fix: the session was never stopped, leaking the local Spark context
        // (and its temp files) past the end of the test run.
        if (spark != null) {
            spark.stop();
        }
        cleanup();
    }

    /** Runs the publication feature extractor; its output feeds the next test. */
    @Test
    @Order(1)
    public void publicationFeatureExtractionTest() throws Exception {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(readResource("/jobs/parameters/publicationFeatureExtractor_parameters.json", SparkTokenizer.class));
        parser.parseArgument(
                new String[] {
                        "-p", inputDataPath,
                        "-w", workingPath,
                        "-np", "20"
                }
        );
        new SparkPublicationFeatureExtractor(
                parser,
                spark
        ).run();
    }

    /** Runs the author extractor over the features produced by the previous test. */
    @Test
    @Order(2)
    public void authorExtractionTest() throws Exception {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(readResource("/jobs/parameters/authorExtractor_parameters.json", SparkAuthorExtractor.class));
        parser.parseArgument(
                new String[]{
                        "-p", inputDataPath,
                        "-w", workingPath,
                        "-np", "20",
                        "-t", ldaTopicsPath,
                        "-f", workingPath + "/publication_features",
                        "-o", workingPath + "/authors"
                });
        new SparkAuthorExtractor(
                parser,
                spark
        ).run();
    }

    /**
     * Reads a classpath resource as a UTF-8 string.
     *
     * @param path  classpath location of the resource
     * @param clazz class whose classloader resolves the resource
     * @return the resource content decoded as UTF-8
     * @throws IOException if the resource cannot be read
     */
    public static String readResource(String path, Class<? extends AbstractSparkJob> clazz) throws IOException {
        // Fix: the charset-less IOUtils.toString(InputStream) overload is deprecated
        // and decodes with the platform default charset; pin UTF-8 explicitly.
        return IOUtils.toString(clazz.getResourceAsStream(path), StandardCharsets.UTF_8);
    }
}