package eu.dnetlib.jobs.featureextraction;

import eu.dnetlib.jobs.AbstractSparkJob;
import eu.dnetlib.jobs.SparkTokenizer;
import eu.dnetlib.support.ArgumentApplicationParser;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.*;

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
|
|
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
|
|
public class FeatureExtractionJobTest {
|
|
static SparkSession spark;
|
|
static JavaSparkContext context;
|
|
final static String workingPath = "/tmp/working_dir";
|
|
|
|
final String inputDataPath = Paths
|
|
.get(getClass().getResource("/eu/dnetlib/jobs/examples/publications.subset.json").toURI())
|
|
.toFile()
|
|
.getAbsolutePath();
|
|
|
|
final String ldaTopicsPath = Paths
|
|
.get(getClass().getResource("/eu/dnetlib/jobs/examples/publications_lda_topics_subset").toURI())
|
|
.toFile()
|
|
.getAbsolutePath();
|
|
|
|
public FeatureExtractionJobTest() throws URISyntaxException {}
|
|
|
|
public static void cleanup() throws IOException {
|
|
//remove directories and clean workspace
|
|
FileUtils.deleteDirectory(new File(workingPath));
|
|
}
|
|
|
|
@BeforeAll
|
|
public void setup() throws IOException {
|
|
cleanup();
|
|
|
|
spark = SparkSession
|
|
.builder()
|
|
.appName("Testing")
|
|
.master("local[*]")
|
|
.getOrCreate();
|
|
|
|
context = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
}
|
|
|
|
@AfterAll
|
|
public static void finalCleanUp() throws IOException {
|
|
cleanup();
|
|
}
|
|
|
|
@Test
|
|
@Order(1)
|
|
public void publicationFeatureExtractionTest() throws Exception {
|
|
ArgumentApplicationParser parser = new ArgumentApplicationParser(readResource("/jobs/parameters/publicationFeatureExtractor_parameters.json", SparkTokenizer.class));
|
|
|
|
parser.parseArgument(
|
|
new String[] {
|
|
"-p", inputDataPath,
|
|
"-w", workingPath,
|
|
"-np", "20"
|
|
}
|
|
);
|
|
|
|
new SparkPublicationFeatureExtractor(
|
|
parser,
|
|
spark
|
|
).run();
|
|
}
|
|
|
|
@Test
|
|
@Order(2)
|
|
public void authorExtractionTest() throws Exception {
|
|
ArgumentApplicationParser parser = new ArgumentApplicationParser(readResource("/jobs/parameters/authorExtractor_parameters.json", SparkAuthorExtractor.class));
|
|
|
|
parser.parseArgument(
|
|
new String[]{
|
|
"-p", inputDataPath,
|
|
"-w", workingPath,
|
|
"-np", "20",
|
|
"-t", ldaTopicsPath,
|
|
"-f", workingPath + "/publication_features",
|
|
"-o", workingPath + "/authors"
|
|
});
|
|
|
|
new SparkAuthorExtractor(
|
|
parser,
|
|
spark
|
|
).run();
|
|
|
|
}
|
|
|
|
public static String readResource(String path, Class<? extends AbstractSparkJob> clazz) throws IOException {
|
|
return IOUtils.toString(clazz.getResourceAsStream(path));
|
|
}
|
|
}
|