package eu.dnetlib.deeplearning.featureextraction;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import java.util.Arrays;

public class FeatureTransformerTest {

    static SparkSession spark;
    static JavaSparkContext context;
    static Dataset<Row> inputData;

    // Schema of the test input: two non-nullable string columns.
    static StructType inputSchema = new StructType(new StructField[]{
            new StructField("title", DataTypes.StringType, false, Metadata.empty()),
            new StructField("abstract", DataTypes.StringType, false, Metadata.empty())
    });

    @BeforeAll
    public static void setup() {
        // Local Spark session shared by all tests in this class.
        spark = SparkSession
                .builder()
                .appName("Testing")
                .master("local[*]")
                .getOrCreate();
        context = JavaSparkContext.fromSparkContext(spark.sparkContext());

        // Two-row DataFrame of (title, abstract) pairs used as test input.
        inputData = spark.createDataFrame(Arrays.asList(
                RowFactory.create("article title 1", "article description 1"),
                RowFactory.create("article title 2", "article description 2")
        ), inputSchema);
    }
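
    // A minimal smoke test for the fixture above; it only verifies that the
    // input DataFrame materializes with the expected row count and columns.
    // The original file declares no test methods, and the API of
    // eu.dnetlib.featureextraction.ScalaFeatureTransformer is not shown here,
    // so this is a hedged placeholder sketch rather than the project's real
    // feature-extraction test.
    @Test
    public void testInputDataShape() {
        Assertions.assertEquals(2, inputData.count());
        Assertions.assertArrayEquals(
                new String[]{"title", "abstract"},
                inputData.columns());
    }

    @AfterAll
    public static void tearDown() {
        // Release local Spark resources once all tests have finished.
        spark.stop();
    }
}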