dnet-hadoop/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala

119 lines
2.6 KiB
Scala

package eu.dnetlib.dhp.oa.graph.hostedbymap
import java.sql.Timestamp
import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.oa.graph.hostebymap.{Constants, HostedByInfo, SparkPrepareHostedByMapData}
import eu.dnetlib.dhp.schema.oaf.Datasource
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
import org.json4s.DefaultFormats
import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue}
import org.junit.jupiter.api.Test
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.mutable.ListBuffer
import scala.io.Source
/**
  * Smoke tests for the readers exposed by [[SparkPrepareHostedByMapData]].
  *
  * Each test resolves a JSON fixture relative to this class on the classpath, feeds it to one
  * of the reader methods on a local Spark session and prints the number of records returned.
  * The tests make no assertion on the count itself: they only verify that reading the fixture
  * completes without throwing.
  */
class TestPreprocess extends java.io.Serializable {

  implicit val mapEncoderDats: Encoder[Datasource] = Encoders.kryo[Datasource]

  // Explicit type on the implicit: same type as previously inferred, but no longer dependent
  // on inference order during implicit resolution.
  implicit val schema: Encoder[HostedByInfo] = Encoders.product[HostedByInfo]

  // NOTE(review): nothing in this class uses the json4s formats directly. It is kept in
  // implicit scope in case the readers under test resolve it implicitly -- confirm against
  // SparkPrepareHostedByMapData and drop it if they do not.
  private implicit val formats: DefaultFormats.type = DefaultFormats

  /**
    * Runs `body` against a local Spark session and always closes the session afterwards,
    * even when `body` throws, so a failing test cannot leak its session into the next one.
    *
    * @param body the code to run with the session
    * @tparam T the result type of `body`
    * @return whatever `body` returns
    */
  private def withSpark[T](body: SparkSession => T): T = {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .set("spark.driver.host", "localhost")
    val spark: SparkSession =
      SparkSession
        .builder()
        .appName(getClass.getSimpleName)
        .config(conf)
        .getOrCreate()
    try body(spark)
    finally spark.close()
  }

  /**
    * Resolves a test fixture relative to this class on the classpath.
    *
    * Fails the test with a message naming the missing resource instead of letting a
    * NullPointerException surface from `getPath`.
    *
    * @param name the resource name, as accepted by `Class.getResource`
    * @return the path component of the resource URL
    */
  private def resourcePath(name: String): String = {
    val url = getClass.getResource(name)
    assertNotNull(url, s"test resource '$name' not found on the classpath")
    url.getPath
  }

  /** Reads the `datasource.json` fixture through `oaHostedByDataset` and prints its size. */
  @Test
  def readDatasource(): Unit = {
    val path = resourcePath("datasource.json")
    withSpark { spark =>
      println(SparkPrepareHostedByMapData.oaHostedByDataset(spark, path).count)
    }
  }

  /** Reads the `unibi_transformed.json` fixture through `goldHostedByDataset` and prints its size. */
  @Test
  def readGold(): Unit = {
    val path = resourcePath("unibi_transformed.json")
    withSpark { spark =>
      println(SparkPrepareHostedByMapData.goldHostedByDataset(spark, path).count)
    }
  }

  /** Reads the `doaj_transformed.json` fixture through `doajHostedByDataset` and prints its size. */
  @Test
  def readDoaj(): Unit = {
    val path = resourcePath("doaj_transformed.json")
    withSpark { spark =>
      println(SparkPrepareHostedByMapData.doajHostedByDataset(spark, path).count)
    }
  }
}