|
|
|
package eu.dnetlib.dhp.oa.graph.hostedbymap
|
|
|
|
|
|
|
|
import java.sql.Timestamp
|
|
|
|
|
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper
|
|
|
|
import eu.dnetlib.dhp.oa.graph.hostebymap.{Constants, HostedByInfo, SparkPrepareHostedByMapData}
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Datasource
|
|
|
|
import org.apache.spark.SparkConf
|
|
|
|
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
|
|
|
|
import org.json4s.DefaultFormats
|
|
|
|
import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue}
|
|
|
|
import org.junit.jupiter.api.Test
|
|
|
|
import org.slf4j.{Logger, LoggerFactory}
|
|
|
|
|
|
|
|
import scala.collection.mutable.ListBuffer
|
|
|
|
import scala.io.Source
|
|
|
|
|
|
|
|
class TestPreprocess extends java.io.Serializable {

  // Kryo encoder for the OAF Datasource beans deserialized by the mapping job.
  implicit val mapEncoderDats: Encoder[Datasource] = Encoders.kryo[Datasource]
  // Product encoder for the HostedByInfo case class emitted by the mapping job.
  implicit val schema: Encoder[HostedByInfo] = Encoders.product[HostedByInfo]

  /**
   * Builds a SparkSession running on the local machine, suitable for unit tests.
   * The caller is responsible for closing the returned session.
   *
   * Extracted because the exact same setup was previously duplicated verbatim
   * in every test method.
   */
  private def localSparkSession(): SparkSession = {
    val conf = new SparkConf()
    conf.setMaster("local[*]")
    // Pin the driver host so the test does not depend on the machine's hostname resolution.
    conf.set("spark.driver.host", "localhost")
    SparkSession
      .builder()
      .appName(getClass.getSimpleName)
      .config(conf)
      .getOrCreate()
  }

  /** Resolves the absolute filesystem path of a test resource located next to this class. */
  private def resourcePath(name: String): String =
    getClass.getResource(name).getPath

  /**
   * Smoke test: the OpenAIRE datasource dump in `datasource.json` can be loaded
   * by `oaHostedByDataset` and counted without errors.
   */
  @Test
  def readDatasource(): Unit = {
    val spark = localSparkSession()
    // try/finally guarantees the session is closed even if the dataset call throws.
    try {
      val path = resourcePath("datasource.json")
      println(SparkPrepareHostedByMapData.oaHostedByDataset(spark, path).count)
    } finally {
      spark.close()
    }
  }

  /**
   * Smoke test: the UNIBI gold-OA dump in `unibi_transformed.json` can be loaded
   * by `goldHostedByDataset` and counted without errors.
   */
  @Test
  def readGold(): Unit = {
    val spark = localSparkSession()
    try {
      val path = resourcePath("unibi_transformed.json")
      println(SparkPrepareHostedByMapData.goldHostedByDataset(spark, path).count)
    } finally {
      spark.close()
    }
  }

  /**
   * Smoke test: the DOAJ dump in `doaj_transformed.json` can be loaded
   * by `doajHostedByDataset` and counted without errors.
   */
  @Test
  def readDoaj(): Unit = {
    val spark = localSparkSession()
    try {
      val path = resourcePath("doaj_transformed.json")
      println(SparkPrepareHostedByMapData.doajHostedByDataset(spark, path).count)
    } finally {
      spark.close()
    }
  }
}
|