2020-05-23 08:46:49 +02:00
|
|
|
package eu.dnetlib.dhp.doiboost
|
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.{Publication, Dataset => OafDataset}
|
2020-05-23 08:46:49 +02:00
|
|
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
2020-05-26 09:15:33 +02:00
|
|
|
import eu.dnetlib.doiboost.SparkGenerateDoiBoost.getClass
|
2020-05-29 09:32:04 +02:00
|
|
|
import eu.dnetlib.doiboost.mag.ConversionUtil
|
|
|
|
import eu.dnetlib.doiboost.orcid.ORCIDElement
|
2020-05-26 09:15:33 +02:00
|
|
|
import org.apache.spark.SparkConf
|
2020-05-29 09:32:04 +02:00
|
|
|
import org.apache.spark.rdd.RDD
|
|
|
|
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
2020-05-26 09:15:33 +02:00
|
|
|
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
2020-05-23 08:46:49 +02:00
|
|
|
import org.junit.jupiter.api.Test
|
|
|
|
|
2020-05-29 09:32:04 +02:00
|
|
|
import scala.io.Source
|
|
|
|
|
2020-05-23 08:46:49 +02:00
|
|
|
/**
  * Smoke tests around [[DoiBoostMappingUtil]] and the ORCID JSON input.
  *
  * None of these tests assert on values: they only exercise the calls and
  * print/show the results so a developer can inspect them.
  */
class DoiBoostHostedByMapTest {

  /** Retrieves the hosted-by map and prints how many keys it holds. */
  @Test
  def testLoadMap(): Unit = {
    val hostedByMap = DoiBoostMappingUtil.retrieveHostedByMap()
    println(hostedByMap.keys.size)
  }

  /**
    * Reads the ORCID JSON found at a developer-local path into a
    * `Dataset[ORCIDElement]` and shows it without truncating columns.
    *
    * The input lives outside the repository, so the test is skipped (rather
    * than failed) on machines where that path does not exist.
    */
  @Test
  def testMerge(): Unit = {
    import java.nio.file.{Files, Paths}

    val orcidPath = "/home/sandro/orcid"

    // Check before starting Spark: no point paying the start-up cost when the
    // input is missing, and a missing local fixture should not break the build.
    org.junit.jupiter.api.Assumptions.assumeTrue(
      Files.exists(Paths.get(orcidPath)),
      s"ORCID input not found at $orcidPath, skipping test"
    )

    val conf: SparkConf = new SparkConf()
    val spark: SparkSession =
      SparkSession
        .builder()
        .config(conf)
        .appName(getClass.getSimpleName)
        .master("local[*]")
        .getOrCreate()

    try {
      // Presumably supplies the Encoder[ORCIDElement] required by `.as` below.
      import spark.implicits._

      val dataset: Dataset[ORCIDElement] = spark.read.json(orcidPath).as[ORCIDElement]
      dataset.show(false)
    } finally {
      // Release the local Spark session even when reading or decoding fails.
      spark.stop()
    }
  }

  /** Prints the datasource id generated for a sample `doajarticles` identifier. */
  @Test
  def idDSGeneration(): Unit = {
    val s = "doajarticles::0066-782X"
    println(DoiBoostMappingUtil.generateDSId(s))
  }

}
|