2021-10-20 17:37:42 +02:00
|
|
|
package eu.dnetlib.dhp.datacite
|
2021-03-31 15:45:58 +02:00
|
|
|
|
2021-04-20 09:44:44 +02:00
|
|
|
|
2021-10-20 17:37:42 +02:00
|
|
|
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
|
2021-03-31 15:45:58 +02:00
|
|
|
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Oaf
|
2021-11-25 11:02:38 +01:00
|
|
|
import org.apache.commons.io.FileUtils
|
2021-11-25 10:54:13 +01:00
|
|
|
import org.apache.spark.SparkConf
|
|
|
|
import org.apache.spark.sql.functions.{col, count}
|
|
|
|
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
|
2021-03-31 15:45:58 +02:00
|
|
|
import org.junit.jupiter.api.extension.ExtendWith
|
2021-11-25 11:02:38 +01:00
|
|
|
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
|
2021-03-31 15:45:58 +02:00
|
|
|
import org.mockito.junit.jupiter.MockitoExtension
|
2021-11-25 10:54:13 +01:00
|
|
|
import org.slf4j.{Logger, LoggerFactory}
|
2021-06-04 10:14:22 +02:00
|
|
|
|
2021-11-25 10:54:13 +01:00
|
|
|
import java.nio.file.{Files, Path}
|
2021-07-27 16:09:30 +02:00
|
|
|
import java.text.SimpleDateFormat
|
|
|
|
import java.util.Locale
|
2021-03-31 15:45:58 +02:00
|
|
|
import scala.io.Source
|
2021-11-25 10:54:13 +01:00
|
|
|
import org.junit.jupiter.api.Assertions._
|
2021-12-03 11:15:09 +01:00
|
|
|
|
2021-03-31 15:45:58 +02:00
|
|
|
/** Tests for the Datacite to OAF conversion.
  *
  * Each test gets a fresh temporary working directory (created in `setUp`,
  * removed in `tearDown`) and relies on the `vocabularies` fixture prepared by
  * `setUpVocabulary()` of [[AbstractVocabularyTest]].
  */
@ExtendWith(Array(classOf[MockitoExtension]))
class DataciteToOAFTest extends AbstractVocabularyTest {

  // Assigned by JUnit's @BeforeEach hook, hence a nullable var rather than a val.
  private var workingDir: Path = null
  val log: Logger = LoggerFactory.getLogger(getClass)

  /** Creates the per-test temporary directory and initialises the vocabulary fixture. */
  @BeforeEach
  def setUp(): Unit = {
    workingDir = Files.createTempDirectory(getClass.getSimpleName)
    super.setUpVocabulary()
  }

  /** Removes the per-test temporary directory.
    *
    * Guarded against `workingDir` still being null (e.g. `setUp` failed before
    * assigning it) so that clean-up never masks the original failure with an NPE.
    */
  @AfterEach
  def tearDown(): Unit = {
    if (workingDir != null) {
      FileUtils.deleteDirectory(workingDir.toFile)
    }
  }

  /** Checks that an ISO-8601 timestamp with a numeric zone offset is parsed to the
    * expected epoch value.
    */
  @Test
  def testDateMapping(): Unit = {
    val inputDate = "2021-07-14T11:52:54+0000"
    val iso8601Format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
    val parsed = iso8601Format.parse(inputDate)

    // 2021-07-14T11:52:54Z expressed as milliseconds since the Unix epoch.
    assertEquals(1626263574000L, parsed.getTime)
  }

  /** Runs `GenerateDataciteDatasetSpark.generateDataciteDataset` on the bundled
    * dataset resource with a local Spark session, then checks that the input
    * holds 100 records and that the generated output is not empty.
    */
  @Test
  def testConvert(): Unit = {
    val path = getClass.getResource("/eu/dnetlib/dhp/actionmanager/datacite/dataset").getPath

    val conf = new SparkConf()
    val spark: SparkSession = SparkSession
      .builder()
      .config(conf)
      .appName(getClass.getSimpleName)
      .master("local[*]")
      .getOrCreate()

    // Stop the session even when an assertion or the Spark job fails, otherwise
    // the session would leak into the tests that run afterwards.
    try {
      implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
      val instance = new GenerateDataciteDatasetSpark(null, null, log)
      val targetPath = s"$workingDir/result"

      instance.generateDataciteDataset(path, exportLinks = true, vocabularies, targetPath, spark)

      import spark.implicits._

      val nativeSize = spark.read.load(path).count()
      assertEquals(100, nativeSize)

      val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf]

      // Diagnostic output only: number of generated entities per concrete class.
      result
        .map(s => s.getClass.getSimpleName)
        .groupBy(col("value").alias("class"))
        .agg(count("value").alias("Total"))
        .show(false)

      val t = spark.read.load(targetPath).count()
      assertTrue(t > 0)
    } finally {
      spark.stop()
    }
  }

  /** Converts the bundled sample record with `DataciteToOAFTransformation.generateOAF`
    * and prints every produced entity as indented JSON.
    *
    * There are no assertions on the content: the test only fails if the
    * transformation or the JSON serialisation throws.
    */
  @Test
  def testMapping(): Unit = {
    val source = Source.fromInputStream(
      getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record.json")
    )
    // Close the underlying resource stream once the content has been read.
    val record =
      try source.mkString
      finally source.close()

    val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
    val res: List[Oaf] = DataciteToOAFTransformation.generateOAF(record, 0L, 0L, vocabularies, true)

    res.foreach { r =>
      println(mapper.writeValueAsString(r))
      println("----------------------------")
    }
  }

}
|