// forked from D-Net/dnet-hadoop
package eu.dnetlib.dhp.bioschema

import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
import eu.dnetlib.dhp.schema.oaf.Oaf
import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
import org.junit.jupiter.api.Assertions._
import org.junit.jupiter.api.extension.ExtendWith
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
import org.mockito.junit.jupiter.MockitoExtension
import org.slf4j.{Logger, LoggerFactory}

import java.nio.file.{Files, Path}
import scala.io.Source
@ExtendWith(Array(classOf[MockitoExtension]))
/** Unit tests for the Bioschema-to-OAF transformation and the Spark dataset
  * generation job, driven by JSON fixtures under /eu/dnetlib/dhp/bioschema.
  */
class BioschemaDataciteToOAFTest {

  // Per-test temporary directory; created in setUp, deleted in tearDown.
  // `_` (not `null`) is the idiomatic Scala default for an uninitialized var.
  private var workingDir: Path = _
  val log: Logger = LoggerFactory.getLogger(getClass)

  @BeforeEach
  def setUp(): Unit = {
    workingDir = Files.createTempDirectory(getClass.getSimpleName)
  }

  @AfterEach
  def tearDown(): Unit = {
    FileUtils.deleteDirectory(workingDir.toFile)
  }

  /** Runs the Spark generation job over the PED dump fixture twice and checks
    * the output record counts: with exported links (datasets + relations) and
    * without (datasets only).
    */
  @Test
  def testGeneratePED(): Unit = {
    val path = getClass.getResource("/eu/dnetlib/dhp/bioschema/ped_dump").getPath
    val conf = new SparkConf()
    val spark: SparkSession = SparkSession
      .builder()
      .config(conf)
      .appName(getClass.getSimpleName)
      .master("local[*]")
      .getOrCreate()

    try {
      implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
      val instance = new GenerateBioschemaDatasetSpark(null, null, log)
      val targetPath = s"$workingDir/result"

      instance.generateBioschemaDataset(path, exportLinks = true, targetPath, "ped", "protein", spark)

      val totalItems = spark.read.text(targetPath).count()
      println(s"total_items: $totalItems")
      // assertEquals reports expected vs actual on failure, unlike assertTrue(a == b).
      assertEquals(21L, totalItems)

      instance.generateBioschemaDataset(path, exportLinks = false, targetPath, "ped", "protein", spark)

      val totalDatasets = spark.read.text(targetPath).count()
      println(s"total_datasets: $totalDatasets")
      assertEquals(5L, totalDatasets)
    } finally {
      // Always release the local Spark session, even when an assertion fails,
      // so later tests are not left with a stale context.
      spark.stop()
    }
  }

  /** Maps a single PED record to OAF and verifies at least one entity is
    * produced, dumping the results as indented JSON for inspection.
    */
  @Test
  def testPEDMapping(): Unit = {
    val record = Source
      .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/bioschema/ped_record.json"))
      .mkString
    val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
    val res: List[Oaf] = BioschemaToOAFTransformation.generateOAF(record, true, "ped", "protein")
    // A silent empty result previously passed this test; make it fail explicitly.
    assertFalse(res.isEmpty)
    res.foreach(r => {
      println(mapper.writeValueAsString(r))
      println("----------------------------")
    })
  }

  /** Maps a single DisProt record to OAF and verifies at least one entity is
    * produced, dumping the results as indented JSON for inspection.
    */
  @Test
  def testDISPROTMapping(): Unit = {
    val record = Source
      .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/bioschema/disprot_record.json"))
      .mkString
    val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
    val res: List[Oaf] = BioschemaToOAFTransformation.generateOAF(record, true, "disprot", "protein")
    // A silent empty result previously passed this test; make it fail explicitly.
    assertFalse(res.isEmpty)
    res.foreach(r => {
      println(mapper.writeValueAsString(r))
      println("----------------------------")
    })
  }
}