package eu.dnetlib.dhp.bioschema

import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
import eu.dnetlib.dhp.schema.oaf.Oaf
import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
import org.junit.jupiter.api.Assertions._
import org.junit.jupiter.api.extension.ExtendWith
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
import org.mockito.junit.jupiter.MockitoExtension
import org.slf4j.{Logger, LoggerFactory}

import java.nio.file.{Files, Path}
import scala.io.Source

/** Tests for the Bioschema/Datacite to OAF conversion.
  *
  * Each test gets its own temporary working directory, created in [[setUp]]
  * and removed in [[tearDown]].
  */
@ExtendWith(Array(classOf[MockitoExtension]))
class BioschemaDataciteToOAFTest {

  // Assigned by the JUnit lifecycle in setUp(); stays null if setUp() fails early.
  private var workingDir: Path = null
  val log: Logger = LoggerFactory.getLogger(getClass)

  /** Creates a fresh temporary directory named after this test class. */
  @BeforeEach
  def setUp(): Unit = {
    workingDir = Files.createTempDirectory(getClass.getSimpleName)
  }

  /** Deletes the temporary working directory, if one was created. */
  @AfterEach
  def tearDown(): Unit = {
    // Guard against a failed setUp(): there is nothing to delete in that case.
    Option(workingDir).foreach(dir => FileUtils.deleteDirectory(dir.toFile))
  }

  /** Runs the dataset generation over the `ped_dump` test resource twice,
    * once with `exportLinks = true` and once with `exportLinks = false`,
    * and checks the number of text lines written to the target path
    * (50 and 10 respectively).
    */
  @Test
  def testGeneratePED(): Unit = {
    val path = getClass.getResource("/eu/dnetlib/dhp/bioschema/ped_dump").getPath
    val conf = new SparkConf()
    val spark: SparkSession = SparkSession
      .builder()
      .config(conf)
      .appName(getClass.getSimpleName)
      .master("local[*]")
      .getOrCreate()

    // Always stop the session, even when an assertion or the job itself fails,
    // so a failing run does not leave a live session behind for other tests.
    try {
      implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
      val instance = new GenerateBioschemaDatasetSpark(null, null, log)
      val targetPath = s"$workingDir/result"

      instance.generateBioschemaDataset(path, exportLinks = true, targetPath, "ped", "protein", spark)
      val total_items = spark.read.text(targetPath).count()
      println(s"total_items: $total_items")
      // assertEquals reports expected vs. actual on failure, unlike assertTrue(a == b).
      assertEquals(50L, total_items)

      // NOTE(review): the second run writes to the same targetPath; this presumes
      // the generator overwrites existing output -- confirm against its implementation.
      instance.generateBioschemaDataset(path, exportLinks = false, targetPath, "ped", "protein", spark)
      val total_datasets = spark.read.text(targetPath).count()
      println(s"total_datasets: $total_datasets")
      assertEquals(10L, total_datasets)
    } finally {
      spark.stop()
    }
  }

  /** Transforms the `ped_record.json` test resource with
    * `BioschemaToOAFTransformation.generateOAF` and prints every resulting
    * object as indented JSON, separated by a dashed line.
    */
  @Test
  def testMapping(): Unit = {
    val source = Source
      .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/bioschema/ped_record.json"))
    // Close the source (and its underlying stream) once the content has been read.
    val record =
      try source.mkString
      finally source.close()

    val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
    val res: List[Oaf] = BioschemaToOAFTransformation.generateOAF(record, true, "ped", "protein")
    assertNotNull(res)
    res.foreach(r => {
      println(mapper.writeValueAsString(r))
      println("----------------------------")
    })
  }
}