diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala index 31784c7e9..68230b477 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTest.scala @@ -2,11 +2,14 @@ package eu.dnetlib.dhp.datacite import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest -import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _} import org.apache.commons.io.FileUtils import org.apache.spark.SparkConf import org.apache.spark.sql.functions.{col, count} import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.json4s.DefaultFormats +import org.json4s.JsonAST.{JField, JObject, JString} +import org.json4s.jackson.JsonMethods.parse import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.extension.ExtendWith import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} @@ -45,6 +48,9 @@ class DataciteToOAFTest extends AbstractVocabularyTest { } + + + @Test def testConvert(): Unit = { @@ -70,17 +76,18 @@ class DataciteToOAFTest extends AbstractVocabularyTest { assertEquals(100, nativeSize) - spark.read.load(targetPath).printSchema(); + val result: Dataset[String] = spark.read.text(targetPath).as[String].map(DataciteUtilityTest.convertToOAF)(Encoders.STRING) + + + - val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf] result - .map(s => s.getClass.getSimpleName) .groupBy(col("value").alias("class")) .agg(count("value").alias("Total")) .show(false) - val t = spark.read.load(targetPath).count() + val t = spark.read.text(targetPath).as[String].count() assertTrue(t > 0) diff --git 
a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteUtilityTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteUtilityTest.scala
new file mode 100644
index 000000000..04d3c4a58
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/datacite/DataciteUtilityTest.scala
@@ -0,0 +1,49 @@
+package eu.dnetlib.dhp.datacite
+
+import org.json4s.DefaultFormats
+import org.json4s.JsonAST.{JField, JObject, JString}
+import org.json4s.jackson.JsonMethods.parse
+
+/** Test helper that assigns a coarse type label to JSON-serialised records. */
+object DataciteUtilityTest {
+
+  /** Returns a coarse type label for one JSON-serialised record.
+    *
+    * A record from which a string-valued `source` field can be extracted is labelled
+    * "Relation"; any other record is labelled with the lower-cased `classname` of the
+    * first `instance` / `instancetype` entry yielded by the JSON traversal.
+    *
+    * @param input one record as a JSON string
+    * @return "Relation", or the lower-cased instance type class name
+    * @throws java.util.NoSuchElementException if the record is not a relation and carries
+    *                                          no `instance.instancetype.classname`
+    */
+  def convertToOAF(input: String): String = {
+    implicit val formats: DefaultFormats.type = DefaultFormats
+    val json = parse(input)
+
+    // Option instead of a "NULL" sentinel: a source whose value is literally "NULL"
+    // is still recognised as a relation.
+    val isRelation: Boolean = (json \\ "source").extractOpt[String].isDefined
+
+    if (isRelation) {
+      "Relation"
+    } else {
+      val classNames: List[String] = for {
+        JObject(instance) <- json \\ "instance"
+        JField("instancetype", JObject(instancetype)) <- instance
+        JField("classname", JString(classname)) <- instancetype
+      } yield classname
+
+      // Locale.ROOT keeps the label independent of the JVM default locale.
+      classNames.headOption
+        .map(_.toLowerCase(java.util.Locale.ROOT))
+        .getOrElse(
+          throw new NoSuchElementException(
+            s"No instance.instancetype.classname found in record: ${input.take(200)}"
+          )
+        )
+    }
+  }
+
+}