forked from D-Net/dnet-hadoop
Fixed test; it now compiles after commit a6977197b3
This commit is contained in:
parent
26104826c4
commit
67525076ec
|
@ -2,11 +2,14 @@ package eu.dnetlib.dhp.datacite
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
|
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
|
||||||
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf
|
import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _}
|
||||||
import org.apache.commons.io.FileUtils
|
import org.apache.commons.io.FileUtils
|
||||||
import org.apache.spark.SparkConf
|
import org.apache.spark.SparkConf
|
||||||
import org.apache.spark.sql.functions.{col, count}
|
import org.apache.spark.sql.functions.{col, count}
|
||||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
|
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
|
||||||
|
import org.json4s.DefaultFormats
|
||||||
|
import org.json4s.JsonAST.{JField, JObject, JString}
|
||||||
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
import org.junit.jupiter.api.Assertions._
|
import org.junit.jupiter.api.Assertions._
|
||||||
import org.junit.jupiter.api.extension.ExtendWith
|
import org.junit.jupiter.api.extension.ExtendWith
|
||||||
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
|
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
|
||||||
|
@ -45,6 +48,9 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testConvert(): Unit = {
|
def testConvert(): Unit = {
|
||||||
|
|
||||||
|
@ -70,17 +76,18 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
assertEquals(100, nativeSize)
|
assertEquals(100, nativeSize)
|
||||||
|
|
||||||
spark.read.load(targetPath).printSchema();
|
val result: Dataset[String] = spark.read.text(targetPath).as[String].map(DataciteUtilityTest.convertToOAF)(Encoders.STRING)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf]
|
|
||||||
|
|
||||||
result
|
result
|
||||||
.map(s => s.getClass.getSimpleName)
|
|
||||||
.groupBy(col("value").alias("class"))
|
.groupBy(col("value").alias("class"))
|
||||||
.agg(count("value").alias("Total"))
|
.agg(count("value").alias("Total"))
|
||||||
.show(false)
|
.show(false)
|
||||||
|
|
||||||
val t = spark.read.load(targetPath).count()
|
val t = spark.read.text(targetPath).as[String].count()
|
||||||
|
|
||||||
assertTrue(t > 0)
|
assertTrue(t > 0)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
package eu.dnetlib.dhp.datacite
|
||||||
|
|
||||||
|
import org.json4s.DefaultFormats
|
||||||
|
import org.json4s.JsonAST.{JField, JObject, JString}
|
||||||
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
|
|
||||||
|
object DataciteUtilityTest {

  /**
   * Derives a coarse OAF record label from a serialized OAF JSON record.
   *
   * If the record carries a `source` field it is treated as a Relation and
   * `"Relation"` is returned. Otherwise the lowercase `classname` of the first
   * `instancetype` found under `instance` is returned; if no instancetype is
   * present, the `"NULL"` sentinel (already used for the missing-source case)
   * is returned instead of throwing.
   *
   * @param input a JSON-serialized OAF record
   * @return "Relation", the lowercase instancetype classname, or "NULL"
   */
  def convertToOAF(input: String): String = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json = parse(input)

    // Relations are the only OAF records carrying a top-level "source" field.
    val source: String = (json \\ "source").extractOrElse("NULL")
    if (source != "NULL") "Relation"
    else {
      // Collect every instance -> instancetype -> classname found in the record.
      val classNames: List[String] = for {
        JObject(instance)                         <- json \\ "instance"
        JField("instancetype", JObject(itype))    <- instance
        JField("classname", JString(classname))   <- itype
      } yield classname

      // headOption guards against records with no instancetype; the original
      // used .head, which throws NoSuchElementException on such records.
      classNames.headOption.map(_.toLowerCase()).getOrElse("NULL")
    }
  }

}
|
Loading…
Reference in New Issue