Added test to check that merging authors works

This commit is contained in:
Sandro La Bruzzo 2020-10-08 10:33:12 +02:00
parent cd9c377d18
commit fe0a7870e6
1 changed files with 54 additions and 0 deletions

View File

@ -0,0 +1,54 @@
package eu.dnetlib.dhp.sx.graph
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
import eu.dnetlib.dhp.schema.scholexplorer.DLIPublication
import eu.dnetlib.dhp.sx.ebi.EBIAggregator
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
import org.junit.jupiter.api.Assertions._
import org.junit.jupiter.api.Test
import scala.io.Source
/**
 * Verifies that aggregating [[DLIPublication]] records by identifier through
 * `EBIAggregator.getDLIPublicationAggregator()` yields a publication that still
 * carries a non-empty author list.
 */
class SparkScholexplorerAggregationTest {

  /**
   * Loads the `publication.json` test resource, which is expected to hold one
   * JSON-serialised [[DLIPublication]] per line.
   *
   * The resource is resolved relative to this class. Blank lines are skipped and
   * the underlying source is always closed.
   *
   * @return the parsed publications, in file order
   */
  private def loadPublications(): List[DLIPublication] = {
    val stream = getClass.getResourceAsStream("publication.json")
    assertNotNull(stream, "test resource publication.json not found on the classpath")

    val mapper: ObjectMapper = new ObjectMapper()
    mapper.enable(SerializationFeature.INDENT_OUTPUT)

    val source = Source.fromInputStream(stream)
    try {
      // Source.getLines() is used instead of String.lines: on JDK 11+ the latter
      // resolves to java.lang.String#lines (a java.util.stream.Stream), which
      // cannot be used in a Scala for-comprehension.
      source.getLines()
        .filter(_.trim.nonEmpty)
        .map(line => mapper.readValue(line, classOf[DLIPublication]))
        .toList
    } finally {
      source.close()
    }
  }

  /**
   * Groups the fixture publications by id, aggregates each group and checks that
   * the first aggregated publication has authors.
   *
   * NOTE(review): the method name mentions funder relationships, but the body only
   * checks authors; the name is kept unchanged so existing test reports and filters
   * keep working.
   */
  @Test
  def testFunderRelationshipsMapping(): Unit = {
    val publications: List[DLIPublication] = loadPublications()

    // Validate the fixture up front so a bad input is not mistaken for an aggregation bug.
    assertFalse(publications.isEmpty, "publication.json must contain at least one record")
    publications.foreach { p =>
      assertNotNull(p.getAuthor)
      assertFalse(p.getAuthor.isEmpty)
    }

    implicit val pubEncoder: Encoder[DLIPublication] = Encoders.kryo[DLIPublication]
    val spark: SparkSession = SparkSession.builder().appName("Test").master("local[*]").getOrCreate()
    try {
      val ds: Dataset[DLIPublication] =
        spark.createDataset(spark.sparkContext.parallelize(publications))

      val unique = ds
        .map(d => (d.getId, d))(Encoders.tuple(Encoders.STRING, pubEncoder))
        .groupByKey(_._1)(Encoders.STRING)
        .agg(EBIAggregator.getDLIPublicationAggregator().toColumn)
        .map(_._2)

      val uniquePub: DLIPublication = unique.first()
      assertNotNull(uniquePub.getAuthor)
      assertFalse(uniquePub.getAuthor.isEmpty)
    } finally {
      // Release the local Spark context even when an assertion fails.
      spark.stop()
    }
  }
}