From e487b5544c9c81ea348441a8e4f4456bb1c5a800 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Wed, 30 Jun 2021 12:57:11 +0200
Subject: [PATCH] added tests for the normalization of the dois

---
 .../dnetlib/doiboost/mag/MAGMappingTest.scala | 47 ++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
index 88b1669f48..7eb50665e2 100644
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala
@@ -4,7 +4,7 @@ import java.sql.Timestamp
 
 import eu.dnetlib.dhp.schema.oaf.Publication
 import org.apache.htrace.fasterxml.jackson.databind.SerializationFeature
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.api.java.function.MapFunction
 import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
@@ -62,6 +62,51 @@ class MAGMappingTest {
     logger.debug(description)
 
   }
+
+  /**
+   * Loads a MAG papers JSON test resource, runs it through
+   * `SparkProcessMAG.getDistinctResults` and checks that exactly `expectedCount`
+   * records come back, each with a DOI that is already lower-cased.
+   *
+   * The Spark session is closed in a `finally` block so that a failed assertion
+   * does not leave a SparkContext running for the tests that follow.
+   *
+   * @param resourceName  JSON resource, resolved relative to this test class
+   * @param expectedCount number of records expected from getDistinctResults
+   */
+  private def assertDistinctLowerCaseDois(resourceName: String, expectedCount: Int): Unit = {
+    val resource = getClass.getResource(resourceName)
+    require(resource != null, s"test resource not found: $resourceName")
+
+    val conf = new SparkConf().setAppName("test").setMaster("local[2]")
+    val sc = new SparkContext(conf)
+    val spark = SparkSession.builder.config(sc.getConf).getOrCreate()
+    try {
+      import spark.implicits._
+
+      val schema = Encoders.product[MagPapers].schema
+      val magPapers: Dataset[MagPapers] =
+        spark.read.option("multiline", true).schema(schema).json(resource.getPath).as[MagPapers]
+
+      // Collect once instead of count + take: only a handful of records are expected,
+      // and this runs the job a single time while still checking every returned record.
+      val distinct = SparkProcessMAG.getDistinctResults(magPapers).collect()
+      assertTrue(distinct.length == expectedCount)
+      distinct.foreach(mp => assertTrue(mp.Doi.equals(mp.Doi.toLowerCase())))
+    } finally {
+      spark.close()
+    }
+  }
+
+  /** magPapers.json must yield 10 records, all with lower-cased DOIs. */
+  @Test
+  def normalizeDoiTest(): Unit =
+    assertDistinctLowerCaseDois("magPapers.json", expectedCount = 10)
+
+  /** duplicatedMagPapers.json must yield 8 records, all with lower-cased DOIs. */
+  @Test
+  def normalizeDoiTest2(): Unit =
+    assertDistinctLowerCaseDois("duplicatedMagPapers.json", expectedCount = 8)
 
 
 }