2021-12-06 14:24:03 +01:00
|
|
|
package eu.dnetlib.dhp.doiboost.orcid
|
2020-05-22 15:15:09 +02:00
|
|
|
|
2020-12-23 16:59:52 +01:00
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper
|
2020-12-07 19:59:33 +01:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.Publication
|
2021-12-06 14:24:03 +01:00
|
|
|
import eu.dnetlib.doiboost.orcid._
|
2021-04-14 10:03:01 +02:00
|
|
|
import org.apache.spark.SparkConf
|
2021-04-13 17:47:43 +02:00
|
|
|
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
|
2020-06-09 18:07:14 +02:00
|
|
|
import org.junit.jupiter.api.Assertions._
|
2020-05-22 15:15:09 +02:00
|
|
|
import org.junit.jupiter.api.Test
|
2021-04-13 17:47:43 +02:00
|
|
|
import org.junit.jupiter.api.io.TempDir
|
2020-05-22 15:15:09 +02:00
|
|
|
import org.slf4j.{Logger, LoggerFactory}
|
|
|
|
|
2021-04-13 17:47:43 +02:00
|
|
|
import java.nio.file.Path
|
2021-06-30 13:00:52 +02:00
|
|
|
import scala.collection.JavaConversions._
|
2021-12-06 14:24:03 +01:00
|
|
|
import scala.io.Source
|
2021-06-30 13:00:52 +02:00
|
|
|
|
2020-05-22 15:15:09 +02:00
|
|
|
/** Tests for the ORCID to OAF mapping: parsing of raw ORCID records with
  * [[ORCIDToOAF.extractValueFromInputString]], the Spark preprocessing and
  * conversion jobs, and the conversion of a single [[ORCIDItem]] through
  * [[ORCIDToOAF.convertTOOAF]].
  */
class MappingORCIDToOAFTest {

  val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass)
  val mapper = new ObjectMapper()

  /** Reads the `dataOutput` classpath resource and checks that every line of it
    * yields a non-null value from [[ORCIDToOAF.extractValueFromInputString]].
    */
  @Test
  def testExtractData(): Unit = {
    val stream = getClass.getResourceAsStream("/eu/dnetlib/doiboost/orcid/dataOutput")
    // Fail with a clear assertion instead of a NullPointerException when the resource is missing.
    assertNotNull(stream)

    val source = Source.fromInputStream(stream)
    // Always release the underlying stream, even if reading fails.
    val json =
      try source.mkString
      finally source.close()

    assertNotNull(json)
    assertFalse(json.isEmpty)

    // `json.lines` is avoided on purpose: on JDK 11+ it resolves to java.lang.String#lines
    // (a java.util.stream.Stream without `foreach`) and it is deprecated in newer Scala versions.
    Source.fromString(json).getLines().foreach { line =>
      assertNotNull(ORCIDToOAF.extractValueFromInputString(line))
    }
  }

  /** Runs [[SparkPreprocessORCID]] and [[SparkConvertORCIDToOAF]] on the `datasets`
    * classpath resource and checks that the number of generated publications equals
    * the number of `orcidworksWithAuthor` items written to the working path.
    *
    * @param testDir temporary directory injected by JUnit; it hosts both the working
    *                path and the target path of the jobs
    */
  @Test
  def testOAFConvert(@TempDir testDir: Path): Unit = {
    val sourcePath: String = getClass.getResource("/eu/dnetlib/doiboost/orcid/datasets").getPath
    val targetPath: String = s"${testDir.toString}/output/orcidPublication"
    val workingPath = s"${testDir.toString}/wp/"

    val conf = new SparkConf()
    conf.setMaster("local[*]")
    conf.set("spark.driver.host", "localhost")

    val spark: SparkSession =
      SparkSession
        .builder()
        .appName(getClass.getSimpleName)
        .config(conf)
        .getOrCreate()

    // The session is closed in `finally` so that a failing job or assertion does not
    // leave it alive for the tests that run afterwards.
    try {
      implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
      import spark.implicits._

      SparkPreprocessORCID.run(spark, sourcePath, workingPath)
      SparkConvertORCIDToOAF.run(spark, workingPath, targetPath)

      val expectedCount: Long =
        spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem].count()
      val publications: Dataset[Publication] = spark.read.load(targetPath).as[Publication]
      val actualCount: Long = publications.count()

      // assertEquals reports both counts on failure, unlike assertTrue(a == b).
      assertEquals(expectedCount, actualCount)
      println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(publications.first()))
    } finally {
      spark.close()
    }
  }

  /** Converts a hand-built [[ORCIDItem]] with three authors and checks that the result
    * carries exactly one pid, of class `doi`, whose value is the DOI of the item.
    */
  @Test
  def testExtractDat1(): Unit = {
    val aList: List[OrcidAuthor] = List(
      OrcidAuthor("0000-0002-4335-5309", Some("Lucrecia"), Some("Curto"), null, null, null),
      OrcidAuthor("0000-0001-7501-3330", Some("Emilio"), Some("Malchiodi"), null, null, null),
      OrcidAuthor("0000-0002-5490-9186", Some("Sofia"), Some("Noli Truant"), null, null, null)
    )
    val orcid: ORCIDItem = ORCIDItem("10.1042/BCJ20160876", aList)

    val oaf = ORCIDToOAF.convertTOOAF(orcid)

    // JUnit assertions are used instead of Predef.assert, which can be elided at compile
    // time and does not report expected/actual values.
    val pids = oaf.getPid
    assertEquals(1, pids.size())

    // Exactly one pid is present (asserted above), so checking the first covers them all.
    val pid = pids.get(0)
    assertEquals("doi", pid.getQualifier.getClassid)
    assertEquals("10.1042/BCJ20160876", pid.getValue)
  }

}
|