2021-06-18 16:41:24 +02:00
|
|
|
package eu.dnetlib.dhp.sx.bio.pubmed
|
2020-07-10 14:44:50 +02:00
|
|
|
|
2021-06-16 14:56:24 +02:00
|
|
|
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature}
|
2021-06-18 16:41:24 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
|
|
|
|
import eu.dnetlib.dhp.sx.bio.PDBToOAF
|
2021-06-16 14:56:24 +02:00
|
|
|
import org.junit.jupiter.api.Assertions._
|
|
|
|
import org.junit.jupiter.api.extension.ExtendWith
|
|
|
|
import org.junit.jupiter.api.{BeforeEach, Test}
|
|
|
|
import org.mockito.junit.jupiter.MockitoExtension
|
|
|
|
|
|
|
|
import scala.collection.JavaConverters._
|
|
|
|
import scala.io.Source
|
|
|
|
import scala.xml.pull.XMLEventReader
|
2021-06-03 10:52:09 +02:00
|
|
|
|
|
|
|
|
2021-06-16 14:56:24 +02:00
|
|
|
/** Tests for the PubMed XML parser and for the PubMed / PDB to OAF converters.
  *
  * The `vocabularies` member and `setUpVocabulary()` are not defined in this
  * class; they are expected to come from [[AbstractVocabularyTest]].
  */
@ExtendWith(Array(classOf[MockitoExtension]))
class BioScholixTest extends AbstractVocabularyTest {

  /** Runs the parent's vocabulary set-up before every test. */
  @BeforeEach
  def setUp(): Unit = {
    super.setUpVocabulary()
  }

  /** Opens a classpath resource, hands it to `read` and always closes it afterwards.
    *
    * @param path resource path, resolved by `getClass.getResourceAsStream`
    *             (relative to this class' package unless it starts with `/`)
    * @param read function consuming the opened [[scala.io.Source]]; it must fully
    *             materialise its result, because the source is closed on return
    * @return whatever `read` produced
    */
  private def withResource[T](path: String)(read: Source => T): T = {
    val stream = getClass.getResourceAsStream(path)
    // Fail with an explicit message instead of an NPE deep inside Source.
    assertNotNull(stream, s"Test resource not found on the classpath: $path")
    val source = Source.fromInputStream(stream)
    try read(source)
    finally source.close()
  }

  /** Reads a classpath resource as a list of lines.
    *
    * Uses `Source.getLines()` rather than `String.lines`: on JDK 11+ the latter
    * resolves to `java.lang.String.lines()`, which returns a Java `Stream`.
    */
  private def resourceLines(path: String): List[String] =
    withResource(path)(_.getLines().toList)

  /** Smoke test: parses `pubmed.xml` with [[PMParser]] and prints every parsed
    * item as indented JSON. It makes no assertions on the parsed content.
    */
  @Test
  def testEBIData(): Unit = {
    val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
    val inputXML = withResource("pubmed.xml")(_.mkString)
    // Feed the already-decoded text to the reader; no bytes/charset round-trip.
    val xml = new XMLEventReader(Source.fromString(inputXML))
    new PMParser(xml).foreach(s => println(mapper.writeValueAsString(s)))
  }

  /** Deserialises every line of `pubmed_dump` into a [[PMArticle]], converts it
    * with `PubMedToOaf.convert` and checks that 10 results are produced, at least
    * one of which has an instance type class id equal to "0037".
    */
  @Test
  def testPubmedToOaf(): Unit = {
    assertNotNull(vocabularies)
    assertTrue(vocabularies.vocabularyExists("dnet:publication_resource"))

    val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)

    val r: List[Oaf] = resourceLines("pubmed_dump")
      .map(s => mapper.readValue(s, classOf[PMArticle]))
      .map(a => PubMedToOaf.convert(a, vocabularies))
    assertEquals(10, r.size)

    // Checked narrowing instead of a blind cast: anything that is not a Result
    // (including null) is reported by the size assertion below.
    val results: List[Result] = r.collect { case res: Result => res }
    assertEquals(r.size, results.size, "Every converted record is expected to be a Result")

    val instanceTypeIds = results.flatMap(p => p.getInstance().asScala.map(i => i.getInstancetype.getClassid))
    assertTrue(instanceTypeIds.exists(p => "0037".equalsIgnoreCase(p)))

    println(mapper.writeValueAsString(r.head))
  }

  /** Converts every line of the `pdb_dump` resource with `PDBToOAF.convert` and
    * checks that the input lines are non-empty and that the conversion yields a
    * non-empty list without null elements.
    */
  @Test
  def testPDBToOAF(): Unit = {
    assertNotNull(vocabularies)
    assertTrue(vocabularies.vocabularyExists("dnet:publication_resource"))

    val records: List[String] = resourceLines("/eu/dnetlib/dhp/sx/bio/pdb_dump")
    records.foreach(s => assertTrue(s.nonEmpty))

    val result: List[Oaf] = records.flatMap(o => PDBToOAF.convert(o))
    assertTrue(result.nonEmpty)
    result.foreach(r => assertNotNull(r))
  }
}
|