Added PDB to OAF transformation

This commit is contained in:
Sandro La Bruzzo 2021-06-21 16:23:59 +02:00
parent 1dc0c59e20
commit 080a280bea
2 changed files with 60 additions and 8 deletions

View File

@ -2,17 +2,21 @@ package eu.dnetlib.dhp.sx.bio
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
import eu.dnetlib.dhp.schema.oaf.{DataInfo, Dataset, Oaf}
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Instance, KeyValue, Oaf, Relation}
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse
import scala.collection.JavaConverters._
object PDBToOAF {
// Shared DataInfo instance reused for the dataset, its title property and the generated relation.
// NOTE(review): the trailing "0.9" is presumably the trust value and the boolean flags the
// invisible/inferred/deletedbyinference switches — confirm against OafMapperUtils.dataInfo.
val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9")
// Key/value pair naming "Protein Data Bank" as the source; it is set as collectedFrom on the
// dataset, on its instance and on the relation built in convert.
val collectedFrom: KeyValue = OafMapperUtils.keyValue("10|opendoar____::d1c373ab1570cfb9a7dbb53c186b37a2", "Protein Data Bank")
def convert(input:String):List[Oaf]= {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json = parse(input)
val pdb = (json \ "pdb").extract[String]
val pdb = (json \ "pdb").extract[String].toLowerCase
if (pdb.isEmpty)
return List()
@ -25,12 +29,56 @@ object PDBToOAF {
).asJava
)
d.setCollectedfrom(List(collectedFrom).asJava)
d.setDataInfo(dataInfo)
d.setId(OafMapperUtils.createOpenaireId(50,s"pdb_________::$pdb", true))
d.setOriginalId(List(pdb).asJava)
val title = (json \ "title").extractOrElse[String](null)
if (title== null)
return List()
d.setTitle(List(OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, dataInfo)).asJava)
val authors:List[String] = (json \ "authors").extractOrElse[List[String]](null)
List(d)
if (authors!= null)
{
val convertedAuthors = authors.zipWithIndex.map{a =>
val res = new Author
res.setFullname(a._1)
res.setRank(a._2+1)
res
}
d.setAuthor(convertedAuthors.asJava)
}
val i = new Instance
i.setPid(d.getPid)
i.setUrl(List(s"https://www.rcsb.org/structure/$pdb").asJava)
i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
i.setCollectedfrom(collectedFrom)
d.setInstance(List(i).asJava)
val pmid = (json \ "pmid").extractOrElse[String](null)
if (pmid != null) {
val rel = new Relation
rel.setCollectedfrom(List(collectedFrom).asJava)
rel.setDataInfo(dataInfo)
rel.setRelType("resultResult")
rel.setSubRelType("supplement")
rel.setRelClass("IsSupplementTo")
rel.setSource(d.getId)
rel.setTarget(s"unresolved::$pmid::pmid")
return List(d,rel)
}
List(d)
}
}

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.sx.bio.pubmed
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature}
import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result}
import eu.dnetlib.dhp.sx.bio.PDBToOAF
import org.junit.jupiter.api.Assertions._
import org.junit.jupiter.api.extension.ExtendWith
@ -55,16 +55,20 @@ class BioScholixTest extends AbstractVocabularyTest{
assertNotNull(vocabularies)
assertTrue(vocabularies.vocabularyExists("dnet:publication_resource"))
// val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
// mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false)
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false)
val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/pdb_dump")).mkString
records.lines.foreach(s => assertTrue(s.nonEmpty))
val result:List[Oaf]= records.lines.toList.flatMap(o => PDBToOAF.convert(o))
assertTrue(result.nonEmpty)
result.foreach(r => assertNotNull(r))
println(result.count(o => o.isInstanceOf[Relation]))
println(mapper.writeValueAsString(result.head))
}