forked from D-Net/dnet-hadoop
added pdb to Oaf Transformation
This commit is contained in:
parent
1dc0c59e20
commit
080a280bea
|
@ -2,17 +2,21 @@ package eu.dnetlib.dhp.sx.bio
|
|||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
|
||||
import eu.dnetlib.dhp.schema.oaf.{DataInfo, Dataset, Oaf}
|
||||
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Instance, KeyValue, Oaf, Relation}
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
object PDBToOAF {
|
||||
|
||||
/** Provenance metadata shared by everything `convert` emits: the same instance is set on the
  * dataset, passed to the title's structured property and set on the supplement relation.
  *
  * NOTE(review): the positional boolean/null arguments and the trailing "0.9" (presumably the
  * trust value) are interpreted by `OafMapperUtils.dataInfo` — confirm against its signature.
  */
val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9")
|
||||
|
||||
/** Key/value pair naming "Protein Data Bank" as the source of the records; `convert` sets it on
  * the dataset, on its instance and on the supplement relation.
  *
  * NOTE(review): the key looks like an OpenAIRE datasource identifier (OpenDOAR prefix) —
  * confirm it matches the registered datasource.
  */
val collectedFrom: KeyValue = OafMapperUtils.keyValue("10|opendoar____::d1c373ab1570cfb9a7dbb53c186b37a2", "Protein Data Bank")
|
||||
|
||||
def convert(input:String):List[Oaf]= {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json = parse(input)
|
||||
val pdb = (json \ "pdb").extract[String]
|
||||
val pdb = (json \ "pdb").extract[String].toLowerCase
|
||||
|
||||
if (pdb.isEmpty)
|
||||
return List()
|
||||
|
@ -25,12 +29,56 @@ object PDBToOAF {
|
|||
).asJava
|
||||
)
|
||||
|
||||
d.setCollectedfrom(List(collectedFrom).asJava)
|
||||
d.setDataInfo(dataInfo)
|
||||
d.setId(OafMapperUtils.createOpenaireId(50,s"pdb_________::$pdb", true))
|
||||
d.setOriginalId(List(pdb).asJava)
|
||||
|
||||
val title = (json \ "title").extractOrElse[String](null)
|
||||
|
||||
if (title== null)
|
||||
return List()
|
||||
d.setTitle(List(OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, dataInfo)).asJava)
|
||||
|
||||
|
||||
val authors:List[String] = (json \ "authors").extractOrElse[List[String]](null)
|
||||
|
||||
if (authors!= null)
|
||||
{
|
||||
val convertedAuthors = authors.zipWithIndex.map{a =>
|
||||
|
||||
val res = new Author
|
||||
res.setFullname(a._1)
|
||||
res.setRank(a._2+1)
|
||||
res
|
||||
}
|
||||
|
||||
d.setAuthor(convertedAuthors.asJava)
|
||||
}
|
||||
|
||||
val i = new Instance
|
||||
|
||||
i.setPid(d.getPid)
|
||||
i.setUrl(List(s"https://www.rcsb.org/structure/$pdb").asJava)
|
||||
i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
|
||||
|
||||
i.setCollectedfrom(collectedFrom)
|
||||
d.setInstance(List(i).asJava)
|
||||
val pmid = (json \ "pmid").extractOrElse[String](null)
|
||||
|
||||
if (pmid != null) {
|
||||
val rel = new Relation
|
||||
rel.setCollectedfrom(List(collectedFrom).asJava)
|
||||
rel.setDataInfo(dataInfo)
|
||||
|
||||
rel.setRelType("resultResult")
|
||||
rel.setSubRelType("supplement")
|
||||
rel.setRelClass("IsSupplementTo")
|
||||
|
||||
rel.setSource(d.getId)
|
||||
rel.setTarget(s"unresolved::$pmid::pmid")
|
||||
return List(d,rel)
|
||||
}
|
||||
List(d)
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
package eu.dnetlib.dhp.sx.bio.pubmed
|
||||
|
||||
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature}
|
||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
|
||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result}
|
||||
import eu.dnetlib.dhp.sx.bio.PDBToOAF
|
||||
import org.junit.jupiter.api.Assertions._
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
|
@ -55,16 +55,20 @@ class BioScholixTest extends AbstractVocabularyTest{
|
|||
assertNotNull(vocabularies)
|
||||
assertTrue(vocabularies.vocabularyExists("dnet:publication_resource"))
|
||||
|
||||
// val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
||||
// mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false)
|
||||
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
||||
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false)
|
||||
val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/pdb_dump")).mkString
|
||||
records.lines.foreach(s => assertTrue(s.nonEmpty))
|
||||
|
||||
val result:List[Oaf]= records.lines.toList.flatMap(o => PDBToOAF.convert(o))
|
||||
|
||||
|
||||
|
||||
assertTrue(result.nonEmpty)
|
||||
result.foreach(r => assertNotNull(r))
|
||||
|
||||
println(result.count(o => o.isInstanceOf[Relation]))
|
||||
println(mapper.writeValueAsString(result.head))
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue