forked from D-Net/dnet-hadoop
added pdb to Oaf Transformation
This commit is contained in:
parent
1dc0c59e20
commit
080a280bea
|
@ -2,17 +2,21 @@ package eu.dnetlib.dhp.sx.bio
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
|
||||||
import eu.dnetlib.dhp.schema.oaf.{DataInfo, Dataset, Oaf}
|
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Instance, KeyValue, Oaf, Relation}
|
||||||
import org.json4s.DefaultFormats
|
import org.json4s.DefaultFormats
|
||||||
import org.json4s.jackson.JsonMethods.parse
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
object PDBToOAF {
|
object PDBToOAF {
|
||||||
|
|
||||||
val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9")
|
// DataInfo instance built once and reused by convert: it is set on the dataset,
// passed to the title's structured property, and set on the emitted relation.
// NOTE(review): the "0.9" argument is presumably the trust value, and the boolean/null
// arguments follow OafMapperUtils.dataInfo's parameter order — confirm against that helper.
// NOTE(review): the same object reference is shared by every converted record; verify
// that no downstream consumer mutates it.
val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9")
|
||||||
|
|
||||||
|
// KeyValue reused by convert as the collectedfrom of the dataset, of its instance,
// and of the emitted relation.
// NOTE(review): presumably (key, value) = (datasource identifier, display name) for
// the Protein Data Bank — confirm against OafMapperUtils.keyValue.
val collectedFrom: KeyValue = OafMapperUtils.keyValue("10|opendoar____::d1c373ab1570cfb9a7dbb53c186b37a2", "Protein Data Bank")
|
||||||
|
|
||||||
def convert(input:String):List[Oaf]= {
|
def convert(input:String):List[Oaf]= {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
lazy val json = parse(input)
|
lazy val json = parse(input)
|
||||||
val pdb = (json \ "pdb").extract[String]
|
val pdb = (json \ "pdb").extract[String].toLowerCase
|
||||||
|
|
||||||
if (pdb.isEmpty)
|
if (pdb.isEmpty)
|
||||||
return List()
|
return List()
|
||||||
|
@ -25,12 +29,56 @@ object PDBToOAF {
|
||||||
).asJava
|
).asJava
|
||||||
)
|
)
|
||||||
|
|
||||||
|
d.setCollectedfrom(List(collectedFrom).asJava)
|
||||||
|
d.setDataInfo(dataInfo)
|
||||||
|
d.setId(OafMapperUtils.createOpenaireId(50,s"pdb_________::$pdb", true))
|
||||||
|
d.setOriginalId(List(pdb).asJava)
|
||||||
|
|
||||||
|
val title = (json \ "title").extractOrElse[String](null)
|
||||||
|
|
||||||
|
if (title== null)
|
||||||
|
return List()
|
||||||
|
d.setTitle(List(OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, dataInfo)).asJava)
|
||||||
|
|
||||||
|
|
||||||
|
val authors:List[String] = (json \ "authors").extractOrElse[List[String]](null)
|
||||||
|
|
||||||
List(d)
|
if (authors!= null)
|
||||||
|
{
|
||||||
|
val convertedAuthors = authors.zipWithIndex.map{a =>
|
||||||
|
|
||||||
|
val res = new Author
|
||||||
|
res.setFullname(a._1)
|
||||||
|
res.setRank(a._2+1)
|
||||||
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
d.setAuthor(convertedAuthors.asJava)
|
||||||
|
}
|
||||||
|
|
||||||
|
val i = new Instance
|
||||||
|
|
||||||
|
i.setPid(d.getPid)
|
||||||
|
i.setUrl(List(s"https://www.rcsb.org/structure/$pdb").asJava)
|
||||||
|
i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
|
||||||
|
|
||||||
|
i.setCollectedfrom(collectedFrom)
|
||||||
|
d.setInstance(List(i).asJava)
|
||||||
|
val pmid = (json \ "pmid").extractOrElse[String](null)
|
||||||
|
|
||||||
|
if (pmid != null) {
|
||||||
|
val rel = new Relation
|
||||||
|
rel.setCollectedfrom(List(collectedFrom).asJava)
|
||||||
|
rel.setDataInfo(dataInfo)
|
||||||
|
|
||||||
|
rel.setRelType("resultResult")
|
||||||
|
rel.setSubRelType("supplement")
|
||||||
|
rel.setRelClass("IsSupplementTo")
|
||||||
|
|
||||||
|
rel.setSource(d.getId)
|
||||||
|
rel.setTarget(s"unresolved::$pmid::pmid")
|
||||||
|
return List(d,rel)
|
||||||
|
}
|
||||||
|
List(d)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
package eu.dnetlib.dhp.sx.bio.pubmed
|
package eu.dnetlib.dhp.sx.bio.pubmed
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature}
|
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature}
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
|
import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result}
|
||||||
import eu.dnetlib.dhp.sx.bio.PDBToOAF
|
import eu.dnetlib.dhp.sx.bio.PDBToOAF
|
||||||
import org.junit.jupiter.api.Assertions._
|
import org.junit.jupiter.api.Assertions._
|
||||||
import org.junit.jupiter.api.extension.ExtendWith
|
import org.junit.jupiter.api.extension.ExtendWith
|
||||||
|
@ -55,16 +55,20 @@ class BioScholixTest extends AbstractVocabularyTest{
|
||||||
assertNotNull(vocabularies)
|
assertNotNull(vocabularies)
|
||||||
assertTrue(vocabularies.vocabularyExists("dnet:publication_resource"))
|
assertTrue(vocabularies.vocabularyExists("dnet:publication_resource"))
|
||||||
|
|
||||||
// val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
||||||
// mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false)
|
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false)
|
||||||
val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/pdb_dump")).mkString
|
val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/pdb_dump")).mkString
|
||||||
records.lines.foreach(s => assertTrue(s.nonEmpty))
|
records.lines.foreach(s => assertTrue(s.nonEmpty))
|
||||||
|
|
||||||
val result:List[Oaf]= records.lines.toList.flatMap(o => PDBToOAF.convert(o))
|
val result:List[Oaf]= records.lines.toList.flatMap(o => PDBToOAF.convert(o))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
assertTrue(result.nonEmpty)
|
assertTrue(result.nonEmpty)
|
||||||
result.foreach(r => assertNotNull(r))
|
result.foreach(r => assertNotNull(r))
|
||||||
|
|
||||||
|
println(result.count(o => o.isInstanceOf[Relation]))
|
||||||
|
println(mapper.writeValueAsString(result.head))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue