From 080a280bea23f70b70e7e96b0c149b3651035db7 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 21 Jun 2021 16:23:59 +0200 Subject: [PATCH] added pdb to Oaf Transformation --- .../java/eu/dnetlib/dhp/sx/bio/PDBToOAF.scala | 58 +++++++++++++++++-- .../dhp/sx/bio/pubmed/BioScholixTest.scala | 10 +++- 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/PDBToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/PDBToOAF.scala index e255a2270..a37c6529f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/PDBToOAF.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/PDBToOAF.scala @@ -2,17 +2,21 @@ package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils -import eu.dnetlib.dhp.schema.oaf.{DataInfo, Dataset, Oaf} +import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Instance, KeyValue, Oaf, Relation} import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse + import scala.collection.JavaConverters._ object PDBToOAF { val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9") + + val collectedFrom: KeyValue = OafMapperUtils.keyValue("10|opendoar____::d1c373ab1570cfb9a7dbb53c186b37a2", "Protein Data Bank") + def convert(input:String):List[Oaf]= { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json = parse(input) - val pdb = (json \ "pdb").extract[String] + val pdb = (json \ "pdb").extract[String].toLowerCase if (pdb.isEmpty) return List() @@ -25,12 +29,56 @@ object PDBToOAF { ).asJava ) + d.setCollectedfrom(List(collectedFrom).asJava) + d.setDataInfo(dataInfo) + d.setId(OafMapperUtils.createOpenaireId(50,s"pdb_________::$pdb", true)) + d.setOriginalId(List(pdb).asJava) + + val title = (json \ "title").extractOrElse[String](null) + + if (title== null) + return List() + d.setTitle(List(OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, dataInfo)).asJava) + val authors:List[String] = (json \ "authors").extractOrElse[List[String]](null) + if (authors!= null) + { + val convertedAuthors = authors.zipWithIndex.map{a => + + val res = new Author + res.setFullname(a._1) + res.setRank(a._2+1) + res + } + + d.setAuthor(convertedAuthors.asJava) + } + + val i = new Instance + + i.setPid(d.getPid) + i.setUrl(List(s"https://www.rcsb.org/structure/$pdb").asJava) + i.setInstancetype(OafMapperUtils.qualifier("0046", "Bioentity", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) + + i.setCollectedfrom(collectedFrom) + d.setInstance(List(i).asJava) + val pmid = (json \ "pmid").extractOrElse[String](null) + + if (pmid != null) { + val rel = new Relation + rel.setCollectedfrom(List(collectedFrom).asJava) + rel.setDataInfo(dataInfo) + + rel.setRelType("resultResult") + rel.setSubRelType("supplement") + rel.setRelClass("IsSupplementTo") + + rel.setSource(d.getId) + rel.setTarget(s"unresolved::$pmid::pmid") + return List(d,rel) + } List(d) - } - - } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/bio/pubmed/BioScholixTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/bio/pubmed/BioScholixTest.scala index e6699ccb9..f7a5bcd17 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/bio/pubmed/BioScholixTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/bio/pubmed/BioScholixTest.scala @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.sx.bio.pubmed import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature} -import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} +import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} import eu.dnetlib.dhp.sx.bio.PDBToOAF import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.extension.ExtendWith @@ -55,16 +55,20 @@ class BioScholixTest extends AbstractVocabularyTest{ assertNotNull(vocabularies) assertTrue(vocabularies.vocabularyExists("dnet:publication_resource")) -// val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) -// mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false) + val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false) val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/pdb_dump")).mkString records.lines.foreach(s => assertTrue(s.nonEmpty)) val result:List[Oaf]= records.lines.toList.flatMap(o => PDBToOAF.convert(o)) + + assertTrue(result.nonEmpty) result.foreach(r => assertNotNull(r)) + println(result.count(o => o.isInstanceOf[Relation])) + println(mapper.writeValueAsString(result.head)) }