forked from D-Net/dnet-hadoop
renamed packages
This commit is contained in:
parent
3990165d05
commit
4fe7b75644
|
@ -0,0 +1,32 @@
|
|||
package eu.dnetlib.dhp.sx.bio
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
|
||||
import eu.dnetlib.dhp.schema.oaf.{Dataset, Oaf}
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.JsonAST.{JField, JObject, JString}
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
|
||||
object PDBToOAF {
|
||||
|
||||
def convert(input:String):List[Oaf]= {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json = parse(input)
|
||||
val pdb = (json \ "pdb").extract[String]
|
||||
|
||||
if (pdb.isEmpty)
|
||||
return List()
|
||||
|
||||
val d = new Dataset
|
||||
|
||||
d.setPid(List(OafMapperUtils.structuredProperty()))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
List()
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.sx.ebi.model;
|
||||
package eu.dnetlib.dhp.sx.bio.pubmed;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.sx.ebi.model;
|
||||
package eu.dnetlib.dhp.sx.bio.pubmed;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.sx.ebi.model;
|
||||
package eu.dnetlib.dhp.sx.bio.pubmed;
|
||||
|
||||
public class PMGrant {
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.sx.ebi.model;
|
||||
package eu.dnetlib.dhp.sx.bio.pubmed;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package eu.dnetlib.dhp.sx.ebi.model
|
||||
package eu.dnetlib.dhp.sx.bio.pubmed
|
||||
|
||||
import scala.xml.MetaData
|
||||
import scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.sx.ebi.model;
|
||||
package eu.dnetlib.dhp.sx.bio.pubmed;
|
||||
|
||||
public class PMSubject {
|
||||
private String value;
|
|
@ -1,12 +1,10 @@
|
|||
package eu.dnetlib.dhp.sx.ebi.model
|
||||
package eu.dnetlib.dhp.sx.bio.pubmed
|
||||
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils, PidType}
|
||||
import eu.dnetlib.dhp.schema.oaf._
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.language.postfixOps
|
||||
|
||||
object PubMedToOaf {
|
||||
|
||||
|
@ -23,7 +21,7 @@ object PubMedToOaf {
|
|||
case "publication" => new Publication
|
||||
case "other" => new OtherResearchProduct
|
||||
case "software" => new Software
|
||||
case _ =>null
|
||||
case _ => null
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -44,7 +42,6 @@ object PubMedToOaf {
|
|||
}
|
||||
|
||||
|
||||
|
||||
def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier = {
|
||||
val a = vocabularies.getSynonymAsQualifier(vocabularyName, term)
|
||||
val b = vocabularies.getTermAsQualifier(vocabularyName, term)
|
||||
|
@ -60,7 +57,7 @@ object PubMedToOaf {
|
|||
return null
|
||||
val i = new Instance
|
||||
var pidList: List[StructuredProperty] = List(OafMapperUtils.structuredProperty(article.getPmid, PidType.pmid.toString, PidType.pmid.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo))
|
||||
if (pidList ==null)
|
||||
if (pidList == null)
|
||||
return null
|
||||
if (article.getDoi != null) {
|
||||
pidList = pidList ::: List(OafMapperUtils.structuredProperty(article.getDoi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo))
|
||||
|
@ -89,12 +86,12 @@ object PubMedToOaf {
|
|||
result.setInstance(List(i).asJava)
|
||||
|
||||
|
||||
i.getPid.asScala.filter(p =>"pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection breakOut)
|
||||
i.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection breakOut)
|
||||
val urlLists: List[String] = pidList
|
||||
.map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue))
|
||||
.filter(t => t._1.nonEmpty)
|
||||
.map(t => t._1 + t._2)
|
||||
if (urlLists!= null)
|
||||
if (urlLists != null)
|
||||
i.setUrl(urlLists.asJava)
|
||||
i.setDateofacceptance(OafMapperUtils.field(article.getDate, dataInfo))
|
||||
i.setCollectedfrom(collectedFrom)
|
||||
|
@ -120,12 +117,12 @@ object PubMedToOaf {
|
|||
}
|
||||
|
||||
|
||||
val subjects:List[StructuredProperty] = article.getSubjects.asScala.map(s => OafMapperUtils.structuredProperty(s.getValue, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, dataInfo))(collection breakOut)
|
||||
if (subjects!= null)
|
||||
val subjects: List[StructuredProperty] = article.getSubjects.asScala.map(s => OafMapperUtils.structuredProperty(s.getValue, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, dataInfo))(collection breakOut)
|
||||
if (subjects != null)
|
||||
result.setSubject(subjects.asJava)
|
||||
|
||||
|
||||
val authors:List[Author] = article.getAuthors.asScala.zipWithIndex.map { case (a, index) =>
|
||||
val authors: List[Author] = article.getAuthors.asScala.zipWithIndex.map { case (a, index) =>
|
||||
val author = new Author()
|
||||
author.setName(a.getForeName)
|
||||
author.setSurname(a.getLastName)
|
||||
|
@ -135,7 +132,7 @@ object PubMedToOaf {
|
|||
}(collection breakOut)
|
||||
|
||||
|
||||
if(authors != null && authors.nonEmpty)
|
||||
if (authors != null && authors.nonEmpty)
|
||||
result.setAuthor(authors.asJava)
|
||||
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
||||
|
||||
|
@ -150,9 +147,4 @@ object PubMedToOaf {
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -4,7 +4,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants
|
|||
import eu.dnetlib.dhp.schema.oaf.{Author, Instance, Journal, KeyValue, Oaf, Publication, Relation, Dataset => OafDataset}
|
||||
import eu.dnetlib.dhp.schema.scholexplorer.OafUtils.createQualifier
|
||||
import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, OafUtils, ProvenaceInfo}
|
||||
import eu.dnetlib.dhp.sx.ebi.model.{PMArticle, PMAuthor, PMJournal}
|
||||
import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal}
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import eu.dnetlib.scholexplorer.relation.RelationMapper
|
||||
import org.apache.commons.io.IOUtils
|
||||
|
@ -20,7 +20,7 @@ import scala.collection.JavaConverters._
|
|||
|
||||
object SparkAddLinkUpdates {
|
||||
|
||||
val relationMapper = RelationMapper.load
|
||||
val relationMapper: RelationMapper = RelationMapper.load
|
||||
|
||||
|
||||
case class EBILinks(relation:String, pubdate:String, tpid:String, tpidType:String, turl:String, title:String, publisher:String) {}
|
||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.dhp.sx.ebi
|
|||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||
import eu.dnetlib.dhp.schema.oaf.Result
|
||||
import eu.dnetlib.dhp.sx.ebi.model._
|
||||
import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf}
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.spark.SparkConf
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.sx.ebi;
|
||||
package eu.dnetlib.dhp.sx.bio.pubmed;
|
||||
|
||||
import static org.mockito.Mockito.lenient;
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
package eu.dnetlib.dhp.sx.ebi
|
||||
package eu.dnetlib.dhp.sx.bio.pubmed
|
||||
|
||||
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature}
|
||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Result}
|
||||
import eu.dnetlib.dhp.sx.ebi.model.{PMArticle, PMParser, PubMedToOaf}
|
||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
|
||||
import eu.dnetlib.dhp.sx.bio.PDBToOAF
|
||||
import org.junit.jupiter.api.Assertions._
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||
|
@ -14,7 +14,7 @@ import scala.xml.pull.XMLEventReader
|
|||
|
||||
|
||||
@ExtendWith(Array(classOf[MockitoExtension]))
|
||||
class TestEBI extends AbstractVocabularyTest{
|
||||
class BioScholixTest extends AbstractVocabularyTest{
|
||||
|
||||
|
||||
@BeforeEach
|
||||
|
@ -46,11 +46,24 @@ class TestEBI extends AbstractVocabularyTest{
|
|||
assertEquals(10, r.size)
|
||||
assertTrue(r.map(p => p.asInstanceOf[Result]).flatMap(p => p.getInstance().asScala.map(i => i.getInstancetype.getClassid)).exists(p => "0037".equalsIgnoreCase(p)))
|
||||
println(mapper.writeValueAsString(r.head))
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
def testPDBToOAF():Unit = {
|
||||
|
||||
assertNotNull(vocabularies)
|
||||
assertTrue(vocabularies.vocabularyExists("dnet:publication_resource"))
|
||||
|
||||
// val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
||||
// mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false)
|
||||
val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/pdb_dump")).mkString
|
||||
records.lines.foreach(s => assertTrue(s.nonEmpty))
|
||||
|
||||
val result:List[Oaf]= records.lines.toList.flatMap(o => PDBToOAF.convert(o))
|
||||
|
||||
assertTrue(result.nonEmpty)
|
||||
result.foreach(r => assertNotNull(r))
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
{"pdb": "1CW0", "title": "crystal structure analysis of very short patch repair (vsr) endonuclease in complex with a duplex dna", "authors": ["S.E.Tsutakawa", "H.Jingami", "K.Morikawa"], "doi": "10.1016/S0092-8674(00)81550-0", "pmid": "10612397"}
|
||||
{"pdb": "2CWW", "title": "crystal structure of thermus thermophilus ttha1280, a putative sam- dependent rna methyltransferase, in complex with s-adenosyl-l- homocysteine", "authors": ["A.A.Pioszak", "K.Murayama", "N.Nakagawa", "A.Ebihara", "S.Kuramitsu", "M.Shirouzu", "S.Yokoyama", "Riken Structural Genomics/proteomics Initiative (Rsgi)"], "doi": "10.1107/S1744309105029842", "pmid": "16511182"}
|
||||
{"pdb": "6CWE", "title": "structure of alpha-gsa[8,6p] bound by cd1d and in complex with the va14vb8.2 tcr", "authors": ["J.Wang", "D.Zajonc"], "doi": null, "pmid": null}
|
||||
{"pdb": "5CWS", "title": "crystal structure of the intact chaetomium thermophilum nsp1-nup49- nup57 channel nucleoporin heterotrimer bound to its nic96 nuclear pore complex attachment site", "authors": ["C.J.Bley", "S.Petrovic", "M.Paduch", "V.Lu", "A.A.Kossiakoff", "A.Hoelz"], "doi": "10.1126/SCIENCE.AAC9176", "pmid": "26316600"}
|
||||
{"pdb": "5CWE", "title": "structure of cyp107l2 from streptomyces avermitilis with lauric acid", "authors": ["T.-V.Pham", "S.-H.Han", "J.-H.Kim", "D.-H.Kim", "L.-W.Kang"], "doi": null, "pmid": null}
|
||||
{"pdb": "7CW4", "title": "acetyl-coa acetyltransferase from bacillus cereus atcc 14579", "authors": ["J.Hong", "K.J.Kim"], "doi": "10.1016/J.BBRC.2020.09.048", "pmid": "32972748"}
|
||||
{"pdb": "2CWP", "title": "crystal structure of metrs related protein from pyrococcus horikoshii", "authors": ["K.Murayama", "M.Kato-Murayama", "M.Shirouzu", "S.Yokoyama", "Riken StructuralGenomics/proteomics Initiative (Rsgi)"], "doi": null, "pmid": null}
|
||||
{"pdb": "2CW7", "title": "crystal structure of intein homing endonuclease ii", "authors": ["H.Matsumura", "H.Takahashi", "T.Inoue", "H.Hashimoto", "M.Nishioka", "S.Fujiwara", "M.Takagi", "T.Imanaka", "Y.Kai"], "doi": "10.1002/PROT.20858", "pmid": "16493661"}
|
||||
{"pdb": "1CWU", "title": "brassica napus enoyl acp reductase a138g mutant complexed with nad+ and thienodiazaborine", "authors": ["A.Roujeinikova", "J.B.Rafferty", "D.W.Rice"], "doi": "10.1074/JBC.274.43.30811", "pmid": "10521472"}
|
||||
{"pdb": "3CWN", "title": "escherichia coli transaldolase b mutant f178y", "authors": ["T.Sandalova", "G.Schneider", "A.Samland"], "doi": "10.1074/JBC.M803184200", "pmid": "18687684"}
|
||||
{"pdb": "1CWL", "title": "human cyclophilin a complexed with 4 4-hydroxy-meleu cyclosporin", "authors": ["V.Mikol", "J.Kallen", "P.Taylor", "M.D.Walkinshaw"], "doi": "10.1006/JMBI.1998.2108", "pmid": "9769216"}
|
||||
{"pdb": "3CW2", "title": "crystal structure of the intact archaeal translation initiation factor 2 from sulfolobus solfataricus .", "authors": ["E.A.Stolboushkina", "S.V.Nikonov", "A.D.Nikulin", "U.Blaesi", "D.J.Manstein", "R.V.Fedorov", "M.B.Garber", "O.S.Nikonov"], "doi": "10.1016/J.JMB.2008.07.039", "pmid": "18675278"}
|
||||
{"pdb": "3CW9", "title": "4-chlorobenzoyl-coa ligase/synthetase in the thioester-forming conformation, bound to 4-chlorophenacyl-coa", "authors": ["A.S.Reger", "J.Cao", "R.Wu", "D.Dunaway-Mariano", "A.M.Gulick"], "doi": "10.1021/BI800696Y", "pmid": "18620418"}
|
||||
{"pdb": "3CWU", "title": "crystal structure of an alka host/guest complex 2'-fluoro-2'-deoxy-1, n6-ethenoadenine:thymine base pair", "authors": ["B.R.Bowman", "S.Lee", "S.Wang", "G.L.Verdine"], "doi": "10.1016/J.STR.2008.04.012", "pmid": "18682218"}
|
||||
{"pdb": "5CWF", "title": "crystal structure of de novo designed helical repeat protein dhr8", "authors": ["G.Bhabha", "D.C.Ekiert"], "doi": "10.1038/NATURE16162", "pmid": "26675729"}
|
Loading…
Reference in New Issue