forked from D-Net/dnet-hadoop
Implemented mapping from pubmed baseline to OAF
This commit is contained in:
parent
aeb8132627
commit
cc0f2b11fb
|
@ -1,12 +1,20 @@
|
||||||
package eu.dnetlib.dhp.sx.ebi
|
package eu.dnetlib.dhp.sx.ebi
|
||||||
|
|
||||||
|
import com.esotericsoftware.kryo.Kryo
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result
|
||||||
import org.apache.commons.io.IOUtils
|
import org.apache.commons.io.IOUtils
|
||||||
import org.apache.spark.SparkConf
|
import org.apache.spark.SparkConf
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||||
import eu.dnetlib.dhp.sx.ebi.model.{PMArticle, PMAuthor, PMJournal, PMParser}
|
import eu.dnetlib.dhp.sx.ebi.model.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf}
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory
|
||||||
import org.apache.spark.sql.expressions.Aggregator
|
import org.apache.spark.sql.expressions.Aggregator
|
||||||
|
import org.objenesis.strategy.StdInstantiatorStrategy
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
import scala.xml.pull.XMLEventReader
|
import scala.xml.pull.XMLEventReader
|
||||||
|
@ -36,8 +44,14 @@ object SparkCreateBaselineDataFrame {
|
||||||
|
|
||||||
def main(args: Array[String]): Unit = {
|
def main(args: Array[String]): Unit = {
|
||||||
val conf: SparkConf = new SparkConf()
|
val conf: SparkConf = new SparkConf()
|
||||||
|
val log: Logger = LoggerFactory.getLogger(getClass)
|
||||||
val parser = new ArgumentApplicationParser(IOUtils.toString(SparkCreateEBIDataFrame.getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/ebi/ebi_to_df_params.json")))
|
val parser = new ArgumentApplicationParser(IOUtils.toString(SparkCreateEBIDataFrame.getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/ebi/ebi_to_df_params.json")))
|
||||||
parser.parseArgument(args)
|
parser.parseArgument(args)
|
||||||
|
val isLookupUrl: String = parser.get("isLookupUrl")
|
||||||
|
log.info("isLookupUrl: {}", isLookupUrl)
|
||||||
|
|
||||||
|
val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
|
||||||
|
val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService)
|
||||||
val spark: SparkSession =
|
val spark: SparkSession =
|
||||||
SparkSession
|
SparkSession
|
||||||
.builder()
|
.builder()
|
||||||
|
@ -54,6 +68,8 @@ object SparkCreateBaselineDataFrame {
|
||||||
implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle])
|
implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle])
|
||||||
implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal])
|
implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal])
|
||||||
implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor])
|
implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor])
|
||||||
|
implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result])
|
||||||
|
|
||||||
val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline",2000)
|
val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline",2000)
|
||||||
val ds:Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i =>{
|
val ds:Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i =>{
|
||||||
val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes()))
|
val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes()))
|
||||||
|
@ -64,5 +80,29 @@ object SparkCreateBaselineDataFrame {
|
||||||
ds.map(p => (p.getPmid,p))(Encoders.tuple(Encoders.STRING, PMEncoder)).groupByKey(_._1)
|
ds.map(p => (p.getPmid,p))(Encoders.tuple(Encoders.STRING, PMEncoder)).groupByKey(_._1)
|
||||||
.agg(pmArticleAggregator.toColumn)
|
.agg(pmArticleAggregator.toColumn)
|
||||||
.map(p => p._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_dataset")
|
.map(p => p._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_dataset")
|
||||||
|
|
||||||
|
val exported_dataset = spark.read.load(s"$workingPath/baseline_dataset").as[PMArticle]
|
||||||
|
exported_dataset
|
||||||
|
.map(a => PubMedToOaf.convert(a, vocabularies)).as[Result]
|
||||||
|
.filter(p => p!= null)
|
||||||
|
.write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_oaf")
|
||||||
|
|
||||||
|
|
||||||
|
def extract_values(a : PMArticle):(String, String) = {
|
||||||
|
val l:String = a.getPublicationTypes.asScala.map(p => p.getValue).mkString(",")
|
||||||
|
|
||||||
|
(a.getPmid, l)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
val ks:Dataset[(String,String)] =spark.read.load("/data/scholix/baseline_dataset").as[PMArticle].map(a => extract_values(a))(Encoders.tuple(Encoders.STRING,Encoders.STRING))
|
||||||
|
|
||||||
|
val ids:Dataset[String] = spark.read.load("/tmp/missing_pubmed").as[String]
|
||||||
|
|
||||||
|
ks.joinWith(ids, ks("_1").equalTo(ids("value")), "inner").map(k => k._1._2).distinct.show()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@ public class PMArticle implements Serializable {
|
||||||
private String language;
|
private String language;
|
||||||
private final List<PMSubject> subjects = new ArrayList<>();
|
private final List<PMSubject> subjects = new ArrayList<>();
|
||||||
private final List<PMSubject> publicationTypes = new ArrayList<>();
|
private final List<PMSubject> publicationTypes = new ArrayList<>();
|
||||||
|
private List<PMAuthor> authors = new ArrayList<>();
|
||||||
|
|
||||||
public List<PMSubject> getPublicationTypes() {
|
public List<PMSubject> getPublicationTypes() {
|
||||||
return publicationTypes;
|
return publicationTypes;
|
||||||
|
@ -35,8 +36,6 @@ public class PMArticle implements Serializable {
|
||||||
this.doi = doi;
|
this.doi = doi;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<PMAuthor> authors = new ArrayList<>();
|
|
||||||
|
|
||||||
public String getPmid() {
|
public String getPmid() {
|
||||||
return pmid;
|
return pmid;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,158 @@
|
||||||
|
package eu.dnetlib.dhp.sx.ebi.model
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils, PidType}
|
||||||
|
import eu.dnetlib.dhp.schema.oaf._
|
||||||
|
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
import scala.language.postfixOps
|
||||||
|
|
||||||
|
/**
  * Maps PubMed baseline records ([[PMArticle]]) onto the OAF data model.
  *
  * The entry point is [[PubMedToOaf.convert]]; the other public members are the
  * building blocks it relies on. Methods that cannot produce a value return `null`
  * (not `Option`) because callers filter on `null`.
  */
object PubMedToOaf {

  /** Class id and class name given to every subject created from a PubMed subject. */
  val SUBJ_CLASS: String = "keywords"

  /** URL prefix for each pid type; a pid whose type is not listed here yields no URL. */
  val urlMap: Map[String, String] = Map(
    "pmid" -> "https://pubmed.ncbi.nlm.nih.gov/",
    "doi" -> "https://dx.doi.org/"
  )

  // Shared provenance attached to every entity and field built by this mapper.
  // NOTE(review): the trailing "0.9" is presumably the trust value - confirm against OafMapperUtils.dataInfo.
  val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9")

  /** Datasource recorded as the origin of every converted record. */
  val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central")

  /**
    * Instantiates the concrete [[Result]] subtype that matches an instance type.
    *
    * The class id of `cobjQualifier` is looked up in the `dnet:result_typologies`
    * vocabulary and the class id of the resulting term selects the subtype.
    *
    * @param cobjQualifier instance type of the record (may be null)
    * @param vocabularies  vocabularies used for the lookup
    * @return a new, empty Dataset / Publication / OtherResearchProduct / Software,
    *         or null when the qualifier is null or the typology is missing or unknown
    */
  def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = {
    val typology: Option[String] = for {
      cobj <- Option(cobjQualifier)
      term <- Option(getVocabularyTerm("dnet:result_typologies", vocabularies, cobj.getClassid))
      classId <- Option(term.getClassid)
    } yield classId

    typology match {
      case Some("dataset") => new Dataset
      case Some("publication") => new Publication
      case Some("other") => new OtherResearchProduct
      case Some("software") => new Software
      case _ => null
    }
  }

  /**
    * Converts a PubMed journal into an OAF [[Journal]].
    *
    * @param j the source journal (may be null)
    * @return the mapped journal, or null when `j` is null
    */
  def mapJournal(j: PMJournal): Journal =
    if (j == null) null
    else {
      val journal = new Journal
      journal.setDataInfo(dataInfo)
      journal.setName(j.getTitle)
      journal.setVol(j.getVolume)
      journal.setIssnPrinted(j.getIssn)
      journal.setIss(j.getIssue)
      journal
    }

  /**
    * Resolves `term` in the given vocabulary, preferring a synonym match.
    *
    * The plain term lookup is performed only when the synonym lookup returns null.
    *
    * @param vocabularyName id of the vocabulary to search
    * @param vocabularies   the loaded vocabularies
    * @param term           the value to resolve
    * @return the synonym qualifier if there is one, otherwise whatever the term lookup returns
    */
  def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier =
    Option(vocabularies.getSynonymAsQualifier(vocabularyName, term))
      .getOrElse(vocabularies.getTermAsQualifier(vocabularyName, term))

  /**
    * Converts a PubMed article into an OAF [[Result]].
    *
    * A record is discarded (null is returned) when:
    *  - the article, its PMID, its publication types or its title is missing/empty;
    *  - none of its publication types resolves in `dnet:publication_resource`;
    *  - the resolved type does not map to a known result typology;
    *  - the identifier produced by [[IdentifierFactory]] equals the bare PMID.
    *
    * @param article      the parsed PubMed record
    * @param vocabularies vocabularies used to resolve types and languages
    * @return the populated result, or null when the record is discarded
    */
  def convert(article: PMArticle, vocabularies: VocabularyGroup): Result = {
    val converted: Option[Result] = for {
      a <- Option(article) if hasRequiredFields(a)
      instanceType <- resolveInstanceType(a, vocabularies)
      result <- Option(createResult(instanceType, vocabularies))
      identified <- assignIdentifier(populate(result, a, instanceType, vocabularies), a.getPmid)
    } yield identified

    converted.orNull
  }

  /** True when the article carries the fields without which no result can be built. */
  private def hasRequiredFields(article: PMArticle): Boolean =
    article.getPmid != null &&
      article.getPublicationTypes != null &&
      article.getTitle != null &&
      !article.getTitle.isEmpty

  /**
    * Picks the instance type: "Journal Article" wins when the article declares it,
    * otherwise the first publication type that resolves in the vocabulary is used.
    */
  private def resolveInstanceType(article: PMArticle, vocabularies: VocabularyGroup): Option[Qualifier] = {
    val publicationTypes = article.getPublicationTypes.asScala

    publicationTypes.find(s => "Journal Article".equalsIgnoreCase(s.getValue)) match {
      case Some(journalArticle) =>
        Option(getVocabularyTerm("dnet:publication_resource", vocabularies, journalArticle.getValue))
      case None =>
        // iterator: stop resolving as soon as one type matches
        publicationTypes.iterator
          .map(s => getVocabularyTerm("dnet:publication_resource", vocabularies, s.getValue))
          .find(_ != null)
    }
  }

  /** Builds a pid whose class id and class name are both the pid type name. */
  private def pid(value: String, pidType: PidType): StructuredProperty =
    OafMapperUtils.structuredProperty(value, pidType.toString, pidType.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)

  /** Copies every mappable field of `article` into `result` and returns `result`. */
  private def populate(result: Result, article: PMArticle, instanceType: Qualifier, vocabularies: VocabularyGroup): Result = {
    // The PMID is always present here; the DOI is optional.
    val pids: List[StructuredProperty] =
      pid(article.getPmid, PidType.pmid) :: Option(article.getDoi).map(pid(_, PidType.doi)).toList

    val urls: List[String] = pids.flatMap { p =>
      urlMap.get(p.getQualifier.getClassid).filter(_.nonEmpty).map(_ + p.getValue)
    }

    val instance = new Instance
    instance.setInstancetype(instanceType)
    instance.setPid(pids.asJava)
    instance.setUrl(urls.asJava)
    instance.setDateofacceptance(OafMapperUtils.field(article.getDate, dataInfo))
    instance.setCollectedfrom(collectedFrom)

    result.setDataInfo(dataInfo)
    result.setInstance(List(instance).asJava)
    result.setPid(pids.asJava)
    result.setCollectedfrom(List(collectedFrom).asJava)
    result.setDateofacceptance(OafMapperUtils.field(article.getDate, dataInfo))
    result.setTitle(List(OafMapperUtils.structuredProperty(article.getTitle, ModelConstants.MAIN_TITLE_QUALIFIER, dataInfo)).asJava)

    // Only publications carry journal information.
    result match {
      case publication: Publication if article.getJournal != null =>
        publication.setJournal(mapJournal(article.getJournal))
      case _ =>
    }

    if (article.getDescription != null && article.getDescription.nonEmpty)
      result.setDescription(List(OafMapperUtils.field(article.getDescription, dataInfo)).asJava)

    // The language is set only when it is a known synonym in dnet:languages.
    Option(article.getLanguage)
      .flatMap(l => Option(vocabularies.getSynonymAsQualifier("dnet:languages", l)))
      .foreach(q => result.setLanguage(q))

    val subjects: List[StructuredProperty] = Option(article.getSubjects)
      .map(_.asScala.toList)
      .getOrElse(Nil)
      .map(s => OafMapperUtils.structuredProperty(s.getValue, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, dataInfo))
    result.setSubject(subjects.asJava)

    val authors: List[Author] = Option(article.getAuthors)
      .map(_.asScala.toList)
      .getOrElse(Nil)
      .zipWithIndex
      .map { case (a, index) =>
        val author = new Author()
        author.setName(a.getForeName)
        author.setSurname(a.getLastName)
        author.setFullname(a.getFullName)
        author.setRank(index + 1) // ranks are 1-based
        author
      }
    if (authors.nonEmpty)
      result.setAuthor(authors.asJava)

    result.setOriginalId(pids.map(_.getValue).asJava)
    result
  }

  /**
    * Seeds the result id with the PMID and replaces it with the identifier produced
    * by [[IdentifierFactory]]. Returns None when the factory hands back the bare PMID.
    */
  private def assignIdentifier(result: Result, pmid: String): Option[Result] = {
    result.setId(pmid)
    val id = IdentifierFactory.createIdentifier(result)
    // NOTE(review): an id equal to the PMID presumably means no identifier could be derived - confirm.
    if (pmid.equalsIgnoreCase(id)) None
    else {
      result.setId(id)
      Some(result)
    }
  }

}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
[
|
[
|
||||||
{"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
|
{"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
|
||||||
{"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}
|
{"paramName":"i", "paramLongName":"isLookupUrl","paramDescription": "isLookupUrl", "paramRequired": true},
|
||||||
|
{"paramName":"w", "paramLongName":"workingPath","paramDescription": "the path of the sequential file to read", "paramRequired": true}
|
||||||
]
|
]
|
|
@ -4,6 +4,10 @@
|
||||||
<name>baselineWorkingPath</name>
|
<name>baselineWorkingPath</name>
|
||||||
<description>the Baseline Working Path</description>
|
<description>the Baseline Working Path</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>isLookupUrl</name>
|
||||||
|
<description>The IS lookUp service endpoint</description>
|
||||||
|
</property>
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
<start to="ConvertDataset"/>
|
<start to="ConvertDataset"/>
|
||||||
|
@ -31,6 +35,7 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--workingPath</arg><arg>${baselineWorkingPath}</arg>
|
<arg>--workingPath</arg><arg>${baselineWorkingPath}</arg>
|
||||||
<arg>--master</arg><arg>yarn</arg>
|
<arg>--master</arg><arg>yarn</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.sx.ebi;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.lenient;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
public abstract class AbstractVocabularyTest {
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
protected ISLookUpService isLookUpService;
|
||||||
|
|
||||||
|
protected VocabularyGroup vocabularies;
|
||||||
|
|
||||||
|
public void setUpVocabulary() throws ISLookUpException, IOException {
|
||||||
|
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
|
||||||
|
|
||||||
|
lenient()
|
||||||
|
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
|
||||||
|
.thenReturn(synonyms());
|
||||||
|
vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> vocs() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/terms.txt")));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> synonyms() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/synonyms.txt")));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,17 +1,27 @@
|
||||||
package eu.dnetlib.dhp.sx.ebi
|
package eu.dnetlib.dhp.sx.ebi
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper
|
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature}
|
||||||
import com.fasterxml.jackson.databind.SerializationFeature
|
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Result}
|
||||||
|
import eu.dnetlib.dhp.sx.ebi.model.{PMArticle, PMParser, PubMedToOaf}
|
||||||
|
import org.junit.jupiter.api.Assertions._
|
||||||
import eu.dnetlib.dhp.sx.ebi.model.PMParser
|
import org.junit.jupiter.api.extension.ExtendWith
|
||||||
import org.junit.jupiter.api.Test
|
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension
|
||||||
|
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
import scala.xml.pull.XMLEventReader
|
import scala.xml.pull.XMLEventReader
|
||||||
|
|
||||||
class TestEBI {
|
|
||||||
|
|
||||||
|
@ExtendWith(Array(classOf[MockitoExtension]))
|
||||||
|
class TestEBI extends AbstractVocabularyTest{
|
||||||
|
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
def setUp() :Unit = {
|
||||||
|
|
||||||
|
super.setUpVocabulary()
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -20,10 +30,25 @@ class TestEBI {
|
||||||
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
||||||
val inputXML = Source.fromInputStream(getClass.getResourceAsStream("pubmed.xml")).mkString
|
val inputXML = Source.fromInputStream(getClass.getResourceAsStream("pubmed.xml")).mkString
|
||||||
val xml = new XMLEventReader(Source.fromBytes(inputXML.getBytes()))
|
val xml = new XMLEventReader(Source.fromBytes(inputXML.getBytes()))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
new PMParser(xml).foreach(s =>println(mapper.writeValueAsString(s)))
|
new PMParser(xml).foreach(s =>println(mapper.writeValueAsString(s)))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def testPubmedToOaf(): Unit = {
|
||||||
|
assertNotNull(vocabularies)
|
||||||
|
assertTrue(vocabularies.vocabularyExists("dnet:publication_resource"))
|
||||||
|
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
||||||
|
|
||||||
|
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false)
|
||||||
|
val records:String =Source.fromInputStream(getClass.getResourceAsStream("pubmed_dump")).mkString
|
||||||
|
val r:List[Oaf] = records.lines.toList.map(s=>mapper.readValue(s, classOf[PMArticle])).map(a => PubMedToOaf.convert(a, vocabularies))
|
||||||
|
assertEquals(10, r.size)
|
||||||
|
assertTrue(r.map(p => p.asInstanceOf[Result]).flatMap(p => p.getInstance().asScala.map(i => i.getInstancetype.getClassid)).exists(p => "0037".equalsIgnoreCase(p)))
|
||||||
|
println(mapper.writeValueAsString(r.head))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
{"pmid":"10025635","doi":"10.1053/jvet.1999.0010","date":"1999-02-20","journal":{"issn":"0161-3499","volume":"28","issue":"1","date":"2019-10-24","title":"Veterinary surgery : VS"},"title":"Canine uncemented porous-coated anatomic total hip arthroplasty: results of a long-term prospective evaluation of 50 consecutive cases.","description":"To evaluate the long-term clinical and radiographic results of a canine uncemented porous-coated anatomic (PCA) total hip arthroplasty (THA).Prospective study of consecutive clinical patients using survival analysis.Forty-one dogs that underwent PCA THA; nine had bilateral PCA THA (50 prostheses).Gait observation, orthopedic examination, and radiographic assessment were conducted before THA, 6 months after THA, and yearly thereafter. A zonal analysis system was used to document osseous changes in the femur and the acetabulum. Acetabular cup and femoral stem subsidence and migration, femoral canal fill, and implant orientation were measured. Survival analysis of the procedure was conducted.Long-term follow-up was available for 37 dogs (46 prostheses). The median follow-up was 63 months. Limb function was normal for 37 limbs and abnormal for 9 limbs because of dislocation (n = 3), lumbosacral disease (n = 2), degenerative myelopathy (n = 1), autoimmune disease (n = 1), brain tumor (n = 1), or osteosarcoma of the femur (n = 1). All prosthetic stems and cups were fixed by bone ingrowth fixation. Osteolysis was not observed. Bone infarction occurred in five femoral canals (four dogs). 
The 6-year survival rate for the procedure was 87% (95% confidence interval, 72%-96%).Long-term fixation of the uncemented PCA acetabular cup and stem is successful in dogs, and long-term clinical function is excellent.","language":"eng","subjects":[{"value":"Animals","meshId":"D000818","registryNumber":null},{"value":"Arthroplasty, Replacement, Hip","meshId":"D019644","registryNumber":null},{"value":"Dogs","meshId":"D004285","registryNumber":null},{"value":"Follow-Up Studies","meshId":"D005500","registryNumber":null},{"value":"Hip Joint","meshId":"D006621","registryNumber":null},{"value":"Hip Prosthesis","meshId":"D006622","registryNumber":null},{"value":"Osseointegration","meshId":"D016348","registryNumber":null},{"value":"Prospective Studies","meshId":"D011446","registryNumber":null},{"value":"Radiography","meshId":"D011859","registryNumber":null},{"value":"Survival Analysis","meshId":"D016019","registryNumber":null},{"value":"Treatment Outcome","meshId":"D016896","registryNumber":null}],"publicationTypes":[{"value":"Clinical Trial","meshId":"D016428","registryNumber":null},{"value":"Research Support, Non-U.S. Gov't","meshId":"D013485","registryNumber":null}],"grants":[],"authors":[{"lastName":"Marcellin-Little","foreName":"D J","fullName":"D J, Marcellin-Little"},{"lastName":"DeYoung","foreName":"B A","fullName":"B A, DeYoung"},{"lastName":"Doyens","foreName":"D H","fullName":"D H, Doyens"},{"lastName":"DeYoung","foreName":"D J","fullName":"D J, DeYoung"}]}
|
||||||
|
{"pmid":"10003342","doi":"10.1103/physrevb.46.13035","date":"1992-11-15","journal":{"issn":"0163-1829","volume":"46","issue":"20","date":"1992-Nov-15","title":"Physical review. B, Condensed matter"},"title":"Transient nonlinear optical phenomena in exciton-phonon systems.","description":null,"language":"eng","subjects":[],"publicationTypes":[{"value":"Journal Article","meshId":"D016428","registryNumber":null}],"grants":[],"authors":[{"lastName":"Li","foreName":null,"fullName":", Li"},{"lastName":"Lin","foreName":null,"fullName":", Lin"},{"lastName":"George","foreName":null,"fullName":", George"},{"lastName":"Sun","foreName":null,"fullName":", Sun"}]}
|
||||||
|
{"pmid":"10003566","doi":"10.1103/physrevb.46.14624","date":"1992-12-01","journal":{"issn":"0163-1829","volume":"46","issue":"22","date":"1992-Dec-01","title":"Physical review. B, Condensed matter"},"title":"Charge-spin recombination in the one-dimensional supersymmetric t-J model.","description":null,"language":"eng","subjects":[],"publicationTypes":[{"value":"Journal Article","meshId":"D016428","registryNumber":null}],"grants":[],"authors":[{"lastName":"Bares","foreName":null,"fullName":", Bares"},{"lastName":"Carmelo","foreName":null,"fullName":", Carmelo"},{"lastName":"Ferrer","foreName":null,"fullName":", Ferrer"},{"lastName":"Horsch","foreName":null,"fullName":", Horsch"}]}
|
||||||
|
{"pmid":"10004390","doi":"10.1103/physrevb.46.5822","date":"1992-09-01","journal":{"issn":"0163-1829","volume":"46","issue":"9","date":"1992-Sep-01","title":"Physical review. B, Condensed matter"},"title":"Magnetic penetration depth of kappa -(BEDT-TTF)2Cu","description":null,"language":"eng","subjects":[],"publicationTypes":[{"value":"Journal Article","meshId":"D016428","registryNumber":null}],"grants":[],"authors":[{"lastName":"Lang","foreName":null,"fullName":", Lang"},{"lastName":"Toyota","foreName":null,"fullName":", Toyota"},{"lastName":"Sasaki","foreName":null,"fullName":", Sasaki"},{"lastName":"Sato","foreName":null,"fullName":", Sato"}]}
|
||||||
|
{"pmid":"10005145","doi":"10.1103/physrevb.47.10358","date":"1993-04-15","journal":{"issn":"0163-1829","volume":"47","issue":"16","date":"1993-Apr-15","title":"Physical review. B, Condensed matter"},"title":"Polaron-cyclotron-resonance spectrum resulting from interface- and slab-phonon modes in a GaAs/AlAs quantum well.","description":null,"language":"eng","subjects":[],"publicationTypes":[{"value":"Journal Article","meshId":"D016428","registryNumber":null}],"grants":[],"authors":[{"lastName":"Hai","foreName":null,"fullName":", Hai"},{"lastName":"Peeters","foreName":null,"fullName":", Peeters"},{"lastName":"Devreese","foreName":null,"fullName":", Devreese"}]}
|
||||||
|
{"pmid":"10015977","doi":"10.1103/physrevd.47.3580","date":"1993-04-15","journal":{"issn":"0556-2821","volume":"47","issue":"8","date":"1993-Apr-15","title":"Physical review. D, Particles and fields"},"title":"Equivalence and compositeness: Beyond 1/Nc in four-fermion theories.","description":null,"language":"eng","subjects":[],"publicationTypes":[{"value":"Journal Article","meshId":"D016428","registryNumber":null}],"grants":[],"authors":[{"lastName":"Luri?","foreName":null,"fullName":", Luri?"},{"lastName":"Tupper","foreName":null,"fullName":", Tupper"}]}
|
||||||
|
{"pmid":"10018960","doi":"10.1103/physrevd.51.4844","date":"1995-05-01","journal":{"issn":"0556-2821","volume":"51","issue":"9","date":"1995-May-01","title":"Physical review. D, Particles and fields"},"title":"Unifying logarithmic and factorial behavior in high-energy scattering.","description":null,"language":"eng","subjects":[],"publicationTypes":[{"value":"Journal Article","meshId":"D016428","registryNumber":null}],"grants":[],"authors":[{"lastName":"Cornwall","foreName":null,"fullName":", Cornwall"},{"lastName":"Morris","foreName":null,"fullName":", Morris"}]}
|
||||||
|
{"pmid":"1002019","doi":null,"date":"1976-09-01","journal":{"issn":"0017-7768","volume":"91","issue":"5-6","date":"1976-Sep-18","title":"Harefuah"},"title":"[Pre-fabricated modular operating theatres at Assaf-Harofeh Hospital].","description":null,"language":"heb","subjects":[{"value":"Hospital Design and Construction","meshId":"D006749","registryNumber":null},{"value":"Israel","meshId":"D007557","registryNumber":null},{"value":"Operating Rooms","meshId":"D009873","registryNumber":null}],"publicationTypes":[{"value":"Journal Article","meshId":"D016428","registryNumber":null}],"grants":[],"authors":[{"lastName":"Toretz","foreName":"M Y","fullName":"M Y, Toretz"}]}
|
||||||
|
{"pmid":"10023244","doi":"10.1016/S0099-2399(98)80146-5","date":"1999-02-19","journal":{"issn":"0099-2399","volume":"24","issue":"10","date":"1998-Oct-21","title":"Journal of endodontics"},"title":"The effects of estrogen deficiency on glycosylation of odontoblasts in rats.","description":"To investigate the effects of estrogen deficiency on odontoblast metabolism, we induced osteoporosis in rats by ovariectomy and examined the glycosylation of the matrix component in odontoblasts. Peanut agglutinin (PNA) lectin histochemistry, which detects D-galactose and N-acetylgalactosamine sugars, was conducted in incisor odontoblasts of ovariectomized (OVX) and sham-operated (sham) rats. At 5 wk after the operation, bone mineral density and serum level of estrogen in OVX rats were lower than those in sham rats. PNA binding sites were found in the odontoblasts in incisors, and the binding sites in OVX rats were much stronger than those in sham rats. Furthermore, PNA binding sites were localized at the predentin matrix in OVX rats, but the reaction in sham rats was not detected. 
Because D-galactose and N-acetylgalactosamine sugars bound to PNA are important constituents of proteoglycans in dentin matrix and the PNA binding sites reflect the proteoglycan production of odontoblasts, these results indicated that galactosyl glycosylation of proteoglycans in odontoblasts is influenced by estrogen deficiency in rat incisors.","language":"eng","subjects":[{"value":"Acetylgalactosamine","meshId":"D000116","registryNumber":null},{"value":"Animals","meshId":"D000818","registryNumber":null},{"value":"Binding Sites","meshId":"D001665","registryNumber":null},{"value":"Estrogens","meshId":"D004967","registryNumber":null},{"value":"Female","meshId":"D005260","registryNumber":null},{"value":"Galactose","meshId":"D005690","registryNumber":null},{"value":"Glycosylation","meshId":"D006031","registryNumber":null},{"value":"Incisor","meshId":"D007180","registryNumber":null},{"value":"Odontoblasts","meshId":"D009804","registryNumber":null},{"value":"Ovariectomy","meshId":"D010052","registryNumber":null},{"value":"Peanut Agglutinin","meshId":"D019887","registryNumber":null},{"value":"Proteoglycans","meshId":"D011509","registryNumber":null},{"value":"Rats","meshId":"D051381","registryNumber":null},{"value":"Rats, Sprague-Dawley","meshId":"D017207","registryNumber":null}],"publicationTypes":[{"value":"Clinical Trial","meshId":"D013485","registryNumber":null}],"grants":[],"authors":[{"lastName":"Yokose","foreName":"S","fullName":"S, Yokose"},{"lastName":"Zhungfeng","foreName":"C","fullName":"C, Zhungfeng"},{"lastName":"Tajima","foreName":"Y","fullName":"Y, Tajima"},{"lastName":"Fujieda","foreName":"K","fullName":"K, Fujieda"},{"lastName":"Katayama","foreName":"I","fullName":"I, Katayama"},{"lastName":"Katayama","foreName":"T","fullName":"T, Katayama"}]}
|
||||||
|
{"pmid":"10023333","doi":"10.1136/jcp.51.10.725","date":"1999-02-19","journal":{"issn":"0021-9746","volume":"51","issue":"10","date":"1998-Oct-01","title":"Journal of clinical pathology"},"title":"AIDS vaccine development: let a thousand flowers bloom.","description":null,"language":"eng","subjects":[{"value":"AIDS Vaccines","meshId":"D016915","registryNumber":null},{"value":"Avipoxvirus","meshId":"D018150","registryNumber":null},{"value":"HIV Infections","meshId":"D015658","registryNumber":null},{"value":"Humans","meshId":"D006801","registryNumber":null},{"value":"Simian Immunodeficiency Virus","meshId":"D015302","registryNumber":null},{"value":"Vaccines, Attenuated","meshId":"D014613","registryNumber":null},{"value":"Vaccines, DNA","meshId":"D019444","registryNumber":null}],"publicationTypes":[{"value":"Journal Article","meshId":"D016428","registryNumber":null},{"value":"Research Support, Non-U.S. Gov't","meshId":"D013485","registryNumber":null},{"value":"Review","meshId":"D016454","registryNumber":null}],"grants":[],"authors":[{"lastName":"Oxford","foreName":"J S","fullName":"J S, Oxford"},{"lastName":"Addawe","foreName":"M","fullName":"M, Addawe"},{"lastName":"Lambkin","foreName":"R","fullName":"R, Lambkin"}]}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue