2021-06-18 16:41:24 +02:00
|
|
|
package eu.dnetlib.dhp.sx.bio.pubmed
|
2021-06-16 14:56:24 +02:00
|
|
|
|
|
|
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
|
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
2021-06-29 12:02:03 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
|
2021-06-16 14:56:24 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf._
|
2021-06-29 12:02:03 +02:00
|
|
|
|
2021-06-16 14:56:24 +02:00
|
|
|
import scala.collection.JavaConverters._
|
|
|
|
|
|
|
|
object PubMedToOaf {
|
|
|
|
|
|
|
|
val SUBJ_CLASS = "keywords"
|
|
|
|
val urlMap = Map(
|
|
|
|
"pmid" -> "https://pubmed.ncbi.nlm.nih.gov/",
|
|
|
|
"doi" -> "https://dx.doi.org/"
|
|
|
|
)
|
|
|
|
|
|
|
|
def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = {
|
|
|
|
val result_typologies = getVocabularyTerm("dnet:result_typologies", vocabularies, cobjQualifier.getClassid)
|
|
|
|
result_typologies.getClassid match {
|
|
|
|
case "dataset" => new Dataset
|
|
|
|
case "publication" => new Publication
|
|
|
|
case "other" => new OtherResearchProduct
|
|
|
|
case "software" => new Software
|
2021-06-18 16:41:24 +02:00
|
|
|
case _ => null
|
2021-06-16 14:56:24 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
def mapJournal(j: PMJournal): Journal = {
|
|
|
|
if (j == null)
|
|
|
|
return null
|
|
|
|
val journal = new Journal
|
|
|
|
|
|
|
|
journal.setDataInfo(dataInfo)
|
|
|
|
journal.setName(j.getTitle)
|
|
|
|
journal.setVol(j.getVolume)
|
|
|
|
journal.setIssnPrinted(j.getIssn)
|
|
|
|
journal.setIss(j.getIssue)
|
|
|
|
journal
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier = {
|
|
|
|
val a = vocabularies.getSynonymAsQualifier(vocabularyName, term)
|
|
|
|
val b = vocabularies.getTermAsQualifier(vocabularyName, term)
|
|
|
|
if (a == null) b else a
|
|
|
|
}
|
|
|
|
|
|
|
|
val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9")
|
|
|
|
val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central")
|
|
|
|
|
|
|
|
def convert(article: PMArticle, vocabularies: VocabularyGroup): Result = {
|
|
|
|
|
|
|
|
if (article.getPublicationTypes == null)
|
|
|
|
return null
|
|
|
|
val i = new Instance
|
|
|
|
var pidList: List[StructuredProperty] = List(OafMapperUtils.structuredProperty(article.getPmid, PidType.pmid.toString, PidType.pmid.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo))
|
2021-06-18 16:41:24 +02:00
|
|
|
if (pidList == null)
|
2021-06-16 14:56:24 +02:00
|
|
|
return null
|
|
|
|
if (article.getDoi != null) {
|
|
|
|
pidList = pidList ::: List(OafMapperUtils.structuredProperty(article.getDoi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo))
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the article contains the typology Journal Article then we apply this type
|
|
|
|
//else We have to find a terms that match the vocabulary otherwise we discard it
|
|
|
|
val ja = article.getPublicationTypes.asScala.find(s => "Journal Article".equalsIgnoreCase(s.getValue))
|
|
|
|
if (ja.isDefined) {
|
|
|
|
val cojbCategory = getVocabularyTerm("dnet:publication_resource", vocabularies, ja.get.getValue)
|
|
|
|
i.setInstancetype(cojbCategory)
|
|
|
|
} else {
|
|
|
|
val i_type = article.getPublicationTypes.asScala
|
|
|
|
.map(s => getVocabularyTerm("dnet:publication_resource", vocabularies, s.getValue))
|
|
|
|
.find(q => q != null)
|
|
|
|
if (i_type.isDefined)
|
|
|
|
i.setInstancetype(i_type.get)
|
|
|
|
else
|
|
|
|
return null
|
|
|
|
}
|
|
|
|
val result = createResult(i.getInstancetype, vocabularies)
|
|
|
|
if (result == null)
|
|
|
|
return result
|
|
|
|
result.setDataInfo(dataInfo)
|
|
|
|
i.setPid(pidList.asJava)
|
|
|
|
result.setInstance(List(i).asJava)
|
|
|
|
|
|
|
|
|
2021-06-18 16:41:24 +02:00
|
|
|
i.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection breakOut)
|
2021-06-16 14:56:24 +02:00
|
|
|
val urlLists: List[String] = pidList
|
|
|
|
.map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue))
|
|
|
|
.filter(t => t._1.nonEmpty)
|
|
|
|
.map(t => t._1 + t._2)
|
2021-06-18 16:41:24 +02:00
|
|
|
if (urlLists != null)
|
2021-06-16 14:56:24 +02:00
|
|
|
i.setUrl(urlLists.asJava)
|
2021-06-29 12:02:03 +02:00
|
|
|
i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo))
|
2021-06-16 14:56:24 +02:00
|
|
|
i.setCollectedfrom(collectedFrom)
|
|
|
|
result.setPid(pidList.asJava)
|
|
|
|
if (article.getJournal != null && result.isInstanceOf[Publication])
|
|
|
|
result.asInstanceOf[Publication].setJournal(mapJournal(article.getJournal))
|
|
|
|
result.setCollectedfrom(List(collectedFrom).asJava)
|
|
|
|
|
2021-06-29 12:02:03 +02:00
|
|
|
result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo))
|
2021-06-16 14:56:24 +02:00
|
|
|
|
|
|
|
if (article.getTitle == null || article.getTitle.isEmpty)
|
|
|
|
return null
|
|
|
|
result.setTitle(List(OafMapperUtils.structuredProperty(article.getTitle, ModelConstants.MAIN_TITLE_QUALIFIER, dataInfo)).asJava)
|
|
|
|
|
|
|
|
if (article.getDescription != null && article.getDescription.nonEmpty)
|
|
|
|
result.setDescription(List(OafMapperUtils.field(article.getDescription, dataInfo)).asJava)
|
|
|
|
|
|
|
|
if (article.getLanguage != null) {
|
|
|
|
|
|
|
|
val term = vocabularies.getSynonymAsQualifier("dnet:languages", article.getLanguage)
|
|
|
|
if (term != null)
|
|
|
|
result.setLanguage(term)
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-06-18 16:41:24 +02:00
|
|
|
val subjects: List[StructuredProperty] = article.getSubjects.asScala.map(s => OafMapperUtils.structuredProperty(s.getValue, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, dataInfo))(collection breakOut)
|
|
|
|
if (subjects != null)
|
2021-06-16 14:56:24 +02:00
|
|
|
result.setSubject(subjects.asJava)
|
|
|
|
|
|
|
|
|
2021-06-18 16:41:24 +02:00
|
|
|
val authors: List[Author] = article.getAuthors.asScala.zipWithIndex.map { case (a, index) =>
|
2021-06-16 14:56:24 +02:00
|
|
|
val author = new Author()
|
|
|
|
author.setName(a.getForeName)
|
|
|
|
author.setSurname(a.getLastName)
|
|
|
|
author.setFullname(a.getFullName)
|
|
|
|
author.setRank(index + 1)
|
|
|
|
author
|
|
|
|
}(collection breakOut)
|
|
|
|
|
|
|
|
|
2021-06-18 16:41:24 +02:00
|
|
|
if (authors != null && authors.nonEmpty)
|
2021-06-16 14:56:24 +02:00
|
|
|
result.setAuthor(authors.asJava)
|
|
|
|
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
|
|
|
|
|
|
|
|
|
|
|
result.setId(article.getPmid)
|
|
|
|
|
|
|
|
val id = IdentifierFactory.createIdentifier(result)
|
|
|
|
if (article.getPmid.equalsIgnoreCase(id))
|
|
|
|
return null
|
|
|
|
result.setId(id)
|
|
|
|
result
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|