Added relation dates to all Scholexplorer datasources

This commit is contained in:
Sandro La Bruzzo 2021-06-29 12:02:03 +02:00
parent 075055eaca
commit 7e08655e5f
3 changed files with 27 additions and 17 deletions

View File

@ -45,7 +45,8 @@ case class HostedByMapType(openaire_id: String, datacite_name: String, official_
object DataciteToOAFTransformation { object DataciteToOAFTransformation {
val REL_TYPE_VALUE:String = "resultResult" val REL_TYPE_VALUE:String = "resultResult"
val DATE_RELATION_KEY = "RelationDate"
val subRelTypeMapping: Map[String,(String,String)] = Map( val subRelTypeMapping: Map[String,(String,String)] = Map(
"References" ->("IsReferencedBy","relationship"), "References" ->("IsReferencedBy","relationship"),
@ -547,8 +548,7 @@ val REL_TYPE_VALUE:String = "resultResult"
JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier
} yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType) } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType)
relations = relations ::: generateRelations(rels,result.getId, if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null)
relations = relations ::: generateRelations(rels,result.getId)
} }
if (relations != null && relations.nonEmpty) { if (relations != null && relations.nonEmpty) {
List(result) ::: relations List(result) ::: relations
@ -557,7 +557,7 @@ val REL_TYPE_VALUE:String = "resultResult"
List(result) List(result)
} }
private def generateRelations(rels: List[RelatedIdentifierType], id:String):List[Relation] = { private def generateRelations(rels: List[RelatedIdentifierType], id:String, date:String):List[Relation] = {
rels rels
.filter(r => .filter(r =>
subRelTypeMapping.contains(r.relationType) && ( subRelTypeMapping.contains(r.relationType) && (
@ -575,6 +575,10 @@ val REL_TYPE_VALUE:String = "resultResult"
rel.setSubRelType(subRelType) rel.setSubRelType(subRelType)
rel.setRelClass(r.relationType) rel.setRelClass(r.relationType)
val dateProps:KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
rel.setProperties(List(dateProps).asJava)
rel.setSource(id) rel.setSource(id)
rel.setTarget(s"unresolved::${r.relatedIdentifier}::${r.relatedIdentifierType}") rel.setTarget(s"unresolved::${r.relatedIdentifier}::${r.relatedIdentifierType}")
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.sx.bio package eu.dnetlib.dhp.sx.bio
import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, OafMapperUtils}
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Instance, KeyValue, Oaf, Relation, StructuredProperty} import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Instance, KeyValue, Oaf, Relation, StructuredProperty}
import org.json4s.DefaultFormats import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.JsonAST.{JField, JObject, JString}
@ -22,6 +22,8 @@ object BioDBToOAF {
val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9") val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9")
val SUBJ_CLASS = "Keywords" val SUBJ_CLASS = "Keywords"
val DATE_RELATION_KEY = "RelationDate"
val resolvedURL:Map[String,String] = Map( val resolvedURL:Map[String,String] = Map(
"genbank"-> "https://www.ncbi.nlm.nih.gov/nuccore/", "genbank"-> "https://www.ncbi.nlm.nih.gov/nuccore/",
"ncbi-n" -> "https://www.ncbi.nlm.nih.gov/nuccore/", "ncbi-n" -> "https://www.ncbi.nlm.nih.gov/nuccore/",
@ -77,7 +79,7 @@ object BioDBToOAF {
val relation_semantic= (json \ "RelationshipType" \ "Name").extract[String] val relation_semantic= (json \ "RelationshipType" \ "Name").extract[String]
val date = (json \ "LinkedPublicationDate").extract[String] val date = GraphCleaningFunctions.cleanDate((json \ "LinkedPublicationDate").extract[String])
createRelation(target_pid, target_pid_type, generate_unresolved_id(source_pid, source_pid_type),collectedFromMap("elsevier"),"relationship", relation_semantic, date) createRelation(target_pid, target_pid_type, generate_unresolved_id(source_pid, source_pid_type),collectedFromMap("elsevier"),"relationship", relation_semantic, date)
@ -134,8 +136,8 @@ object BioDBToOAF {
} }
if (input.date!= null && input.date.nonEmpty) { if (input.date!= null && input.date.nonEmpty) {
val dt = input.date.head val dt = input.date.head
i.setDateofacceptance(OafMapperUtils.field(dt, DATA_INFO)) i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO))
d.setDateofacceptance(OafMapperUtils.field(dt, DATA_INFO)) d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO))
} }
d d
} }
@ -177,7 +179,7 @@ object BioDBToOAF {
JObject(dateOBJ) <- json \ "dates" JObject(dateOBJ) <- json \ "dates"
JField("date", JString(date)) <- dateOBJ JField("date", JString(date)) <- dateOBJ
JField("date_info", JString(date_info)) <- dateOBJ JField("date_info", JString(date_info)) <- dateOBJ
} yield UniprotDate(date, date_info) } yield UniprotDate(GraphCleaningFunctions.cleanDate(date), date_info)
val subjects: List[String] = (json \\ "subjects").extractOrElse[List[String]](null) val subjects: List[String] = (json \\ "subjects").extractOrElse[List[String]](null)
@ -248,7 +250,10 @@ object BioDBToOAF {
rel.setSource(sourceId) rel.setSource(sourceId)
rel.setTarget(s"unresolved::$pid::$pidType") rel.setTarget(s"unresolved::$pid::$pidType")
rel.setValidationDate(date)
val dateProps:KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
rel.setProperties(List(dateProps).asJava)
rel.getTarget.startsWith("unresolved") rel.getTarget.startsWith("unresolved")
rel.setCollectedfrom(List(collectedFrom).asJava) rel.setCollectedfrom(List(collectedFrom).asJava)
@ -354,7 +359,7 @@ object BioDBToOAF {
JField("IDURL", JString(idUrl)) <- identifier JField("IDURL", JString(idUrl)) <- identifier
JField("ID", JString(id)) <- identifier JField("ID", JString(id)) <- identifier
} yield EBILinks(relation, publicationDate, title, pmid, id, idScheme, idUrl) } yield EBILinks(relation, GraphCleaningFunctions.cleanDate(publicationDate), title, pmid, id, idScheme, idUrl)
} }
@ -384,9 +389,9 @@ object BioDBToOAF {
i.setCollectedfrom(collectedFromMap("ebi")) i.setCollectedfrom(collectedFromMap("ebi"))
d.setInstance(List(i).asJava) d.setInstance(List(i).asJava)
i.setDateofacceptance(OafMapperUtils.field(input.date, DATA_INFO)) i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO))
d.setDateofacceptance(OafMapperUtils.field(input.date, DATA_INFO)) d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO))
List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"),"relationship", "isRelatedTo", input.date)) List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"),"relationship", "isRelatedTo", GraphCleaningFunctions.cleanDate(input.date)))
} }
} }

View File

@ -2,8 +2,9 @@ package eu.dnetlib.dhp.sx.bio.pubmed
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf._
import scala.collection.JavaConverters._ import scala.collection.JavaConverters._
object PubMedToOaf { object PubMedToOaf {
@ -93,14 +94,14 @@ object PubMedToOaf {
.map(t => t._1 + t._2) .map(t => t._1 + t._2)
if (urlLists != null) if (urlLists != null)
i.setUrl(urlLists.asJava) i.setUrl(urlLists.asJava)
i.setDateofacceptance(OafMapperUtils.field(article.getDate, dataInfo)) i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo))
i.setCollectedfrom(collectedFrom) i.setCollectedfrom(collectedFrom)
result.setPid(pidList.asJava) result.setPid(pidList.asJava)
if (article.getJournal != null && result.isInstanceOf[Publication]) if (article.getJournal != null && result.isInstanceOf[Publication])
result.asInstanceOf[Publication].setJournal(mapJournal(article.getJournal)) result.asInstanceOf[Publication].setJournal(mapJournal(article.getJournal))
result.setCollectedfrom(List(collectedFrom).asJava) result.setCollectedfrom(List(collectedFrom).asJava)
result.setDateofacceptance(OafMapperUtils.field(article.getDate, dataInfo)) result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo))
if (article.getTitle == null || article.getTitle.isEmpty) if (article.getTitle == null || article.getTitle.isEmpty)
return null return null