forked from antonis.lempesis/dnet-hadoop
Added relation dates to all Scholexplorer datasources
This commit is contained in:
parent
075055eaca
commit
7e08655e5f
|
@ -45,7 +45,8 @@ case class HostedByMapType(openaire_id: String, datacite_name: String, official_
|
||||||
|
|
||||||
object DataciteToOAFTransformation {
|
object DataciteToOAFTransformation {
|
||||||
|
|
||||||
val REL_TYPE_VALUE:String = "resultResult"
|
val REL_TYPE_VALUE:String = "resultResult"
|
||||||
|
val DATE_RELATION_KEY = "RelationDate"
|
||||||
|
|
||||||
val subRelTypeMapping: Map[String,(String,String)] = Map(
|
val subRelTypeMapping: Map[String,(String,String)] = Map(
|
||||||
"References" ->("IsReferencedBy","relationship"),
|
"References" ->("IsReferencedBy","relationship"),
|
||||||
|
@ -547,8 +548,7 @@ val REL_TYPE_VALUE:String = "resultResult"
|
||||||
JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier
|
JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier
|
||||||
} yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType)
|
} yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType)
|
||||||
|
|
||||||
|
relations = relations ::: generateRelations(rels,result.getId, if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null)
|
||||||
relations = relations ::: generateRelations(rels,result.getId)
|
|
||||||
}
|
}
|
||||||
if (relations != null && relations.nonEmpty) {
|
if (relations != null && relations.nonEmpty) {
|
||||||
List(result) ::: relations
|
List(result) ::: relations
|
||||||
|
@ -557,7 +557,7 @@ val REL_TYPE_VALUE:String = "resultResult"
|
||||||
List(result)
|
List(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
private def generateRelations(rels: List[RelatedIdentifierType], id:String):List[Relation] = {
|
private def generateRelations(rels: List[RelatedIdentifierType], id:String, date:String):List[Relation] = {
|
||||||
rels
|
rels
|
||||||
.filter(r =>
|
.filter(r =>
|
||||||
subRelTypeMapping.contains(r.relationType) && (
|
subRelTypeMapping.contains(r.relationType) && (
|
||||||
|
@ -575,6 +575,10 @@ val REL_TYPE_VALUE:String = "resultResult"
|
||||||
rel.setSubRelType(subRelType)
|
rel.setSubRelType(subRelType)
|
||||||
rel.setRelClass(r.relationType)
|
rel.setRelClass(r.relationType)
|
||||||
|
|
||||||
|
val dateProps:KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
|
||||||
|
|
||||||
|
rel.setProperties(List(dateProps).asJava)
|
||||||
|
|
||||||
rel.setSource(id)
|
rel.setSource(id)
|
||||||
rel.setTarget(s"unresolved::${r.relatedIdentifier}::${r.relatedIdentifierType}")
|
rel.setTarget(s"unresolved::${r.relatedIdentifier}::${r.relatedIdentifierType}")
|
||||||
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
|
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
package eu.dnetlib.dhp.sx.bio
|
package eu.dnetlib.dhp.sx.bio
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
|
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, OafMapperUtils}
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Instance, KeyValue, Oaf, Relation, StructuredProperty}
|
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Instance, KeyValue, Oaf, Relation, StructuredProperty}
|
||||||
import org.json4s.DefaultFormats
|
import org.json4s.DefaultFormats
|
||||||
import org.json4s.JsonAST.{JField, JObject, JString}
|
import org.json4s.JsonAST.{JField, JObject, JString}
|
||||||
|
@ -22,6 +22,8 @@ object BioDBToOAF {
|
||||||
val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9")
|
val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9")
|
||||||
val SUBJ_CLASS = "Keywords"
|
val SUBJ_CLASS = "Keywords"
|
||||||
|
|
||||||
|
val DATE_RELATION_KEY = "RelationDate"
|
||||||
|
|
||||||
val resolvedURL:Map[String,String] = Map(
|
val resolvedURL:Map[String,String] = Map(
|
||||||
"genbank"-> "https://www.ncbi.nlm.nih.gov/nuccore/",
|
"genbank"-> "https://www.ncbi.nlm.nih.gov/nuccore/",
|
||||||
"ncbi-n" -> "https://www.ncbi.nlm.nih.gov/nuccore/",
|
"ncbi-n" -> "https://www.ncbi.nlm.nih.gov/nuccore/",
|
||||||
|
@ -77,7 +79,7 @@ object BioDBToOAF {
|
||||||
|
|
||||||
val relation_semantic= (json \ "RelationshipType" \ "Name").extract[String]
|
val relation_semantic= (json \ "RelationshipType" \ "Name").extract[String]
|
||||||
|
|
||||||
val date = (json \ "LinkedPublicationDate").extract[String]
|
val date = GraphCleaningFunctions.cleanDate((json \ "LinkedPublicationDate").extract[String])
|
||||||
|
|
||||||
createRelation(target_pid, target_pid_type, generate_unresolved_id(source_pid, source_pid_type),collectedFromMap("elsevier"),"relationship", relation_semantic, date)
|
createRelation(target_pid, target_pid_type, generate_unresolved_id(source_pid, source_pid_type),collectedFromMap("elsevier"),"relationship", relation_semantic, date)
|
||||||
|
|
||||||
|
@ -134,8 +136,8 @@ object BioDBToOAF {
|
||||||
}
|
}
|
||||||
if (input.date!= null && input.date.nonEmpty) {
|
if (input.date!= null && input.date.nonEmpty) {
|
||||||
val dt = input.date.head
|
val dt = input.date.head
|
||||||
i.setDateofacceptance(OafMapperUtils.field(dt, DATA_INFO))
|
i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO))
|
||||||
d.setDateofacceptance(OafMapperUtils.field(dt, DATA_INFO))
|
d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO))
|
||||||
}
|
}
|
||||||
d
|
d
|
||||||
}
|
}
|
||||||
|
@ -177,7 +179,7 @@ object BioDBToOAF {
|
||||||
JObject(dateOBJ) <- json \ "dates"
|
JObject(dateOBJ) <- json \ "dates"
|
||||||
JField("date", JString(date)) <- dateOBJ
|
JField("date", JString(date)) <- dateOBJ
|
||||||
JField("date_info", JString(date_info)) <- dateOBJ
|
JField("date_info", JString(date_info)) <- dateOBJ
|
||||||
} yield UniprotDate(date, date_info)
|
} yield UniprotDate(GraphCleaningFunctions.cleanDate(date), date_info)
|
||||||
|
|
||||||
val subjects: List[String] = (json \\ "subjects").extractOrElse[List[String]](null)
|
val subjects: List[String] = (json \\ "subjects").extractOrElse[List[String]](null)
|
||||||
|
|
||||||
|
@ -248,7 +250,10 @@ object BioDBToOAF {
|
||||||
rel.setSource(sourceId)
|
rel.setSource(sourceId)
|
||||||
rel.setTarget(s"unresolved::$pid::$pidType")
|
rel.setTarget(s"unresolved::$pid::$pidType")
|
||||||
|
|
||||||
rel.setValidationDate(date)
|
|
||||||
|
val dateProps:KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
|
||||||
|
|
||||||
|
rel.setProperties(List(dateProps).asJava)
|
||||||
|
|
||||||
rel.getTarget.startsWith("unresolved")
|
rel.getTarget.startsWith("unresolved")
|
||||||
rel.setCollectedfrom(List(collectedFrom).asJava)
|
rel.setCollectedfrom(List(collectedFrom).asJava)
|
||||||
|
@ -354,7 +359,7 @@ object BioDBToOAF {
|
||||||
JField("IDURL", JString(idUrl)) <- identifier
|
JField("IDURL", JString(idUrl)) <- identifier
|
||||||
JField("ID", JString(id)) <- identifier
|
JField("ID", JString(id)) <- identifier
|
||||||
|
|
||||||
} yield EBILinks(relation, publicationDate, title, pmid, id, idScheme, idUrl)
|
} yield EBILinks(relation, GraphCleaningFunctions.cleanDate(publicationDate), title, pmid, id, idScheme, idUrl)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -384,9 +389,9 @@ object BioDBToOAF {
|
||||||
|
|
||||||
i.setCollectedfrom(collectedFromMap("ebi"))
|
i.setCollectedfrom(collectedFromMap("ebi"))
|
||||||
d.setInstance(List(i).asJava)
|
d.setInstance(List(i).asJava)
|
||||||
i.setDateofacceptance(OafMapperUtils.field(input.date, DATA_INFO))
|
i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO))
|
||||||
d.setDateofacceptance(OafMapperUtils.field(input.date, DATA_INFO))
|
d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO))
|
||||||
|
|
||||||
List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"),"relationship", "isRelatedTo", input.date))
|
List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"),"relationship", "isRelatedTo", GraphCleaningFunctions.cleanDate(input.date)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,8 +2,9 @@ package eu.dnetlib.dhp.sx.bio.pubmed
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils, PidType}
|
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
|
||||||
import eu.dnetlib.dhp.schema.oaf._
|
import eu.dnetlib.dhp.schema.oaf._
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
object PubMedToOaf {
|
object PubMedToOaf {
|
||||||
|
@ -93,14 +94,14 @@ object PubMedToOaf {
|
||||||
.map(t => t._1 + t._2)
|
.map(t => t._1 + t._2)
|
||||||
if (urlLists != null)
|
if (urlLists != null)
|
||||||
i.setUrl(urlLists.asJava)
|
i.setUrl(urlLists.asJava)
|
||||||
i.setDateofacceptance(OafMapperUtils.field(article.getDate, dataInfo))
|
i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo))
|
||||||
i.setCollectedfrom(collectedFrom)
|
i.setCollectedfrom(collectedFrom)
|
||||||
result.setPid(pidList.asJava)
|
result.setPid(pidList.asJava)
|
||||||
if (article.getJournal != null && result.isInstanceOf[Publication])
|
if (article.getJournal != null && result.isInstanceOf[Publication])
|
||||||
result.asInstanceOf[Publication].setJournal(mapJournal(article.getJournal))
|
result.asInstanceOf[Publication].setJournal(mapJournal(article.getJournal))
|
||||||
result.setCollectedfrom(List(collectedFrom).asJava)
|
result.setCollectedfrom(List(collectedFrom).asJava)
|
||||||
|
|
||||||
result.setDateofacceptance(OafMapperUtils.field(article.getDate, dataInfo))
|
result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo))
|
||||||
|
|
||||||
if (article.getTitle == null || article.getTitle.isEmpty)
|
if (article.getTitle == null || article.getTitle.isEmpty)
|
||||||
return null
|
return null
|
||||||
|
|
Loading…
Reference in New Issue