From 7e08655e5fc7cbbabdeb7e29b994d690dabfdedb Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 29 Jun 2021 12:02:03 +0200 Subject: [PATCH] added relation dates in all scholexplorer Datasources --- .../DataciteToOAFTransformation.scala | 12 ++++++--- .../eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala | 25 +++++++++++-------- .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 7 +++--- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala index b9f2b8501..0cdf0accb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala @@ -45,7 +45,8 @@ case class HostedByMapType(openaire_id: String, datacite_name: String, official_ object DataciteToOAFTransformation { -val REL_TYPE_VALUE:String = "resultResult" + val REL_TYPE_VALUE:String = "resultResult" + val DATE_RELATION_KEY = "RelationDate" val subRelTypeMapping: Map[String,(String,String)] = Map( "References" ->("IsReferencedBy","relationship"), @@ -547,8 +548,7 @@ val REL_TYPE_VALUE:String = "resultResult" JField("relatedIdentifier", JString(relatedIdentifier)) <- relIdentifier } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType) - - relations = relations ::: generateRelations(rels,result.getId) + relations = relations ::: generateRelations(rels,result.getId, if (i_date.isDefined && i_date.get.isDefined) i_date.get.get else null) } if (relations != null && relations.nonEmpty) { List(result) ::: relations @@ -557,7 +557,7 @@ val REL_TYPE_VALUE:String = "resultResult" List(result) } - private def generateRelations(rels: List[RelatedIdentifierType], id:String):List[Relation] = { + private def generateRelations(rels: List[RelatedIdentifierType], id:String, date:String):List[Relation] = { rels .filter(r => subRelTypeMapping.contains(r.relationType) && ( @@ -575,6 +575,10 @@ val REL_TYPE_VALUE:String = "resultResult" rel.setSubRelType(subRelType) rel.setRelClass(r.relationType) + val dateProps:KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) + + rel.setProperties(List(dateProps).asJava) + rel.setSource(id) rel.setTarget(s"unresolved::${r.relatedIdentifier}::${r.relatedIdentifierType}") rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index ce4b55684..f435c56dd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, OafMapperUtils} import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Instance, KeyValue, Oaf, Relation, StructuredProperty} import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} @@ -22,6 +22,8 @@ object BioDBToOAF { val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9") val SUBJ_CLASS = "Keywords" + val DATE_RELATION_KEY = "RelationDate" + val resolvedURL:Map[String,String] = Map( "genbank"-> "https://www.ncbi.nlm.nih.gov/nuccore/", "ncbi-n" -> "https://www.ncbi.nlm.nih.gov/nuccore/", @@ -77,7 +79,7 @@ object BioDBToOAF { val relation_semantic= (json \ "RelationshipType" \ "Name").extract[String] - val date = (json \ "LinkedPublicationDate").extract[String] + val date = GraphCleaningFunctions.cleanDate((json \ "LinkedPublicationDate").extract[String]) createRelation(target_pid, target_pid_type, generate_unresolved_id(source_pid, source_pid_type),collectedFromMap("elsevier"),"relationship", relation_semantic, date) @@ -134,8 +136,8 @@ object BioDBToOAF { } if (input.date!= null && input.date.nonEmpty) { val dt = input.date.head - i.setDateofacceptance(OafMapperUtils.field(dt, DATA_INFO)) - d.setDateofacceptance(OafMapperUtils.field(dt, DATA_INFO)) + i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) + d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) } d } @@ -177,7 +179,7 @@ object BioDBToOAF { JObject(dateOBJ) <- json \ "dates" JField("date", JString(date)) <- dateOBJ JField("date_info", JString(date_info)) <- dateOBJ - } yield UniprotDate(date, date_info) + } yield UniprotDate(GraphCleaningFunctions.cleanDate(date), date_info) val subjects: List[String] = (json \\ "subjects").extractOrElse[List[String]](null) @@ -248,7 +250,10 @@ object BioDBToOAF { rel.setSource(sourceId) rel.setTarget(s"unresolved::$pid::$pidType") - rel.setValidationDate(date) + + val dateProps:KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) + + rel.setProperties(List(dateProps).asJava) rel.getTarget.startsWith("unresolved") rel.setCollectedfrom(List(collectedFrom).asJava) @@ -354,7 +359,7 @@ object BioDBToOAF { JField("IDURL", JString(idUrl)) <- identifier JField("ID", JString(id)) <- identifier - } yield EBILinks(relation, publicationDate, title, pmid, id, idScheme, idUrl) + } yield EBILinks(relation, GraphCleaningFunctions.cleanDate(publicationDate), title, pmid, id, idScheme, idUrl) } @@ -384,9 +389,9 @@ object BioDBToOAF { i.setCollectedfrom(collectedFromMap("ebi")) d.setInstance(List(i).asJava) - i.setDateofacceptance(OafMapperUtils.field(input.date, DATA_INFO)) - d.setDateofacceptance(OafMapperUtils.field(input.date, DATA_INFO)) + i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) + d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) - List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"),"relationship", "isRelatedTo", input.date)) + List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"),"relationship", "isRelatedTo", GraphCleaningFunctions.cleanDate(input.date))) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index e240cd070..2a1392353 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -2,8 +2,9 @@ package eu.dnetlib.dhp.sx.bio.pubmed import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils, PidType} +import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ + import scala.collection.JavaConverters._ object PubMedToOaf { @@ -93,14 +94,14 @@ object PubMedToOaf { .map(t => t._1 + t._2) if (urlLists != null) i.setUrl(urlLists.asJava) - i.setDateofacceptance(OafMapperUtils.field(article.getDate, dataInfo)) + i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) i.setCollectedfrom(collectedFrom) result.setPid(pidList.asJava) if (article.getJournal != null && result.isInstanceOf[Publication]) result.asInstanceOf[Publication].setJournal(mapJournal(article.getJournal)) result.setCollectedfrom(List(collectedFrom).asJava) - result.setDateofacceptance(OafMapperUtils.field(article.getDate, dataInfo)) + result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)) if (article.getTitle == null || article.getTitle.isEmpty) return null