add test to verify merge scholix on source

This commit is contained in:
Sandro La Bruzzo 2021-07-06 11:32:14 +02:00
parent 7d8db2eb8a
commit 4c54bd8742
3 changed files with 40 additions and 18 deletions

View File

@ -43,6 +43,10 @@ object SparkCreateScholix {
.map(r => (r.getId, r))(Encoders.tuple(Encoders.STRING, summaryEncoder))
val res: Array[((String, Relation), (String, ScholixSummary))] =relationDS.joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left").take(10)
res.foreach(r =>println(r._1._2))
// relationDS.joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left")
// .map {input:((String,Relation), (String, ScholixSummary)) =>

View File

@ -1,30 +1,26 @@
package eu.dnetlib.dhp.sx.graph.scholix
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature}
import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result}
import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF
import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.ScholixResolved
import eu.dnetlib.dhp.schema.oaf.{Relation, Result}
import eu.dnetlib.dhp.schema.sx.scholix.Scholix
import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary
import eu.dnetlib.dhp.sx.graph.bio.pubmed.AbstractVocabularyTest
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.parse
import org.junit.jupiter.api.Assertions._
import org.junit.jupiter.api.extension.ExtendWith
import org.junit.jupiter.api.{BeforeEach, Test}
import org.mockito.junit.jupiter.MockitoExtension
import java.io.{BufferedReader, InputStream, InputStreamReader}
import java.util.zip.GZIPInputStream
import scala.collection.JavaConverters._
import scala.io.Source
import scala.xml.pull.XMLEventReader
@ExtendWith(Array(classOf[MockitoExtension]))
class ScholixGraphTest extends AbstractVocabularyTest{
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
val mapper: ObjectMapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES,false)
@BeforeEach
@ -47,18 +43,28 @@ class ScholixGraphTest extends AbstractVocabularyTest{
assertEquals(result.size, items.size)
val d = result.find(s => s.getLocalIdentifier.asScala.exists(i => i.getUrl == null || i.getUrl.isEmpty))
assertFalse(d.isDefined)
println(mapper.writeValueAsString(result.head))
}
/**
  * Checks that a Scholix record can be built from a Relation and the ScholixSummary
  * describing the relation's source.
  *
  * The fixture `merge_result_scholix` is consumed as consecutive, non-overlapping line
  * pairs: one JSON-serialized Relation followed by one JSON-serialized ScholixSummary.
  */
@Test
def testScholixMergeOnSource(): Unit = {
  val fixturePath = "/eu/dnetlib/dhp/sx/graph/merge_result_scholix"
  val fixtureStream = getClass.getResourceAsStream(fixturePath)
  assertNotNull(fixtureStream, "missing test resource " + fixturePath)

  val fixture = Source.fromInputStream(fixtureStream)
  val result: List[(Relation, ScholixSummary)] =
    try {
      fixture
        .getLines()
        .filter(_.trim.nonEmpty)
        // grouped(2) yields disjoint (relation, summary) pairs. sliding(2) produced
        // overlapping windows, so every summary line was also parsed as a Relation and
        // paired with the *next* relation line; since the mapper ignores unknown
        // properties, those mispaired records deserialized to objects with null fields.
        .grouped(2)
        .map {
          case Seq(relationJson, summaryJson) =>
            (mapper.readValue(relationJson, classOf[Relation]), mapper.readValue(summaryJson, classOf[ScholixSummary]))
          case dangling =>
            // An odd number of lines means the fixture is malformed: fail loudly
            // instead of silently dropping the unpaired record.
            throw new AssertionError("unpaired line in " + fixturePath + ": " + dangling.mkString)
        }
        .toList
    } finally {
      // Materialized with toList above, so the underlying stream can be released here.
      fixture.close()
    }

  assertNotNull(result)
  assertTrue(result.nonEmpty)
  // Each summary must describe the source of the relation it is paired with.
  result.foreach(r => assertEquals(r._1.getSource, r._2.getId))
  val scholix: List[Scholix] = result.map(r => ScholixUtils.scholixFromSource(r._1, r._2))
  println(mapper.writeValueAsString(scholix.head))
}
@Test
def testScholixRelationshipsClean() = {
def testScholixRelationshipsClean(): Unit = {
val inputRelations = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/relation_transform.json")).mkString
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
@ -66,17 +72,9 @@ class ScholixGraphTest extends AbstractVocabularyTest{
val l:List[String] =json.extract[List[String]]
assertNotNull(l)
assertTrue(l.nonEmpty)
val relVocbaulary =ScholixUtils.relations
l.foreach(r => assertTrue(relVocbaulary.contains(r.toLowerCase)))
}

View File

@ -0,0 +1,20 @@
{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::0f2129f0a8ddfb099b9fabba3105245f","target":"50|doi_________::4af011e641e0ba286660fd24a3f603b7","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2018-01-01","dataInfo":null}]}
{"id":"50|doi_________::0f2129f0a8ddfb099b9fabba3105245f","localIdentifier":[{"identifier":"10.1111/1346-8138.14162","schema":"doi", "url" :"http://dx.doi.org/10.1111/1346-8138.14162"}],"typology":"publication","subType":"Article","title":["Guideline of SSc","Diagnostic criteria, severity classification and guidelines of systemic sclerosis"],"author":["Yoshihide Asano","Masatoshi Jinnin","Yasushi Kawaguchi","Masataka Kuwana","Daisuke Goto","Shinichi Sato","Kazuhiko Takehara","Masaru Hatano","Manabu Fujimoto","Naoki Mugii","Hironobu Ihn"],"date":["2018-04-23","2018-01-01"],"subject":null,"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null}
{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::1b57d5ebe71734c1fa98624d9609971e","target":"50|doi_________::7e79063f205480e61ee7fdcf7ab03bad","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2004-11-01","dataInfo":null}]}
{"id":"50|doi_________::1b57d5ebe71734c1fa98624d9609971e","localIdentifier":[{"identifier":"10.1002/ajmg.a.30270","schema":"doi", "url" :"http://dx.doi.org/10.1111/1346-8138.14162"}],"typology":"publication","subType":"Article","title":["Clinical variability in a Noonan syndrome family with a newPTPN11 gene mutation"],"author":["Débora Romeo Bertola","Alexandre C. Pereira","Paulo S.L. de Oliveira","Chong A. Kim","José Eduardo Krieger"],"date":["2004-09-21T23:19:41Z","2004-11-01"],"subject":[{"scheme":"keywords","value":"Genetics(clinical)"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null}
{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3d6b9e4f51325f7f17b6809513812a43","target":"50|doi_________::edb21431e0271061e0dddc248300708a","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2010-08-16","dataInfo":null}]}
{"id":"50|doi_________::3d6b9e4f51325f7f17b6809513812a43","localIdentifier":[{"identifier":"10.1111/j.1440-1843.2010.01819.x","schema":"doi", "url" :"http://dx.doi.org/10.1111/1346-8138.14162"}],"typology":"publication","subType":"Article","title":["P. aeruginosa: host defence in the lung","Pseudomonas aeruginosa: Host defence in lung diseases"],"author":["Bryan J. WILLIAMS","Joanne DEHNBOSTEL","Timothy S. BLACKWELL"],"date":["2010-08-16","2010-08-16"],"subject":[{"scheme":"keywords","value":"Pulmonary and Respiratory Medicine"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null}
{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f441c6243fd6ae381c520b42349b769","target":"50|doi_________::44ebec98169daae57c106eb1a1072aae","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2018-07-12","dataInfo":null}]}
{"id":"50|doi_________::3f441c6243fd6ae381c520b42349b769","localIdentifier":[{"identifier":"10.1007/s11901-018-0414-x","schema":"doi", "url" :"http://dx.doi.org/10.1111/1346-8138.14162"}],"typology":"publication","subType":"Article","title":["DILI Associated with Skin Reactions"],"author":["Sahand Rahnama-Moghadam","Hans L. Tillmann"],"date":["2018-07-12","2018-07-12"],"subject":null,"publisher":["Springer Science and Business Media LLC"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null}
{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f73d349736eb476653a026d14222b12","target":"50|pmid________::f767374d588a8d51de0f129261daa5a7","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2003-01-01","dataInfo":null}]}
{"id":"50|doi_________::3f73d349736eb476653a026d14222b12","localIdentifier":[{"identifier":"10.1002/lsm.10225","schema":"doi", "url" :"http://dx.doi.org/10.1111/1346-8138.14162"}],"typology":"publication","subType":"Article","title":["Multicenter study of noninvasive radiofrequency for periorbital tissue tightening"],"author":["Fitzpatrick, Richard","Geronemus, Roy","Goldberg, David","Kaminer, Michael","Kilmer, Suzanne","Ruiz-Esparza, Javier"],"date":["2003-01-01","2003-10-17T12:03:53Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null}
{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f73d349736eb476653a026d14222b12","target":"50|pmid________::ff99f7ec03946fa4c8f413d59f75a547","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2003-01-01","dataInfo":null}]}
{"id":"50|doi_________::3f73d349736eb476653a026d14222b12","localIdentifier":[{"identifier":"10.1002/lsm.10225","type":"doi", "url" :"http://dx.doi.org/10.1111/1346-8138.14162"}],"typology":"publication","subType":"Article","title":["Multicenter study of noninvasive radiofrequency for periorbital tissue tightening"],"author":["Fitzpatrick, Richard","Geronemus, Roy","Goldberg, David","Kaminer, Michael","Kilmer, Suzanne","Ruiz-Esparza, Javier"],"date":["2003-01-01","2003-10-17T12:03:53Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null}
{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::48c200713e34afe5c4dabf77f258f9de","target":"50|doi_________::e2d40a313240d3eb979a3172103a4d7f","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2006-11-17","dataInfo":null}]}
{"id":"50|doi_________::48c200713e34afe5c4dabf77f258f9de","localIdentifier":[{"identifier":"10.1007/s11096-006-9043-5","schema":"doi", "url" :"http://dx.doi.org/10.1111/1346-8138.14162"}],"typology":"publication","subType":"Article","title":["Patients attitudes towards and experiences of generic drug substitution in Norway"],"author":["Kjoenniksen, Inge","Lindbaek, Morten","Granas, Anne Gerd"],"date":["2006-11-17","2006-11-17"],"subject":null,"publisher":["Springer Science and Business Media LLC"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null}
{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::507df31b75efc911c50e0d6e25f13d5a","target":"50|doi_________::fa0760d1427b71b6cb3ffcc739751197","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2009-09-29","dataInfo":null}]}
{"id":"50|doi_________::507df31b75efc911c50e0d6e25f13d5a","localIdentifier":[{"identifier":"10.1080/10408398509527417","schema":"doi", "url" :"http://dx.doi.org/10.1111/1346-8138.14162"}],"typology":"publication","subType":"Article","title":["The genusallium. Part 2"],"author":["Gruffydd R. Fenwick","Anthony B. Hanley","John R. Whitaker"],"date":["2009-09-30T13:53:43Z","2009-09-29"],"subject":[{"scheme":"keywords","value":"General Medicine"}],"publisher":["Informa UK Limited"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null}
{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::6b9dce3b94b3bfe9649c4fb6b9e66681","target":"50|pmid________::43fb246d61ba89b7f9825d9e02856d17","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2009-01-01","dataInfo":null}]}
{"id":"50|doi_________::6b9dce3b94b3bfe9649c4fb6b9e66681","localIdentifier":[{"identifier":"10.1111/j.1742-481x.2009.00634.x","schema":"doi", "url" :"http://dx.doi.org/10.1111/1346-8138.14162"}],"typology":"publication","subType":"Article","title":["Venous leg ulcers: patient concordance with compression therapy and its impact on healing and prevention of recurrence"],"author":["Moffatt, Christine","Kommala, Dheerendra","Dourdin, Nathalie","Choe, Yoonhee"],"date":["2009-01-01","2009-11-13T10:40:02Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null}
{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::91c510d2d163e81d55283bb9c2d4d7b7","target":"50|doi_________::1d47307b88d6bb6757f71bfc56686b74","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2015-01-01","dataInfo":null}]}
{"id":"50|doi_________::91c510d2d163e81d55283bb9c2d4d7b7","localIdentifier":[{"identifier":"10.1111/jocd.12148","schema":"doi", "url" :"http://dx.doi.org/10.1111/1346-8138.14162"}],"typology":"publication","subType":"Article","title":["Assessment of efficacy and tolerability of different concentrations of trichloroacetic acid vs\n. carbon dioxide laser in treatment of xanthelasma palpebrarum"],"author":["Basma Mourad","Lamia H. Elgarhy","Heba-Alla Ellakkawy","Nageh Elmahdy"],"date":["2015-08-07","2015-01-01"],"subject":[{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null}