From 9a0ca0296a0f13f0c1c3db35d3f9190f39579983 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 7 Apr 2022 13:17:50 +0200 Subject: [PATCH] added mobidb constants configuration and test --- .../dhp/bioschema/oozie_app/workflow.xml | 6 +- .../bioschema/BioschemaModelConstants.scala | 22 +++- .../dnetlib/dhp/bioschema/mobidb_record.json | 75 +++++++++++ .../BioschemaDataciteToOAFTest.scala | 13 ++ .../bioschema/oozie_app/workflow.xml | 2 +- .../rdfconverter/bioschema/ConverterTest.java | 11 ++ .../dhp/rdfconverter/bioschema/mobidb.nq | 118 ++++++++++++++++++ 7 files changed, 238 insertions(+), 9 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/bioschema/mobidb_record.json create mode 100644 dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/mobidb.nq diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml index 92af9a8ef..deacb878b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml @@ -2,13 +2,13 @@ mainPath - /data/bioschema/ped + /data/bioschema/mobidb the working path of Bioschema stores datasourceKey - ped - the key that identifies the datasource (eg ped, disprot) + mobidb + the key that identifies the datasource (eg ped, disprot, mobidb) profile diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala index 729aa0ec0..86b6d2980 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala @@ -75,13 +75,16 @@ object BioschemaModelConstants { val PED_PREFIX: String = "ped_________" val DISPROT_PREFIX: String = "disprot_____" + val MOBIDB_PREFIX: String = "mobidb______" val resolvedURLPattern: Map[String, String] = Map( "https://identifiers.org/pubmed:" -> "pubmed", "http://purl.uniprot.org/uniprot/" -> "uniprot", "https://identifiers.org/uniprot:" -> "uniprot", - "https://identifiers.org/disprot:" -> "disprot" + "https://identifiers.org/disprot:" -> "disprot", + "https://identifiers.org/mobidb:" -> "mobidb" ) + //TODO create DatasourceId and update those value val collectedFromMap: Map[String, KeyValue] = { val PEDCollectedFrom: KeyValue = OafMapperUtils.keyValue( @@ -89,21 +92,30 @@ object BioschemaModelConstants { "Protein Ensemble Database" ) PEDCollectedFrom.setDataInfo(DATA_INFO) + val DISPROTCollectedFrom: KeyValue = OafMapperUtils.keyValue( "10|disprot_____::disprotDatasourceId", "DisProt, the database of intrinsically disordered proteins" ) DISPROTCollectedFrom.setDataInfo(DATA_INFO) + val MOBIDBCollectedFrom: KeyValue = OafMapperUtils.keyValue( + "10|mobidb______::mobidbDatasourceId", + "MobiDB, a database of protein disorder and mobility annotations" + ) + MOBIDBCollectedFrom.setDataInfo(DATA_INFO) + Map( - "ped" -> PEDCollectedFrom, - "disprot" -> DISPROTCollectedFrom + "ped" -> PEDCollectedFrom, + "disprot" -> DISPROTCollectedFrom, + "mobidb" -> MOBIDBCollectedFrom ) } val datasourceKeyPrefix: Map[String, String] = Map( - "ped" -> PED_PREFIX, - "disprot" -> DISPROT_PREFIX + "ped" -> PED_PREFIX, + "disprot" -> DISPROT_PREFIX, + "mobidb" -> MOBIDB_PREFIX ) val REL_TYPE_VALUE: String = "resultResult" diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/bioschema/mobidb_record.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/bioschema/mobidb_record.json new file mode 100644 index 000000000..5a7dce517 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/bioschema/mobidb_record.json @@ -0,0 +1,75 @@ +{ + "id": "Q9UPN6", + "types": { + "resourceType": "Protein", + "resourceTypeGeneral": "Dataset" + }, + "creators": [], + "identifiers": [ + { + "identifier": "https://mobidb.org/Q9UPN6", + "identifierType": "URL" + } + ], + "relatedIdentifiers": [ + { + "relationType": "IsIdenticalTo", + "relatedIdentifier": "http://purl.uniprot.org/uniprot/Q9UPN6", + "relatedIdentifierType": "URL" + } + ], + "alternateIdentifiers": [ + { + "alternateIdentifier": "https://identifiers.org/mobidb:Q9UPN6" + } + ], + "descriptions": [], + "titles": [ + { + "title": "Q9UPN6 - SR-related and CTD-associated factor 8 " + } + ], + "dates": [ + { + "date": "2021-12-06T14:49:15", + "dateType": "Collected" + } + ], + "subjects": [ + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076", + "value": "Disorder", + "subjectScheme": "IDPO:00076" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076", + "value": "Disorder", + "subjectScheme": "IDPO:00076" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076", + "value": "Disorder", + "subjectScheme": "IDPO:00076" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076", + "value": "Disorder", + "subjectScheme": "IDPO:00076" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076", + "value": "Disorder", + "subjectScheme": "IDPO:00076" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076", + "value": "Disorder", + "subjectScheme": "IDPO:00076" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00076", + "value": "Disorder", + "subjectScheme": "IDPO:00076" + } + ] +} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/bioschema/BioschemaDataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/bioschema/BioschemaDataciteToOAFTest.scala index 7ffe10c2c..5699f45c9 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/bioschema/BioschemaDataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/bioschema/BioschemaDataciteToOAFTest.scala @@ -86,4 +86,17 @@ class BioschemaDataciteToOAFTest { println("----------------------------") }) } + + @Test + def testMOBIDBMapping(): Unit = { + val record = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/bioschema/mobidb_record.json")) + .mkString + val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) + val res: List[Oaf] = BioschemaToOAFTransformation.generateOAF(record, true, "mobidb", "protein") + res.foreach(r => { + println(mapper.writeValueAsString(r)) + println("----------------------------") + }) + } } diff --git a/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml index 86508d25a..42ef603bc 100644 --- a/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-rdfconverter/src/main/resources/eu/dnetlib/dhp/rdfconverter/bioschema/oozie_app/workflow.xml @@ -2,7 +2,7 @@ workingPath - /data/bioschema/ped/ + /data/bioschema/mobidb/ the working path diff --git a/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java b/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java index 15cda0118..0be32d5cb 100644 --- a/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java +++ b/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java @@ -36,4 +36,15 @@ public class ConverterTest { logger.info("JSON DATACITE >> " + r); }); } + + @Test + public void mobidbToDataciteTest() throws Exception { + InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/mobidb.nq"); + String nq = IOUtils.toString(is); + RDFConverter converter = new RDFConverter(); + ArrayList results = converter.nQuadsFile2DataciteJson(nq, "Protein"); + results.stream().forEach(r -> { + logger.info("JSON DATACITE >> " + r); + }); + } } diff --git a/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/mobidb.nq b/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/mobidb.nq new file mode 100644 index 000000000..bb6f717ec --- /dev/null +++ b/dhp-workflows/dhp-rdfconverter/src/test/resources/eu/dnetlib/dhp/rdfconverter/bioschema/mobidb.nq @@ -0,0 +1,118 @@ + . + "2021-12-06T14:49:15"^^ . + . + . + . + "MEAVKTFNSELYSLNDYKPPISKAKMTQITKAAIKAIKFYKHVVQSVEKFIQKCKPEYKVPGLYVIDSIVRQSRHQFGQEKDVFAPRFSNNIISTFQNLYRCPGDDKSKIVRVLNLWQKNNVFKSEIIQPLLDMAAGIPPPVVTPVLASTTTAMSNTPGTPVTPVTPANVVQGLPDPWVSQITNTDTLAAVAQILQSPQGQQLQQLIQTLQIQQQKPQPSILQALDAGLVVQLQALTAQLTAAAAAANTLTPLEQGVSFNKKLMDRFDFGEDSEHSEEPKKEIPASQLSHVSESVNNSIFHQIAEQLQQQNLEHLRQQLLEQQQPQKATPQDSQEGTFGSEHSASPSQGSSQQHFLEPEVNLDDSIDIQQQDMDIDEGQDGVEEEVFEQEAKKVAVRSRSRTHSRSRSRSPRKRRSRSRSGSRKRKHRKRSRSRSRERKRKSSRSYSSERRAREREKERQKKGLPPIRSKTLSVCSTTLWVGQVDKKATQQDLTNLFEEFGQIESINMIPPRGCAYVCMVHRQDAFRALQKLSSGSYKIGSKVIKIAWALNKGVKTEYKQFWDVDLGVTYIPWEKVKVDDLEGFAEGGMIDQETVNTEWETVKSSEPVKETVQTTQSPTPVEKETVVTTQAEVFPPPVAMLQIPVAPAVPTVSLVPPAFPVSMPVPPPGFSPIPPPPFLRASFNPSQPPPGFMPPPVPPPVVPPPTIPPVVPTSLVQPSLSMTPETVKDVGFGSLVIPGGSVASNLATSALPAGNVFNAPTKQAEPEEKVPHLIDHQISSGENTRSVIPNDISSNAAILGGQPPNVTSNSGILGVQRPNVSSNSEILGVRPSNVSSSSGIIAAQPPNILNNSGILGIQPPSVSNSSGLLGVLPPNIPNNSGLVGVQPPNVPNTPGLLGTQPPAGPQNLPPLSIPNQRMPTMPMLDIRPGLIPQAPGPRFPLIQPGIPPQRGIPPPSVLDSALHPPPRGPFPPGDIFSQPERPFLAPGRQSVDNVTNPEKRIPLGNDNIQQEGDRDYRFPPIETRESISRPPPVDVRDVVGRPIDPREGPGRPPLDGRDHFGRPPVDIRENLVRPGIDHLGRRDHFGFNPEKPWGHRGDFDEREHRVLPVYGGPKGLHEERGRFRSGNYRFDPRSGPWNRGFGQEVHRDFDDRRRPWERQRDRDDRDFDFCREMNGNRLGRDRIQNTWVPPPHARVFDYFEGATSQRKGDNVPQVNGENTERHAQPPPIPVQNDPELYEKLTSSNEINKEKSDTVADIESEPVVESTETEGT" . + . + . + . + . + . + . + . + . + "https://identifiers.org/mobidb:Q9UPN6" . + "https://mobidb.org/#2020-09" . + "SR-related and CTD-associated factor 8" . + . + . + . + . + . + "Protein disorder content predicted by MobiDB-lite" . + . + . + "Protein disorder content" . + . + "2.93E-1" . + . + "1271" . + "1" . + . + . + "Protein disordered region predicted by MobiDB-lite" . + . + . + "Term" . + . + . + . + "Disorder" . + "IDPO:00076" . + . + "IDP ontology" . + . + "289" . + "270" . + . + . + "Protein disordered region predicted by MobiDB-lite" . + . + . + "Term" . + . + . + "354" . + "322" . + . + . + "Protein disordered region predicted by MobiDB-lite" . + . + . + "Term" . + . + . + "468" . + "384" . + . + . + "Protein disordered region predicted by MobiDB-lite" . + . + . + "Term" . + . + . + "827" . + "808" . + . + . + "Protein disordered region predicted by MobiDB-lite" . + . + . + "Term" . + . + . + "918" . + "899" . + . + . + "Protein disordered region predicted by MobiDB-lite" . + . + . + "Term" . + . + . + "1064" . + "945" . + . + . + "Protein disordered region predicted by MobiDB-lite" . + . + . + "Term" . + . + . + "1271" . + "1198" . + . + . + . + . + . + "9606" . + . + . + "NCBI taxon" . + . + "MobiDB" . \ No newline at end of file