From 41de8fb7859ffa5520d974bcfd68deb2d610d00c Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Fri, 13 May 2022 12:27:40 +0200 Subject: [PATCH] added ped test with doi citation; fix pmid pid type --- .../dhp/bioschema/oozie_app/workflow.xml | 4 +- .../bioschema/BioschemaModelConstants.scala | 24 +++--- .../dhp/bioschema/ped_doi_citation.json | 80 +++++++++++++++++++ .../BioschemaDataciteToOAFTest.scala | 13 +++ 4 files changed, 107 insertions(+), 14 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/bioschema/ped_doi_citation.json diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml index deacb878b..43f39f6c2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml @@ -2,12 +2,12 @@ mainPath - /data/bioschema/mobidb + /data/bioschema/ped the working path of Bioschema stores datasourceKey - mobidb + ped the key that identifies the datasource (eg ped, disprot, mobidb) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala index 86b6d2980..1ffe6ec61 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala @@ -62,7 +62,7 @@ class BioschemaModelConstants extends Serializable {} object BioschemaModelConstants { val PROTEIN_RESOURCETYPE: Qualifier = - qualifier("protein", "protein", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE) + qualifier("0047", "Protein", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE) val DATA_INFO: DataInfo = OafMapperUtils.dataInfo( false, @@ -73,35 +73,35 @@ object BioschemaModelConstants { "0.9" ) - val PED_PREFIX: String = "ped_________" - val DISPROT_PREFIX: String = "disprot_____" - val MOBIDB_PREFIX: String = "mobidb______" + val PED_PREFIX: String = "fsh_____3595" + val DISPROT_PREFIX: String = "fsh_____1904" + val MOBIDB_PREFIX: String = "fsh_____2176" val resolvedURLPattern: Map[String, String] = Map( - "https://identifiers.org/pubmed:" -> "pubmed", + "https://identifiers.org/pubmed:" -> "pmid", "http://purl.uniprot.org/uniprot/" -> "uniprot", "https://identifiers.org/uniprot:" -> "uniprot", "https://identifiers.org/disprot:" -> "disprot", - "https://identifiers.org/mobidb:" -> "mobidb" + "https://identifiers.org/mobidb:" -> "mobidb", + "https://doi.org/" -> "doi" ) - //TODO create DatasourceId and update those value val collectedFromMap: Map[String, KeyValue] = { val PEDCollectedFrom: KeyValue = OafMapperUtils.keyValue( - "10|ped_________::pedDatasourceId", + "10|fairsharing_::c0e8517b1fe0b5270f3f41d4b56d6118", "Protein Ensemble Database" ) PEDCollectedFrom.setDataInfo(DATA_INFO) val DISPROTCollectedFrom: KeyValue = OafMapperUtils.keyValue( - "10|disprot_____::disprotDatasourceId", - "DisProt, the database of intrinsically disordered proteins" + "10|fairsharing_::c91591a8d461c2869b9f535ded3e213e", + "Database of Protein Disorder" ) DISPROTCollectedFrom.setDataInfo(DATA_INFO) val MOBIDBCollectedFrom: KeyValue = OafMapperUtils.keyValue( - "10|mobidb______::mobidbDatasourceId", - "MobiDB, a database of protein disorder and mobility annotations" + "10|fairsharing_::0234c510bc6d908b28c70ff313743079", + "MobiDB" ) MOBIDBCollectedFrom.setDataInfo(DATA_INFO) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/bioschema/ped_doi_citation.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/bioschema/ped_doi_citation.json new file mode 100644 index 000000000..36c4949de --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/bioschema/ped_doi_citation.json @@ -0,0 +1,80 @@ +{ + "id": "PED00180#Q9KPH3_A_0", + "types": { + "resourceType": "Protein", + "resourceTypeGeneral": "Dataset" + }, + "creators": [], + "identifiers": [ + { + "identifier": "https://proteinensemble.org/PED00180#Q9KPH3_A_0", + "identifierType": "URL" + } + ], + "relatedIdentifiers": [ + { + "relationType": "IsCitedBy", + "relatedIdentifier": "https://doi.org/10.1016/j.jsb.2020.107573", + "relatedIdentifierType": "URL" + }, + { + "relationType": "IsIdenticalTo", + "relatedIdentifier": "http://purl.uniprot.org/uniprot/Q9KPH3", + "relatedIdentifierType": "URL" + } + ], + "alternateIdentifiers": [ + { + "alternateIdentifier": "https://identifiers.org/uniprot:Q9KPH3" + } + ], + "descriptions": [], + "titles": [ + { + "title": "PED00180#Q9KPH3_A_0 - Structural ensemble of protein DciA from Vibrio cholerae (1-157) " + } + ], + "dates": [ + { + "date": "2022-05-10T18:03:43", + "dateType": "Collected" + } + ], + "subjects": [ + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120", + "value": "NMR", + "subjectScheme": "IDPO:00120" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125", + "value": "SAXS", + "subjectScheme": "IDPO:00125" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00224", + "value": "GROMACS", + "subjectScheme": "IDPO:00224" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00228", + "value": "AMBER", + "subjectScheme": "IDPO:00228" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl:00185", + "value": "FM", + "subjectScheme": "IDPO:00185" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl:00213", + "value": "EOM", + "subjectScheme": "IDPO:00213" + }, + { + "schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl:00214", + "value": "GAJOE", + "subjectScheme": "IDPO:00214" + } + ] +} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/bioschema/BioschemaDataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/bioschema/BioschemaDataciteToOAFTest.scala index 5699f45c9..bed7fd994 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/bioschema/BioschemaDataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/bioschema/BioschemaDataciteToOAFTest.scala @@ -99,4 +99,17 @@ class BioschemaDataciteToOAFTest { println("----------------------------") }) } + + @Test + def testPEDDoiCitationMapping(): Unit = { + val record = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/bioschema/ped_doi_citation.json")) + .mkString + val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT) + val res: List[Oaf] = BioschemaToOAFTransformation.generateOAF(record, true, "ped", "protein") + res.foreach(r => { + println(mapper.writeValueAsString(r)) + println("----------------------------") + }) + } }