forked from D-Net/dnet-hadoop
added ped test with doi citation; fix pmid pid type
This commit is contained in:
parent
6d4235bd00
commit
41de8fb785
|
@ -2,12 +2,12 @@
|
||||||
<parameters>
|
<parameters>
|
||||||
<property>
|
<property>
|
||||||
<name>mainPath</name>
|
<name>mainPath</name>
|
||||||
<value>/data/bioschema/mobidb</value>
|
<value>/data/bioschema/ped</value>
|
||||||
<description>the working path of Bioschema stores</description>
|
<description>the working path of Bioschema stores</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>datasourceKey</name>
|
<name>datasourceKey</name>
|
||||||
<value>mobidb</value>
|
<value>ped</value>
|
||||||
<description>the key that identifies the datasource (eg ped, disprot, mobidb)</description>
|
<description>the key that identifies the datasource (eg ped, disprot, mobidb)</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
|
|
|
@ -62,7 +62,7 @@ class BioschemaModelConstants extends Serializable {}
|
||||||
object BioschemaModelConstants {
|
object BioschemaModelConstants {
|
||||||
|
|
||||||
val PROTEIN_RESOURCETYPE: Qualifier =
|
val PROTEIN_RESOURCETYPE: Qualifier =
|
||||||
qualifier("protein", "protein", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)
|
qualifier("0047", "Protein", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)
|
||||||
|
|
||||||
val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(
|
val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(
|
||||||
false,
|
false,
|
||||||
|
@ -73,35 +73,35 @@ object BioschemaModelConstants {
|
||||||
"0.9"
|
"0.9"
|
||||||
)
|
)
|
||||||
|
|
||||||
val PED_PREFIX: String = "ped_________"
|
val PED_PREFIX: String = "fsh_____3595"
|
||||||
val DISPROT_PREFIX: String = "disprot_____"
|
val DISPROT_PREFIX: String = "fsh_____1904"
|
||||||
val MOBIDB_PREFIX: String = "mobidb______"
|
val MOBIDB_PREFIX: String = "fsh_____2176"
|
||||||
|
|
||||||
val resolvedURLPattern: Map[String, String] = Map(
|
val resolvedURLPattern: Map[String, String] = Map(
|
||||||
"https://identifiers.org/pubmed:" -> "pubmed",
|
"https://identifiers.org/pubmed:" -> "pmid",
|
||||||
"http://purl.uniprot.org/uniprot/" -> "uniprot",
|
"http://purl.uniprot.org/uniprot/" -> "uniprot",
|
||||||
"https://identifiers.org/uniprot:" -> "uniprot",
|
"https://identifiers.org/uniprot:" -> "uniprot",
|
||||||
"https://identifiers.org/disprot:" -> "disprot",
|
"https://identifiers.org/disprot:" -> "disprot",
|
||||||
"https://identifiers.org/mobidb:" -> "mobidb"
|
"https://identifiers.org/mobidb:" -> "mobidb",
|
||||||
|
"https://doi.org/" -> "doi"
|
||||||
)
|
)
|
||||||
|
|
||||||
//TODO create DatasourceId and update those value
|
|
||||||
val collectedFromMap: Map[String, KeyValue] = {
|
val collectedFromMap: Map[String, KeyValue] = {
|
||||||
val PEDCollectedFrom: KeyValue = OafMapperUtils.keyValue(
|
val PEDCollectedFrom: KeyValue = OafMapperUtils.keyValue(
|
||||||
"10|ped_________::pedDatasourceId",
|
"10|fairsharing_::c0e8517b1fe0b5270f3f41d4b56d6118",
|
||||||
"Protein Ensemble Database"
|
"Protein Ensemble Database"
|
||||||
)
|
)
|
||||||
PEDCollectedFrom.setDataInfo(DATA_INFO)
|
PEDCollectedFrom.setDataInfo(DATA_INFO)
|
||||||
|
|
||||||
val DISPROTCollectedFrom: KeyValue = OafMapperUtils.keyValue(
|
val DISPROTCollectedFrom: KeyValue = OafMapperUtils.keyValue(
|
||||||
"10|disprot_____::disprotDatasourceId",
|
"10|fairsharing_::c91591a8d461c2869b9f535ded3e213e",
|
||||||
"DisProt, the database of intrinsically disordered proteins"
|
"Database of Protein Disorder"
|
||||||
)
|
)
|
||||||
DISPROTCollectedFrom.setDataInfo(DATA_INFO)
|
DISPROTCollectedFrom.setDataInfo(DATA_INFO)
|
||||||
|
|
||||||
val MOBIDBCollectedFrom: KeyValue = OafMapperUtils.keyValue(
|
val MOBIDBCollectedFrom: KeyValue = OafMapperUtils.keyValue(
|
||||||
"10|mobidb______::mobidbDatasourceId",
|
"10|fairsharing_::0234c510bc6d908b28c70ff313743079",
|
||||||
"MobiDB, a database of protein disorder and mobility annotations"
|
"MobiDB"
|
||||||
)
|
)
|
||||||
MOBIDBCollectedFrom.setDataInfo(DATA_INFO)
|
MOBIDBCollectedFrom.setDataInfo(DATA_INFO)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,80 @@
|
||||||
|
{
|
||||||
|
"id": "PED00180#Q9KPH3_A_0",
|
||||||
|
"types": {
|
||||||
|
"resourceType": "Protein",
|
||||||
|
"resourceTypeGeneral": "Dataset"
|
||||||
|
},
|
||||||
|
"creators": [],
|
||||||
|
"identifiers": [
|
||||||
|
{
|
||||||
|
"identifier": "https://proteinensemble.org/PED00180#Q9KPH3_A_0",
|
||||||
|
"identifierType": "URL"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"relatedIdentifiers": [
|
||||||
|
{
|
||||||
|
"relationType": "IsCitedBy",
|
||||||
|
"relatedIdentifier": "https://doi.org/10.1016/j.jsb.2020.107573",
|
||||||
|
"relatedIdentifierType": "URL"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"relationType": "IsIdenticalTo",
|
||||||
|
"relatedIdentifier": "http://purl.uniprot.org/uniprot/Q9KPH3",
|
||||||
|
"relatedIdentifierType": "URL"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"alternateIdentifiers": [
|
||||||
|
{
|
||||||
|
"alternateIdentifier": "https://identifiers.org/uniprot:Q9KPH3"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"descriptions": [],
|
||||||
|
"titles": [
|
||||||
|
{
|
||||||
|
"title": "PED00180#Q9KPH3_A_0 - Structural ensemble of protein DciA from Vibrio cholerae (1-157) "
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"dates": [
|
||||||
|
{
|
||||||
|
"date": "2022-05-10T18:03:43",
|
||||||
|
"dateType": "Collected"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"subjects": [
|
||||||
|
{
|
||||||
|
"schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00120",
|
||||||
|
"value": "NMR",
|
||||||
|
"subjectScheme": "IDPO:00120"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00125",
|
||||||
|
"value": "SAXS",
|
||||||
|
"subjectScheme": "IDPO:00125"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00224",
|
||||||
|
"value": "GROMACS",
|
||||||
|
"subjectScheme": "IDPO:00224"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl#IDPO:00228",
|
||||||
|
"value": "AMBER",
|
||||||
|
"subjectScheme": "IDPO:00228"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl:00185",
|
||||||
|
"value": "FM",
|
||||||
|
"subjectScheme": "IDPO:00185"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl:00213",
|
||||||
|
"value": "EOM",
|
||||||
|
"subjectScheme": "IDPO:00213"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"schemeURI": "https://disprot.org/assets/data/IDPO_v0.2.owl:00214",
|
||||||
|
"value": "GAJOE",
|
||||||
|
"subjectScheme": "IDPO:00214"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -99,4 +99,17 @@ class BioschemaDataciteToOAFTest {
|
||||||
println("----------------------------")
|
println("----------------------------")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def testPEDDoiCitationMapping(): Unit = {
|
||||||
|
val record = Source
|
||||||
|
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/bioschema/ped_doi_citation.json"))
|
||||||
|
.mkString
|
||||||
|
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
||||||
|
val res: List[Oaf] = BioschemaToOAFTransformation.generateOAF(record, true, "ped", "protein")
|
||||||
|
res.foreach(r => {
|
||||||
|
println(mapper.writeValueAsString(r))
|
||||||
|
println("----------------------------")
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue