registries_analysis/notebooks/01.1-exploration-fairsharin...

2090 lines
98 KiB
Plaintext
Raw Normal View History

2022-03-17 10:33:11 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import ast\n",
"import csv\n",
"import json\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
"import plotly.express as px\n",
"\n",
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading datasets"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>type</th>\n",
" <th>attributes.created-at</th>\n",
" <th>attributes.updated-at</th>\n",
" <th>attributes.metadata.doi</th>\n",
" <th>attributes.metadata.name</th>\n",
" <th>attributes.metadata.status</th>\n",
" <th>attributes.metadata.contacts</th>\n",
" <th>attributes.metadata.homepage</th>\n",
" <th>attributes.metadata.identifier</th>\n",
" <th>attributes.metadata.description</th>\n",
" <th>attributes.metadata.abbreviation</th>\n",
" <th>attributes.metadata.support-links</th>\n",
" <th>attributes.metadata.year-creation</th>\n",
" <th>attributes.metadata.data-processes</th>\n",
" <th>attributes.metadata.cross-references</th>\n",
" <th>attributes.legacy-ids</th>\n",
" <th>attributes.fairsharing-registry</th>\n",
" <th>attributes.record-type</th>\n",
" <th>attributes.subjects</th>\n",
" <th>attributes.domains</th>\n",
" <th>attributes.taxonomies</th>\n",
" <th>attributes.user-defined-tags</th>\n",
" <th>attributes.countries</th>\n",
" <th>attributes.name</th>\n",
" <th>attributes.abbreviation</th>\n",
" <th>attributes.url</th>\n",
" <th>attributes.doi</th>\n",
" <th>attributes.fairsharing-licence</th>\n",
" <th>attributes.description</th>\n",
" <th>attributes.publications</th>\n",
" <th>attributes.licence-links</th>\n",
" <th>attributes.url-for-logo</th>\n",
" <th>attributes.metadata.citations</th>\n",
" <th>attributes.metadata.associated-tools</th>\n",
" <th>attributes.metadata.deprecation-reason</th>\n",
" <th>attributes.metadata.data-access-condition.type</th>\n",
" <th>attributes.metadata.data-contact-information</th>\n",
" <th>attributes.metadata.data-deposition-condition.url</th>\n",
" <th>attributes.metadata.data-deposition-condition.type</th>\n",
" <th>attributes.metadata.deprecation-date</th>\n",
" <th>attributes.metadata.access-points</th>\n",
" <th>attributes.metadata.data-access-condition.url</th>\n",
" <th>attributes.metadata.resource-sustainability.url</th>\n",
" <th>attributes.metadata.resource-sustainability.name</th>\n",
" <th>attributes.metadata.data-preservation-policy.url</th>\n",
" <th>attributes.metadata.data-preservation-policy.name</th>\n",
" <th>attributes.metadata.data-access-for-pre-publication-review</th>\n",
" <th>attributes.metadata.data-versioning</th>\n",
" <th>attributes.metadata.data-curation.type</th>\n",
" <th>attributes.metadata.data-curation.url</th>\n",
" <th>attributes.metadata.citation-to-related-publications</th>\n",
" <th>attributes.metadata.tombstone</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3226</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2020-12-09T11:53:44.000Z</td>\n",
" <td>2022-02-08T10:42:36.452Z</td>\n",
" <td>10.25504/FAIRsharing.d6423b</td>\n",
" <td>WDC Sunspot Index and Long-term Solar Observat...</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-name': 'Frédéric Clette', 'contact-...</td>\n",
" <td>http://sidc.be/silso/home</td>\n",
" <td>3226</td>\n",
" <td>The WDC-SILSO is an activity of the Operationa...</td>\n",
" <td>WDC-SILSO</td>\n",
" <td>[{'url': 'http://www.sidc.be/silso/taxonomy/te...</td>\n",
" <td>2013.0</td>\n",
" <td>[{'url': 'http://www.sidc.be/silso/datafiles',...</td>\n",
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
" <td>[biodbcore-001740, bsg-d001740]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Electromagnetism, Astrophysics and Astronomy,...</td>\n",
" <td>[Climate, Observation design]</td>\n",
" <td>[Not applicable]</td>\n",
" <td>[Climate change, earth observation, Electromag...</td>\n",
" <td>[Belgium]</td>\n",
" <td>FAIRsharing record for: WDC Sunspot Index and ...</td>\n",
" <td>WDC-SILSO</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.d...</td>\n",
" <td>10.25504/FAIRsharing.d6423b</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: The WDC-SIL...</td>\n",
" <td>[]</td>\n",
" <td>[{'licence-name': 'SILSO legal notices', 'lice...</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2114</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2014-11-04T15:23:40.000Z</td>\n",
" <td>2022-01-21T14:39:02.195Z</td>\n",
" <td>10.25504/FAIRsharing.p06nme</td>\n",
" <td>Biological Magnetic Resonance Data Bank</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-name': 'Helpdesk', 'contact-email':...</td>\n",
" <td>https://bmrb.io/</td>\n",
" <td>2114</td>\n",
" <td>BMRB collects, annotates, archives, and dissem...</td>\n",
" <td>BMRB</td>\n",
" <td>[{'url': 'https://bmrb.io/bmrb/news/', 'name':...</td>\n",
" <td>1988.0</td>\n",
" <td>[{'url': 'https://bmrb.io/data_library/rsync.s...</td>\n",
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
" <td>[biodbcore-000584, bsg-d000584]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Structural Biology]</td>\n",
" <td>[Molecular structure, Protein structure, Pepti...</td>\n",
" <td>[All]</td>\n",
" <td>[]</td>\n",
" <td>[United States]</td>\n",
" <td>FAIRsharing record for: Biological Magnetic Re...</td>\n",
" <td>BMRB</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.p...</td>\n",
" <td>10.25504/FAIRsharing.p06nme</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: BMRB collec...</td>\n",
" <td>[{'id': 552, 'pubmed_id': 18288446, 'title': '...</td>\n",
" <td>[{'licence-name': 'wwPDB Privacy and Usage Pol...</td>\n",
" <td>None</td>\n",
" <td>[{'doi': '10.1093/nar/gkm957', 'pubmed-id': 17...</td>\n",
" <td>[{'url': 'https://bmrb.io/validate/', 'name': ...</td>\n",
" <td></td>\n",
" <td>open</td>\n",
" <td>yes</td>\n",
" <td>https://bmrb.io/deposit/</td>\n",
" <td>open</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3022</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2020-06-17T10:25:30.000Z</td>\n",
" <td>2022-02-08T10:41:04.073Z</td>\n",
" <td>10.25504/FAIRsharing.8b7a2f</td>\n",
" <td>Fisheries and Oceans Canada Pacific Region Dat...</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-name': 'Peter Chandler', 'contact-e...</td>\n",
" <td>http://www.pac.dfo-mpo.gc.ca/science/oceans/da...</td>\n",
" <td>3022</td>\n",
" <td>The Institute of Ocean Sciences (IOS)/Ocean Sc...</td>\n",
" <td>None</td>\n",
" <td>[{'url': 'DFO.PAC.SCI.IOSData-DonneesISO.SCI.P...</td>\n",
" <td>NaN</td>\n",
" <td>[{'name': 'Users must contact the Senior Analy...</td>\n",
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
" <td>[biodbcore-001530, bsg-d001530]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Environmental Science, Meteorology, Earth Sci...</td>\n",
" <td>[Climate]</td>\n",
" <td>[Not applicable]</td>\n",
" <td>[Salinity, Temperature]</td>\n",
" <td>[Canada]</td>\n",
" <td>FAIRsharing record for: Fisheries and Oceans C...</td>\n",
" <td>None</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.8...</td>\n",
" <td>10.25504/FAIRsharing.8b7a2f</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: The Institu...</td>\n",
" <td>[]</td>\n",
" <td>[{'licence-name': 'Fisheries and Oceans Canada...</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2998</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2020-05-21T07:42:30.000Z</td>\n",
" <td>2022-02-08T10:40:19.531Z</td>\n",
" <td>10.25504/FAIRsharing.e08886</td>\n",
" <td>Climate Prediction Center</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-name': 'Jon Hoopingarner', 'contact...</td>\n",
" <td>https://www.cpc.ncep.noaa.gov/</td>\n",
" <td>2998</td>\n",
" <td>The Climate Prediction Center (CPC) produces o...</td>\n",
" <td>CPC</td>\n",
" <td>[{'url': 'https://www.cpc.ncep.noaa.gov/commen...</td>\n",
" <td>1970.0</td>\n",
" <td>[{'url': 'https://www.cpc.ncep.noaa.gov/', 'na...</td>\n",
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
" <td>[biodbcore-001504, bsg-d001504]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Hydrogeology, Geography, Meteorology, Geodesy...</td>\n",
" <td>[Climate]</td>\n",
" <td>[Not applicable]</td>\n",
" <td>[Forecasting, weather]</td>\n",
" <td>[United States]</td>\n",
" <td>FAIRsharing record for: Climate Prediction Center</td>\n",
" <td>CPC</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.e...</td>\n",
" <td>10.25504/FAIRsharing.e08886</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: The Climate...</td>\n",
" <td>[]</td>\n",
" <td>[{'licence-name': 'National Weather Service Di...</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2301</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2016-06-03T14:54:08.000Z</td>\n",
" <td>2021-11-24T13:17:51.201Z</td>\n",
" <td>10.25504/FAIRsharing.meh9wz</td>\n",
" <td>Acytostelium Gene Database</td>\n",
" <td>deprecated</td>\n",
" <td>[{'contact-name': 'Acytostelium genome consort...</td>\n",
" <td>http://cosmos.bot.kyoto-u.ac.jp/acytodb//cgi-b...</td>\n",
" <td>2301</td>\n",
" <td>Genome and transcriptome database of Acytostel...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2008.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[biodbcore-000775, bsg-d000775]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Genomics, Life Science, Transcriptomics]</td>\n",
" <td>[DNA sequence data, Gene model annotation]</td>\n",
" <td>[Acytostelium subglobosum]</td>\n",
" <td>[]</td>\n",
" <td>[United Kingdom, Japan]</td>\n",
" <td>FAIRsharing record for: Acytostelium Gene Data...</td>\n",
" <td>None</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.m...</td>\n",
" <td>10.25504/FAIRsharing.meh9wz</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: Genome and ...</td>\n",
" <td>[{'id': 1139, 'pubmed_id': 25758444, 'title': ...</td>\n",
" <td>[]</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>This resource is no longer available at the st...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2021-9-17</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id type attributes.created-at \\\n",
"0 3226 fairsharing-records 2020-12-09T11:53:44.000Z \n",
"1 2114 fairsharing-records 2014-11-04T15:23:40.000Z \n",
"2 3022 fairsharing-records 2020-06-17T10:25:30.000Z \n",
"3 2998 fairsharing-records 2020-05-21T07:42:30.000Z \n",
"4 2301 fairsharing-records 2016-06-03T14:54:08.000Z \n",
"\n",
" attributes.updated-at attributes.metadata.doi \\\n",
"0 2022-02-08T10:42:36.452Z 10.25504/FAIRsharing.d6423b \n",
"1 2022-01-21T14:39:02.195Z 10.25504/FAIRsharing.p06nme \n",
"2 2022-02-08T10:41:04.073Z 10.25504/FAIRsharing.8b7a2f \n",
"3 2022-02-08T10:40:19.531Z 10.25504/FAIRsharing.e08886 \n",
"4 2021-11-24T13:17:51.201Z 10.25504/FAIRsharing.meh9wz \n",
"\n",
" attributes.metadata.name \\\n",
"0 WDC Sunspot Index and Long-term Solar Observat... \n",
"1 Biological Magnetic Resonance Data Bank \n",
"2 Fisheries and Oceans Canada Pacific Region Dat... \n",
"3 Climate Prediction Center \n",
"4 Acytostelium Gene Database \n",
"\n",
" attributes.metadata.status \\\n",
"0 ready \n",
"1 ready \n",
"2 ready \n",
"3 ready \n",
"4 deprecated \n",
"\n",
" attributes.metadata.contacts \\\n",
"0 [{'contact-name': 'Frédéric Clette', 'contact-... \n",
"1 [{'contact-name': 'Helpdesk', 'contact-email':... \n",
"2 [{'contact-name': 'Peter Chandler', 'contact-e... \n",
"3 [{'contact-name': 'Jon Hoopingarner', 'contact... \n",
"4 [{'contact-name': 'Acytostelium genome consort... \n",
"\n",
" attributes.metadata.homepage \\\n",
"0 http://sidc.be/silso/home \n",
"1 https://bmrb.io/ \n",
"2 http://www.pac.dfo-mpo.gc.ca/science/oceans/da... \n",
"3 https://www.cpc.ncep.noaa.gov/ \n",
"4 http://cosmos.bot.kyoto-u.ac.jp/acytodb//cgi-b... \n",
"\n",
" attributes.metadata.identifier \\\n",
"0 3226 \n",
"1 2114 \n",
"2 3022 \n",
"3 2998 \n",
"4 2301 \n",
"\n",
" attributes.metadata.description \\\n",
"0 The WDC-SILSO is an activity of the Operationa... \n",
"1 BMRB collects, annotates, archives, and dissem... \n",
"2 The Institute of Ocean Sciences (IOS)/Ocean Sc... \n",
"3 The Climate Prediction Center (CPC) produces o... \n",
"4 Genome and transcriptome database of Acytostel... \n",
"\n",
" attributes.metadata.abbreviation \\\n",
"0 WDC-SILSO \n",
"1 BMRB \n",
"2 None \n",
"3 CPC \n",
"4 NaN \n",
"\n",
" attributes.metadata.support-links \\\n",
"0 [{'url': 'http://www.sidc.be/silso/taxonomy/te... \n",
"1 [{'url': 'https://bmrb.io/bmrb/news/', 'name':... \n",
"2 [{'url': 'DFO.PAC.SCI.IOSData-DonneesISO.SCI.P... \n",
"3 [{'url': 'https://www.cpc.ncep.noaa.gov/commen... \n",
"4 NaN \n",
"\n",
" attributes.metadata.year-creation \\\n",
"0 2013.0 \n",
"1 1988.0 \n",
"2 NaN \n",
"3 1970.0 \n",
"4 2008.0 \n",
"\n",
" attributes.metadata.data-processes \\\n",
"0 [{'url': 'http://www.sidc.be/silso/datafiles',... \n",
"1 [{'url': 'https://bmrb.io/data_library/rsync.s... \n",
"2 [{'name': 'Users must contact the Senior Analy... \n",
"3 [{'url': 'https://www.cpc.ncep.noaa.gov/', 'na... \n",
"4 NaN \n",
"\n",
" attributes.metadata.cross-references \\\n",
"0 [{'url': 'https://www.re3data.org/repository/r... \n",
"1 [{'url': 'https://www.re3data.org/repository/r... \n",
"2 [{'url': 'https://www.re3data.org/repository/r... \n",
"3 [{'url': 'https://www.re3data.org/repository/r... \n",
"4 NaN \n",
"\n",
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
"0 [biodbcore-001740, bsg-d001740] Database \n",
"1 [biodbcore-000584, bsg-d000584] Database \n",
"2 [biodbcore-001530, bsg-d001530] Database \n",
"3 [biodbcore-001504, bsg-d001504] Database \n",
"4 [biodbcore-000775, bsg-d000775] Database \n",
"\n",
" attributes.record-type attributes.subjects \\\n",
"0 repository [Electromagnetism, Astrophysics and Astronomy,... \n",
"1 repository [Structural Biology] \n",
"2 repository [Environmental Science, Meteorology, Earth Sci... \n",
"3 repository [Hydrogeology, Geography, Meteorology, Geodesy... \n",
"4 repository [Genomics, Life Science, Transcriptomics] \n",
"\n",
" attributes.domains \\\n",
"0 [Climate, Observation design] \n",
"1 [Molecular structure, Protein structure, Pepti... \n",
"2 [Climate] \n",
"3 [Climate] \n",
"4 [DNA sequence data, Gene model annotation] \n",
"\n",
" attributes.taxonomies \\\n",
"0 [Not applicable] \n",
"1 [All] \n",
"2 [Not applicable] \n",
"3 [Not applicable] \n",
"4 [Acytostelium subglobosum] \n",
"\n",
" attributes.user-defined-tags attributes.countries \\\n",
"0 [Climate change, earth observation, Electromag... [Belgium] \n",
"1 [] [United States] \n",
"2 [Salinity, Temperature] [Canada] \n",
"3 [Forecasting, weather] [United States] \n",
"4 [] [United Kingdom, Japan] \n",
"\n",
" attributes.name attributes.abbreviation \\\n",
"0 FAIRsharing record for: WDC Sunspot Index and ... WDC-SILSO \n",
"1 FAIRsharing record for: Biological Magnetic Re... BMRB \n",
"2 FAIRsharing record for: Fisheries and Oceans C... None \n",
"3 FAIRsharing record for: Climate Prediction Center CPC \n",
"4 FAIRsharing record for: Acytostelium Gene Data... None \n",
"\n",
" attributes.url \\\n",
"0 https://fairsharing.org/10.25504/FAIRsharing.d... \n",
"1 https://fairsharing.org/10.25504/FAIRsharing.p... \n",
"2 https://fairsharing.org/10.25504/FAIRsharing.8... \n",
"3 https://fairsharing.org/10.25504/FAIRsharing.e... \n",
"4 https://fairsharing.org/10.25504/FAIRsharing.m... \n",
"\n",
" attributes.doi \\\n",
"0 10.25504/FAIRsharing.d6423b \n",
"1 10.25504/FAIRsharing.p06nme \n",
"2 10.25504/FAIRsharing.8b7a2f \n",
"3 10.25504/FAIRsharing.e08886 \n",
"4 10.25504/FAIRsharing.meh9wz \n",
"\n",
" attributes.fairsharing-licence \\\n",
"0 https://creativecommons.org/licenses/by-sa/4.0... \n",
"1 https://creativecommons.org/licenses/by-sa/4.0... \n",
"2 https://creativecommons.org/licenses/by-sa/4.0... \n",
"3 https://creativecommons.org/licenses/by-sa/4.0... \n",
"4 https://creativecommons.org/licenses/by-sa/4.0... \n",
"\n",
" attributes.description \\\n",
"0 This FAIRsharing record describes: The WDC-SIL... \n",
"1 This FAIRsharing record describes: BMRB collec... \n",
"2 This FAIRsharing record describes: The Institu... \n",
"3 This FAIRsharing record describes: The Climate... \n",
"4 This FAIRsharing record describes: Genome and ... \n",
"\n",
" attributes.publications \\\n",
"0 [] \n",
"1 [{'id': 552, 'pubmed_id': 18288446, 'title': '... \n",
"2 [] \n",
"3 [] \n",
"4 [{'id': 1139, 'pubmed_id': 25758444, 'title': ... \n",
"\n",
" attributes.licence-links attributes.url-for-logo \\\n",
"0 [{'licence-name': 'SILSO legal notices', 'lice... None \n",
"1 [{'licence-name': 'wwPDB Privacy and Usage Pol... None \n",
"2 [{'licence-name': 'Fisheries and Oceans Canada... None \n",
"3 [{'licence-name': 'National Weather Service Di... None \n",
"4 [] None \n",
"\n",
" attributes.metadata.citations \\\n",
"0 NaN \n",
"1 [{'doi': '10.1093/nar/gkm957', 'pubmed-id': 17... \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.associated-tools \\\n",
"0 NaN \n",
"1 [{'url': 'https://bmrb.io/validate/', 'name': ... \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.deprecation-reason \\\n",
"0 NaN \n",
"1 \n",
"2 NaN \n",
"3 NaN \n",
"4 This resource is no longer available at the st... \n",
"\n",
" attributes.metadata.data-access-condition.type \\\n",
"0 NaN \n",
"1 open \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-contact-information \\\n",
"0 NaN \n",
"1 yes \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-deposition-condition.url \\\n",
"0 NaN \n",
"1 https://bmrb.io/deposit/ \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-deposition-condition.type \\\n",
"0 NaN \n",
"1 open \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.deprecation-date attributes.metadata.access-points \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 2021-9-17 NaN \n",
"\n",
" attributes.metadata.data-access-condition.url \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.resource-sustainability.url \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.resource-sustainability.name \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-preservation-policy.url \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-preservation-policy.name \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-access-for-pre-publication-review \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-versioning attributes.metadata.data-curation.type \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" attributes.metadata.data-curation.url \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.citation-to-related-publications \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.tombstone \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open('../data/raw/fairsharing_dump_api_02_2022.json') as f:\n",
" lines = f.read().splitlines()\n",
" \n",
"fairsharing_df = pd.DataFrame(lines)\n",
"fairsharing_df.columns = ['json_element']\n",
"fairsharing_df['json_element'].apply(json.loads)\n",
"fairsharing_df = pd.json_normalize(fairsharing_df['json_element'].apply(json.loads))\n",
"\n",
"fairsharing_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>type</th>\n",
" <th>attributes.created-at</th>\n",
" <th>attributes.updated-at</th>\n",
" <th>attributes.metadata.doi</th>\n",
" <th>attributes.metadata.name</th>\n",
" <th>attributes.metadata.status</th>\n",
" <th>attributes.metadata.contacts</th>\n",
" <th>attributes.metadata.homepage</th>\n",
" <th>attributes.metadata.identifier</th>\n",
" <th>attributes.metadata.description</th>\n",
" <th>attributes.metadata.abbreviation</th>\n",
" <th>attributes.metadata.support-links</th>\n",
" <th>attributes.metadata.year-creation</th>\n",
" <th>attributes.metadata.data-processes</th>\n",
" <th>attributes.metadata.cross-references</th>\n",
" <th>attributes.legacy-ids</th>\n",
" <th>attributes.fairsharing-registry</th>\n",
" <th>attributes.record-type</th>\n",
" <th>attributes.subjects</th>\n",
" <th>attributes.domains</th>\n",
" <th>attributes.taxonomies</th>\n",
" <th>attributes.user-defined-tags</th>\n",
" <th>attributes.countries</th>\n",
" <th>attributes.name</th>\n",
" <th>attributes.abbreviation</th>\n",
" <th>attributes.url</th>\n",
" <th>attributes.doi</th>\n",
" <th>attributes.fairsharing-licence</th>\n",
" <th>attributes.description</th>\n",
" <th>attributes.publications</th>\n",
" <th>attributes.licence-links</th>\n",
" <th>attributes.url-for-logo</th>\n",
" <th>attributes.metadata.citations</th>\n",
" <th>attributes.metadata.associated-tools</th>\n",
" <th>attributes.metadata.deprecation-reason</th>\n",
" <th>attributes.metadata.data-access-condition.type</th>\n",
" <th>attributes.metadata.data-contact-information</th>\n",
" <th>attributes.metadata.data-deposition-condition.url</th>\n",
" <th>attributes.metadata.data-deposition-condition.type</th>\n",
" <th>attributes.metadata.deprecation-date</th>\n",
" <th>attributes.metadata.access-points</th>\n",
" <th>attributes.metadata.data-access-condition.url</th>\n",
" <th>attributes.metadata.resource-sustainability.url</th>\n",
" <th>attributes.metadata.resource-sustainability.name</th>\n",
" <th>attributes.metadata.data-preservation-policy.url</th>\n",
" <th>attributes.metadata.data-preservation-policy.name</th>\n",
" <th>attributes.metadata.data-access-for-pre-publication-review</th>\n",
" <th>attributes.metadata.data-versioning</th>\n",
" <th>attributes.metadata.data-curation.type</th>\n",
" <th>attributes.metadata.data-curation.url</th>\n",
" <th>attributes.metadata.citation-to-related-publications</th>\n",
" <th>attributes.metadata.tombstone</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1601</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1764</td>\n",
" <td>1853</td>\n",
" <td>1853.000000</td>\n",
" <td>1853</td>\n",
" <td>1671</td>\n",
" <td>1663</td>\n",
" <td>1541.000000</td>\n",
" <td>1626</td>\n",
" <td>790</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1671</td>\n",
" <td>1853</td>\n",
" <td>1601</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>18</td>\n",
" <td>621</td>\n",
" <td>632</td>\n",
" <td>363</td>\n",
" <td>42</td>\n",
" <td>47</td>\n",
" <td>22</td>\n",
" <td>33</td>\n",
" <td>238</td>\n",
" <td>465</td>\n",
" <td>19</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>10</td>\n",
" <td>17</td>\n",
" <td>22</td>\n",
" <td>8</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>1853</td>\n",
" <td>1</td>\n",
" <td>1218</td>\n",
" <td>1853</td>\n",
" <td>1601</td>\n",
" <td>1851</td>\n",
" <td>4</td>\n",
" <td>1623</td>\n",
" <td>1853</td>\n",
" <td>NaN</td>\n",
" <td>1853</td>\n",
" <td>1655</td>\n",
" <td>1646</td>\n",
" <td>NaN</td>\n",
" <td>1625</td>\n",
" <td>790</td>\n",
" <td>1799</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>935</td>\n",
" <td>1205</td>\n",
" <td>385</td>\n",
" <td>395</td>\n",
" <td>194</td>\n",
" <td>1851</td>\n",
" <td>1655</td>\n",
" <td>1853</td>\n",
" <td>1601</td>\n",
" <td>1</td>\n",
" <td>1853</td>\n",
" <td>1135</td>\n",
" <td>1119</td>\n",
" <td>18</td>\n",
" <td>331</td>\n",
" <td>627</td>\n",
" <td>104</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>22</td>\n",
" <td>2</td>\n",
" <td>71</td>\n",
" <td>460</td>\n",
" <td>19</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>3226</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2014-11-04T15:23:40.000Z</td>\n",
" <td>2022-02-08T10:42:36.452Z</td>\n",
" <td>10.25504/FAIRsharing.d6423b</td>\n",
" <td>iDog</td>\n",
" <td>ready</td>\n",
" <td>[]</td>\n",
" <td>http://sidc.be/silso/home</td>\n",
" <td>NaN</td>\n",
" <td>The WDC-SILSO is an activity of the Operationa...</td>\n",
" <td>CGD</td>\n",
" <td>[{'url': 'https://github.com/gbif/ipt/wiki/IPT...</td>\n",
" <td>NaN</td>\n",
" <td>[{'url': 'https://site.uit.no/dataverseno/abou...</td>\n",
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
" <td>[]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Life Science]</td>\n",
" <td>[]</td>\n",
" <td>[All]</td>\n",
" <td>[]</td>\n",
" <td>[United States]</td>\n",
" <td>FAIRsharing record for: iDog</td>\n",
" <td>CGD</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.d...</td>\n",
" <td>10.25504/FAIRsharing.d6423b</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: The WDC-SIL...</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
" <td>/rails/active_storage/blobs/redirect/eyJfcmFpb...</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
" <td></td>\n",
" <td>open</td>\n",
" <td>yes</td>\n",
" <td>https://bmrb.io/deposit/</td>\n",
" <td>controlled</td>\n",
" <td>2021-9-17</td>\n",
" <td>[{'url': 'https://heidata.uni-heidelberg.de/oa...</td>\n",
" <td>https://arch.library.northwestern.edu/about?lo...</td>\n",
" <td>https://www.library.northwestern.edu/about/adm...</td>\n",
" <td>Commitment to Sustainability: Level 1</td>\n",
" <td>http://www.library.northwestern.edu/about/admi...</td>\n",
" <td>Digital Preservation Policy: Level 1</td>\n",
" <td>yes</td>\n",
" <td>yes</td>\n",
" <td>manual</td>\n",
" <td>https://www.gbif.org/tools/data-validator/about</td>\n",
" <td>yes</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
" <td>1853</td>\n",
" <td>636</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1564</td>\n",
" <td>40</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>55</td>\n",
" <td>1853</td>\n",
" <td>954</td>\n",
" <td>345</td>\n",
" <td>276</td>\n",
" <td>528</td>\n",
" <td>1258</td>\n",
" <td>607</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1853</td>\n",
" <td>1</td>\n",
" <td>690</td>\n",
" <td>735</td>\n",
" <td>1</td>\n",
" <td>285</td>\n",
" <td>3</td>\n",
" <td>125</td>\n",
" <td>38</td>\n",
" <td>45</td>\n",
" <td>1</td>\n",
" <td>21</td>\n",
" <td>81</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>9</td>\n",
" <td>16</td>\n",
" <td>11</td>\n",
" <td>1</td>\n",
" <td>34</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2481.862925</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2007.894873</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>554.072492</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10.933713</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1120.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1894.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2009.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2004.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2473.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2010.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2938.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2015.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3827.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id type attributes.created-at \\\n",
"count 1853 1853 1853 \n",
"unique 1853 1 1218 \n",
"top 3226 fairsharing-records 2014-11-04T15:23:40.000Z \n",
"freq 1 1853 636 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" attributes.updated-at attributes.metadata.doi \\\n",
"count 1853 1601 \n",
"unique 1853 1601 \n",
"top 2022-02-08T10:42:36.452Z 10.25504/FAIRsharing.d6423b \n",
"freq 1 1 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.metadata.name attributes.metadata.status \\\n",
"count 1853 1853 \n",
"unique 1851 4 \n",
"top iDog ready \n",
"freq 2 1564 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.metadata.contacts attributes.metadata.homepage \\\n",
"count 1764 1853 \n",
"unique 1623 1853 \n",
"top [] http://sidc.be/silso/home \n",
"freq 40 1 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.metadata.identifier \\\n",
"count 1853.000000 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
"mean 2481.862925 \n",
"std 554.072492 \n",
"min 1120.000000 \n",
"25% 2009.000000 \n",
"50% 2473.000000 \n",
"75% 2938.000000 \n",
"max 3827.000000 \n",
"\n",
" attributes.metadata.description \\\n",
"count 1853 \n",
"unique 1853 \n",
"top The WDC-SILSO is an activity of the Operationa... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.abbreviation \\\n",
"count 1671 \n",
"unique 1655 \n",
"top CGD \n",
"freq 3 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.support-links \\\n",
"count 1663 \n",
"unique 1646 \n",
"top [{'url': 'https://github.com/gbif/ipt/wiki/IPT... \n",
"freq 6 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.year-creation \\\n",
"count 1541.000000 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
"mean 2007.894873 \n",
"std 10.933713 \n",
"min 1894.000000 \n",
"25% 2004.000000 \n",
"50% 2010.000000 \n",
"75% 2015.000000 \n",
"max 2022.000000 \n",
"\n",
" attributes.metadata.data-processes \\\n",
"count 1626 \n",
"unique 1625 \n",
"top [{'url': 'https://site.uit.no/dataverseno/abou... \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.cross-references \\\n",
"count 790 \n",
"unique 790 \n",
"top [{'url': 'https://www.re3data.org/repository/r... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
"count 1853 1853 \n",
"unique 1799 1 \n",
"top [] Database \n",
"freq 55 1853 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.record-type attributes.subjects attributes.domains \\\n",
"count 1853 1853 1853 \n",
"unique 3 935 1205 \n",
"top repository [Life Science] [] \n",
"freq 954 345 276 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" attributes.taxonomies attributes.user-defined-tags \\\n",
"count 1853 1853 \n",
"unique 385 395 \n",
"top [All] [] \n",
"freq 528 1258 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.countries attributes.name \\\n",
"count 1853 1853 \n",
"unique 194 1851 \n",
"top [United States] FAIRsharing record for: iDog \n",
"freq 607 2 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.abbreviation \\\n",
"count 1671 \n",
"unique 1655 \n",
"top CGD \n",
"freq 3 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.url \\\n",
"count 1853 \n",
"unique 1853 \n",
"top https://fairsharing.org/10.25504/FAIRsharing.d... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.doi \\\n",
"count 1601 \n",
"unique 1601 \n",
"top 10.25504/FAIRsharing.d6423b \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.fairsharing-licence \\\n",
"count 1853 \n",
"unique 1 \n",
"top https://creativecommons.org/licenses/by-sa/4.0... \n",
"freq 1853 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.description \\\n",
"count 1853 \n",
"unique 1853 \n",
"top This FAIRsharing record describes: The WDC-SIL... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.publications attributes.licence-links \\\n",
"count 1853 1853 \n",
"unique 1135 1119 \n",
"top [] [] \n",
"freq 690 735 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.url-for-logo \\\n",
"count 18 \n",
"unique 18 \n",
"top /rails/active_storage/blobs/redirect/eyJfcmFpb... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.citations attributes.metadata.associated-tools \\\n",
"count 621 632 \n",
"unique 331 627 \n",
"top [] [] \n",
"freq 285 3 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.metadata.deprecation-reason \\\n",
"count 363 \n",
"unique 104 \n",
"top \n",
"freq 125 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-access-condition.type \\\n",
"count 42 \n",
"unique 2 \n",
"top open \n",
"freq 38 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-contact-information \\\n",
"count 47 \n",
"unique 2 \n",
"top yes \n",
"freq 45 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-deposition-condition.url \\\n",
"count 22 \n",
"unique 22 \n",
"top https://bmrb.io/deposit/ \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-deposition-condition.type \\\n",
"count 33 \n",
"unique 2 \n",
"top controlled \n",
"freq 21 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.deprecation-date \\\n",
"count 238 \n",
"unique 71 \n",
"top 2021-9-17 \n",
"freq 81 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.access-points \\\n",
"count 465 \n",
"unique 460 \n",
"top [{'url': 'https://heidata.uni-heidelberg.de/oa... \n",
"freq 3 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-access-condition.url \\\n",
"count 19 \n",
"unique 19 \n",
"top https://arch.library.northwestern.edu/about?lo... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.resource-sustainability.url \\\n",
"count 2 \n",
"unique 2 \n",
"top https://www.library.northwestern.edu/about/adm... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.resource-sustainability.name \\\n",
"count 2 \n",
"unique 2 \n",
"top Commitment to Sustainability: Level 1 \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-preservation-policy.url \\\n",
"count 3 \n",
"unique 3 \n",
"top http://www.library.northwestern.edu/about/admi... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-preservation-policy.name \\\n",
"count 3 \n",
"unique 3 \n",
"top Digital Preservation Policy: Level 1 \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-access-for-pre-publication-review \\\n",
"count 10 \n",
"unique 2 \n",
"top yes \n",
"freq 9 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-versioning \\\n",
"count 17 \n",
"unique 2 \n",
"top yes \n",
"freq 16 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-curation.type \\\n",
"count 22 \n",
"unique 4 \n",
"top manual \n",
"freq 11 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-curation.url \\\n",
"count 8 \n",
"unique 8 \n",
"top https://www.gbif.org/tools/data-validator/about \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.citation-to-related-publications \\\n",
"count 35 \n",
"unique 2 \n",
"top yes \n",
"freq 34 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.tombstone \n",
"count 1 \n",
"unique 1 \n",
"top True \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fairsharing_df.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0\n",
"type 0\n",
"attributes.created-at 0\n",
"attributes.updated-at 0\n",
"attributes.metadata.doi 252\n",
"attributes.metadata.name 0\n",
"attributes.metadata.status 0\n",
"attributes.metadata.contacts 89\n",
"attributes.metadata.homepage 0\n",
"attributes.metadata.identifier 0\n",
"attributes.metadata.description 0\n",
"attributes.metadata.abbreviation 182\n",
"attributes.metadata.support-links 190\n",
"attributes.metadata.year-creation 312\n",
"attributes.metadata.data-processes 227\n",
"attributes.metadata.cross-references 1063\n",
"attributes.legacy-ids 0\n",
"attributes.fairsharing-registry 0\n",
"attributes.record-type 0\n",
"attributes.subjects 0\n",
"attributes.domains 0\n",
"attributes.taxonomies 0\n",
"attributes.user-defined-tags 0\n",
"attributes.countries 0\n",
"attributes.name 0\n",
"attributes.abbreviation 182\n",
"attributes.url 0\n",
"attributes.doi 252\n",
"attributes.fairsharing-licence 0\n",
"attributes.description 0\n",
"attributes.publications 0\n",
"attributes.licence-links 0\n",
"attributes.url-for-logo 1835\n",
"attributes.metadata.citations 1232\n",
"attributes.metadata.associated-tools 1221\n",
"attributes.metadata.deprecation-reason 1490\n",
"attributes.metadata.data-access-condition.type 1811\n",
"attributes.metadata.data-contact-information 1806\n",
"attributes.metadata.data-deposition-condition.url 1831\n",
"attributes.metadata.data-deposition-condition.type 1820\n",
"attributes.metadata.deprecation-date 1615\n",
"attributes.metadata.access-points 1388\n",
"attributes.metadata.data-access-condition.url 1834\n",
"attributes.metadata.resource-sustainability.url 1851\n",
"attributes.metadata.resource-sustainability.name 1851\n",
"attributes.metadata.data-preservation-policy.url 1850\n",
"attributes.metadata.data-preservation-policy.name 1850\n",
"attributes.metadata.data-access-for-pre-publication-review 1843\n",
"attributes.metadata.data-versioning 1836\n",
"attributes.metadata.data-curation.type 1831\n",
"attributes.metadata.data-curation.url 1845\n",
"attributes.metadata.citation-to-related-publications 1818\n",
"attributes.metadata.tombstone 1852\n",
"dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fairsharing_df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"attributes.record-type\n",
"knowledgebase 787\n",
"knowledgebase_and_repository 112\n",
"repository 954\n",
"dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fairsharing_df.groupby('attributes.record-type').size()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}