2090 lines
98 KiB
Plaintext
2090 lines
98 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import ast\n",
|
|
"import csv\n",
|
|
"import json\n",
|
|
"\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"import plotly\n",
|
|
"from plotly.offline import iplot, init_notebook_mode\n",
|
|
"import plotly.graph_objs as go\n",
|
|
"import plotly.express as px\n",
|
|
"\n",
|
|
"pd.set_option('display.max_columns', None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Loading datasets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>id</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>attributes.created-at</th>\n",
|
|
" <th>attributes.updated-at</th>\n",
|
|
" <th>attributes.metadata.doi</th>\n",
|
|
" <th>attributes.metadata.name</th>\n",
|
|
" <th>attributes.metadata.status</th>\n",
|
|
" <th>attributes.metadata.contacts</th>\n",
|
|
" <th>attributes.metadata.homepage</th>\n",
|
|
" <th>attributes.metadata.identifier</th>\n",
|
|
" <th>attributes.metadata.description</th>\n",
|
|
" <th>attributes.metadata.abbreviation</th>\n",
|
|
" <th>attributes.metadata.support-links</th>\n",
|
|
" <th>attributes.metadata.year-creation</th>\n",
|
|
" <th>attributes.metadata.data-processes</th>\n",
|
|
" <th>attributes.metadata.cross-references</th>\n",
|
|
" <th>attributes.legacy-ids</th>\n",
|
|
" <th>attributes.fairsharing-registry</th>\n",
|
|
" <th>attributes.record-type</th>\n",
|
|
" <th>attributes.subjects</th>\n",
|
|
" <th>attributes.domains</th>\n",
|
|
" <th>attributes.taxonomies</th>\n",
|
|
" <th>attributes.user-defined-tags</th>\n",
|
|
" <th>attributes.countries</th>\n",
|
|
" <th>attributes.name</th>\n",
|
|
" <th>attributes.abbreviation</th>\n",
|
|
" <th>attributes.url</th>\n",
|
|
" <th>attributes.doi</th>\n",
|
|
" <th>attributes.fairsharing-licence</th>\n",
|
|
" <th>attributes.description</th>\n",
|
|
" <th>attributes.publications</th>\n",
|
|
" <th>attributes.licence-links</th>\n",
|
|
" <th>attributes.url-for-logo</th>\n",
|
|
" <th>attributes.metadata.citations</th>\n",
|
|
" <th>attributes.metadata.associated-tools</th>\n",
|
|
" <th>attributes.metadata.deprecation-reason</th>\n",
|
|
" <th>attributes.metadata.data-access-condition.type</th>\n",
|
|
" <th>attributes.metadata.data-contact-information</th>\n",
|
|
" <th>attributes.metadata.data-deposition-condition.url</th>\n",
|
|
" <th>attributes.metadata.data-deposition-condition.type</th>\n",
|
|
" <th>attributes.metadata.deprecation-date</th>\n",
|
|
" <th>attributes.metadata.access-points</th>\n",
|
|
" <th>attributes.metadata.data-access-condition.url</th>\n",
|
|
" <th>attributes.metadata.resource-sustainability.url</th>\n",
|
|
" <th>attributes.metadata.resource-sustainability.name</th>\n",
|
|
" <th>attributes.metadata.data-preservation-policy.url</th>\n",
|
|
" <th>attributes.metadata.data-preservation-policy.name</th>\n",
|
|
" <th>attributes.metadata.data-access-for-pre-publication-review</th>\n",
|
|
" <th>attributes.metadata.data-versioning</th>\n",
|
|
" <th>attributes.metadata.data-curation.type</th>\n",
|
|
" <th>attributes.metadata.data-curation.url</th>\n",
|
|
" <th>attributes.metadata.citation-to-related-publications</th>\n",
|
|
" <th>attributes.metadata.tombstone</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>3226</td>\n",
|
|
" <td>fairsharing-records</td>\n",
|
|
" <td>2020-12-09T11:53:44.000Z</td>\n",
|
|
" <td>2022-02-08T10:42:36.452Z</td>\n",
|
|
" <td>10.25504/FAIRsharing.d6423b</td>\n",
|
|
" <td>WDC Sunspot Index and Long-term Solar Observat...</td>\n",
|
|
" <td>ready</td>\n",
|
|
" <td>[{'contact-name': 'Frédéric Clette', 'contact-...</td>\n",
|
|
" <td>http://sidc.be/silso/home</td>\n",
|
|
" <td>3226</td>\n",
|
|
" <td>The WDC-SILSO is an activity of the Operationa...</td>\n",
|
|
" <td>WDC-SILSO</td>\n",
|
|
" <td>[{'url': 'http://www.sidc.be/silso/taxonomy/te...</td>\n",
|
|
" <td>2013.0</td>\n",
|
|
" <td>[{'url': 'http://www.sidc.be/silso/datafiles',...</td>\n",
|
|
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
|
|
" <td>[biodbcore-001740, bsg-d001740]</td>\n",
|
|
" <td>Database</td>\n",
|
|
" <td>repository</td>\n",
|
|
" <td>[Electromagnetism, Astrophysics and Astronomy,...</td>\n",
|
|
" <td>[Climate, Observation design]</td>\n",
|
|
" <td>[Not applicable]</td>\n",
|
|
" <td>[Climate change, earth observation, Electromag...</td>\n",
|
|
" <td>[Belgium]</td>\n",
|
|
" <td>FAIRsharing record for: WDC Sunspot Index and ...</td>\n",
|
|
" <td>WDC-SILSO</td>\n",
|
|
" <td>https://fairsharing.org/10.25504/FAIRsharing.d...</td>\n",
|
|
" <td>10.25504/FAIRsharing.d6423b</td>\n",
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
" <td>This FAIRsharing record describes: The WDC-SIL...</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>[{'licence-name': 'SILSO legal notices', 'lice...</td>\n",
|
|
" <td>None</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2114</td>\n",
|
|
" <td>fairsharing-records</td>\n",
|
|
" <td>2014-11-04T15:23:40.000Z</td>\n",
|
|
" <td>2022-01-21T14:39:02.195Z</td>\n",
|
|
" <td>10.25504/FAIRsharing.p06nme</td>\n",
|
|
" <td>Biological Magnetic Resonance Data Bank</td>\n",
|
|
" <td>ready</td>\n",
|
|
" <td>[{'contact-name': 'Helpdesk', 'contact-email':...</td>\n",
|
|
" <td>https://bmrb.io/</td>\n",
|
|
" <td>2114</td>\n",
|
|
" <td>BMRB collects, annotates, archives, and dissem...</td>\n",
|
|
" <td>BMRB</td>\n",
|
|
" <td>[{'url': 'https://bmrb.io/bmrb/news/', 'name':...</td>\n",
|
|
" <td>1988.0</td>\n",
|
|
" <td>[{'url': 'https://bmrb.io/data_library/rsync.s...</td>\n",
|
|
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
|
|
" <td>[biodbcore-000584, bsg-d000584]</td>\n",
|
|
" <td>Database</td>\n",
|
|
" <td>repository</td>\n",
|
|
" <td>[Structural Biology]</td>\n",
|
|
" <td>[Molecular structure, Protein structure, Pepti...</td>\n",
|
|
" <td>[All]</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>[United States]</td>\n",
|
|
" <td>FAIRsharing record for: Biological Magnetic Re...</td>\n",
|
|
" <td>BMRB</td>\n",
|
|
" <td>https://fairsharing.org/10.25504/FAIRsharing.p...</td>\n",
|
|
" <td>10.25504/FAIRsharing.p06nme</td>\n",
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
" <td>This FAIRsharing record describes: BMRB collec...</td>\n",
|
|
" <td>[{'id': 552, 'pubmed_id': 18288446, 'title': '...</td>\n",
|
|
" <td>[{'licence-name': 'wwPDB Privacy and Usage Pol...</td>\n",
|
|
" <td>None</td>\n",
|
|
" <td>[{'doi': '10.1093/nar/gkm957', 'pubmed-id': 17...</td>\n",
|
|
" <td>[{'url': 'https://bmrb.io/validate/', 'name': ...</td>\n",
|
|
" <td></td>\n",
|
|
" <td>open</td>\n",
|
|
" <td>yes</td>\n",
|
|
" <td>https://bmrb.io/deposit/</td>\n",
|
|
" <td>open</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>3022</td>\n",
|
|
" <td>fairsharing-records</td>\n",
|
|
" <td>2020-06-17T10:25:30.000Z</td>\n",
|
|
" <td>2022-02-08T10:41:04.073Z</td>\n",
|
|
" <td>10.25504/FAIRsharing.8b7a2f</td>\n",
|
|
" <td>Fisheries and Oceans Canada Pacific Region Dat...</td>\n",
|
|
" <td>ready</td>\n",
|
|
" <td>[{'contact-name': 'Peter Chandler', 'contact-e...</td>\n",
|
|
" <td>http://www.pac.dfo-mpo.gc.ca/science/oceans/da...</td>\n",
|
|
" <td>3022</td>\n",
|
|
" <td>The Institute of Ocean Sciences (IOS)/Ocean Sc...</td>\n",
|
|
" <td>None</td>\n",
|
|
" <td>[{'url': 'DFO.PAC.SCI.IOSData-DonneesISO.SCI.P...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>[{'name': 'Users must contact the Senior Analy...</td>\n",
|
|
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
|
|
" <td>[biodbcore-001530, bsg-d001530]</td>\n",
|
|
" <td>Database</td>\n",
|
|
" <td>repository</td>\n",
|
|
" <td>[Environmental Science, Meteorology, Earth Sci...</td>\n",
|
|
" <td>[Climate]</td>\n",
|
|
" <td>[Not applicable]</td>\n",
|
|
" <td>[Salinity, Temperature]</td>\n",
|
|
" <td>[Canada]</td>\n",
|
|
" <td>FAIRsharing record for: Fisheries and Oceans C...</td>\n",
|
|
" <td>None</td>\n",
|
|
" <td>https://fairsharing.org/10.25504/FAIRsharing.8...</td>\n",
|
|
" <td>10.25504/FAIRsharing.8b7a2f</td>\n",
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
" <td>This FAIRsharing record describes: The Institu...</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>[{'licence-name': 'Fisheries and Oceans Canada...</td>\n",
|
|
" <td>None</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>2998</td>\n",
|
|
" <td>fairsharing-records</td>\n",
|
|
" <td>2020-05-21T07:42:30.000Z</td>\n",
|
|
" <td>2022-02-08T10:40:19.531Z</td>\n",
|
|
" <td>10.25504/FAIRsharing.e08886</td>\n",
|
|
" <td>Climate Prediction Center</td>\n",
|
|
" <td>ready</td>\n",
|
|
" <td>[{'contact-name': 'Jon Hoopingarner', 'contact...</td>\n",
|
|
" <td>https://www.cpc.ncep.noaa.gov/</td>\n",
|
|
" <td>2998</td>\n",
|
|
" <td>The Climate Prediction Center (CPC) produces o...</td>\n",
|
|
" <td>CPC</td>\n",
|
|
" <td>[{'url': 'https://www.cpc.ncep.noaa.gov/commen...</td>\n",
|
|
" <td>1970.0</td>\n",
|
|
" <td>[{'url': 'https://www.cpc.ncep.noaa.gov/', 'na...</td>\n",
|
|
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
|
|
" <td>[biodbcore-001504, bsg-d001504]</td>\n",
|
|
" <td>Database</td>\n",
|
|
" <td>repository</td>\n",
|
|
" <td>[Hydrogeology, Geography, Meteorology, Geodesy...</td>\n",
|
|
" <td>[Climate]</td>\n",
|
|
" <td>[Not applicable]</td>\n",
|
|
" <td>[Forecasting, weather]</td>\n",
|
|
" <td>[United States]</td>\n",
|
|
" <td>FAIRsharing record for: Climate Prediction Center</td>\n",
|
|
" <td>CPC</td>\n",
|
|
" <td>https://fairsharing.org/10.25504/FAIRsharing.e...</td>\n",
|
|
" <td>10.25504/FAIRsharing.e08886</td>\n",
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
" <td>This FAIRsharing record describes: The Climate...</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>[{'licence-name': 'National Weather Service Di...</td>\n",
|
|
" <td>None</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>2301</td>\n",
|
|
" <td>fairsharing-records</td>\n",
|
|
" <td>2016-06-03T14:54:08.000Z</td>\n",
|
|
" <td>2021-11-24T13:17:51.201Z</td>\n",
|
|
" <td>10.25504/FAIRsharing.meh9wz</td>\n",
|
|
" <td>Acytostelium Gene Database</td>\n",
|
|
" <td>deprecated</td>\n",
|
|
" <td>[{'contact-name': 'Acytostelium genome consort...</td>\n",
|
|
" <td>http://cosmos.bot.kyoto-u.ac.jp/acytodb//cgi-b...</td>\n",
|
|
" <td>2301</td>\n",
|
|
" <td>Genome and transcriptome database of Acytostel...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2008.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>[biodbcore-000775, bsg-d000775]</td>\n",
|
|
" <td>Database</td>\n",
|
|
" <td>repository</td>\n",
|
|
" <td>[Genomics, Life Science, Transcriptomics]</td>\n",
|
|
" <td>[DNA sequence data, Gene model annotation]</td>\n",
|
|
" <td>[Acytostelium subglobosum]</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>[United Kingdom, Japan]</td>\n",
|
|
" <td>FAIRsharing record for: Acytostelium Gene Data...</td>\n",
|
|
" <td>None</td>\n",
|
|
" <td>https://fairsharing.org/10.25504/FAIRsharing.m...</td>\n",
|
|
" <td>10.25504/FAIRsharing.meh9wz</td>\n",
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
" <td>This FAIRsharing record describes: Genome and ...</td>\n",
|
|
" <td>[{'id': 1139, 'pubmed_id': 25758444, 'title': ...</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>None</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>This resource is no longer available at the st...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-9-17</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" id type attributes.created-at \\\n",
|
|
"0 3226 fairsharing-records 2020-12-09T11:53:44.000Z \n",
|
|
"1 2114 fairsharing-records 2014-11-04T15:23:40.000Z \n",
|
|
"2 3022 fairsharing-records 2020-06-17T10:25:30.000Z \n",
|
|
"3 2998 fairsharing-records 2020-05-21T07:42:30.000Z \n",
|
|
"4 2301 fairsharing-records 2016-06-03T14:54:08.000Z \n",
|
|
"\n",
|
|
" attributes.updated-at attributes.metadata.doi \\\n",
|
|
"0 2022-02-08T10:42:36.452Z 10.25504/FAIRsharing.d6423b \n",
|
|
"1 2022-01-21T14:39:02.195Z 10.25504/FAIRsharing.p06nme \n",
|
|
"2 2022-02-08T10:41:04.073Z 10.25504/FAIRsharing.8b7a2f \n",
|
|
"3 2022-02-08T10:40:19.531Z 10.25504/FAIRsharing.e08886 \n",
|
|
"4 2021-11-24T13:17:51.201Z 10.25504/FAIRsharing.meh9wz \n",
|
|
"\n",
|
|
" attributes.metadata.name \\\n",
|
|
"0 WDC Sunspot Index and Long-term Solar Observat... \n",
|
|
"1 Biological Magnetic Resonance Data Bank \n",
|
|
"2 Fisheries and Oceans Canada Pacific Region Dat... \n",
|
|
"3 Climate Prediction Center \n",
|
|
"4 Acytostelium Gene Database \n",
|
|
"\n",
|
|
" attributes.metadata.status \\\n",
|
|
"0 ready \n",
|
|
"1 ready \n",
|
|
"2 ready \n",
|
|
"3 ready \n",
|
|
"4 deprecated \n",
|
|
"\n",
|
|
" attributes.metadata.contacts \\\n",
|
|
"0 [{'contact-name': 'Frédéric Clette', 'contact-... \n",
|
|
"1 [{'contact-name': 'Helpdesk', 'contact-email':... \n",
|
|
"2 [{'contact-name': 'Peter Chandler', 'contact-e... \n",
|
|
"3 [{'contact-name': 'Jon Hoopingarner', 'contact... \n",
|
|
"4 [{'contact-name': 'Acytostelium genome consort... \n",
|
|
"\n",
|
|
" attributes.metadata.homepage \\\n",
|
|
"0 http://sidc.be/silso/home \n",
|
|
"1 https://bmrb.io/ \n",
|
|
"2 http://www.pac.dfo-mpo.gc.ca/science/oceans/da... \n",
|
|
"3 https://www.cpc.ncep.noaa.gov/ \n",
|
|
"4 http://cosmos.bot.kyoto-u.ac.jp/acytodb//cgi-b... \n",
|
|
"\n",
|
|
" attributes.metadata.identifier \\\n",
|
|
"0 3226 \n",
|
|
"1 2114 \n",
|
|
"2 3022 \n",
|
|
"3 2998 \n",
|
|
"4 2301 \n",
|
|
"\n",
|
|
" attributes.metadata.description \\\n",
|
|
"0 The WDC-SILSO is an activity of the Operationa... \n",
|
|
"1 BMRB collects, annotates, archives, and dissem... \n",
|
|
"2 The Institute of Ocean Sciences (IOS)/Ocean Sc... \n",
|
|
"3 The Climate Prediction Center (CPC) produces o... \n",
|
|
"4 Genome and transcriptome database of Acytostel... \n",
|
|
"\n",
|
|
" attributes.metadata.abbreviation \\\n",
|
|
"0 WDC-SILSO \n",
|
|
"1 BMRB \n",
|
|
"2 None \n",
|
|
"3 CPC \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.support-links \\\n",
|
|
"0 [{'url': 'http://www.sidc.be/silso/taxonomy/te... \n",
|
|
"1 [{'url': 'https://bmrb.io/bmrb/news/', 'name':... \n",
|
|
"2 [{'url': 'DFO.PAC.SCI.IOSData-DonneesISO.SCI.P... \n",
|
|
"3 [{'url': 'https://www.cpc.ncep.noaa.gov/commen... \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.year-creation \\\n",
|
|
"0 2013.0 \n",
|
|
"1 1988.0 \n",
|
|
"2 NaN \n",
|
|
"3 1970.0 \n",
|
|
"4 2008.0 \n",
|
|
"\n",
|
|
" attributes.metadata.data-processes \\\n",
|
|
"0 [{'url': 'http://www.sidc.be/silso/datafiles',... \n",
|
|
"1 [{'url': 'https://bmrb.io/data_library/rsync.s... \n",
|
|
"2 [{'name': 'Users must contact the Senior Analy... \n",
|
|
"3 [{'url': 'https://www.cpc.ncep.noaa.gov/', 'na... \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.cross-references \\\n",
|
|
"0 [{'url': 'https://www.re3data.org/repository/r... \n",
|
|
"1 [{'url': 'https://www.re3data.org/repository/r... \n",
|
|
"2 [{'url': 'https://www.re3data.org/repository/r... \n",
|
|
"3 [{'url': 'https://www.re3data.org/repository/r... \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
|
|
"0 [biodbcore-001740, bsg-d001740] Database \n",
|
|
"1 [biodbcore-000584, bsg-d000584] Database \n",
|
|
"2 [biodbcore-001530, bsg-d001530] Database \n",
|
|
"3 [biodbcore-001504, bsg-d001504] Database \n",
|
|
"4 [biodbcore-000775, bsg-d000775] Database \n",
|
|
"\n",
|
|
" attributes.record-type attributes.subjects \\\n",
|
|
"0 repository [Electromagnetism, Astrophysics and Astronomy,... \n",
|
|
"1 repository [Structural Biology] \n",
|
|
"2 repository [Environmental Science, Meteorology, Earth Sci... \n",
|
|
"3 repository [Hydrogeology, Geography, Meteorology, Geodesy... \n",
|
|
"4 repository [Genomics, Life Science, Transcriptomics] \n",
|
|
"\n",
|
|
" attributes.domains \\\n",
|
|
"0 [Climate, Observation design] \n",
|
|
"1 [Molecular structure, Protein structure, Pepti... \n",
|
|
"2 [Climate] \n",
|
|
"3 [Climate] \n",
|
|
"4 [DNA sequence data, Gene model annotation] \n",
|
|
"\n",
|
|
" attributes.taxonomies \\\n",
|
|
"0 [Not applicable] \n",
|
|
"1 [All] \n",
|
|
"2 [Not applicable] \n",
|
|
"3 [Not applicable] \n",
|
|
"4 [Acytostelium subglobosum] \n",
|
|
"\n",
|
|
" attributes.user-defined-tags attributes.countries \\\n",
|
|
"0 [Climate change, earth observation, Electromag... [Belgium] \n",
|
|
"1 [] [United States] \n",
|
|
"2 [Salinity, Temperature] [Canada] \n",
|
|
"3 [Forecasting, weather] [United States] \n",
|
|
"4 [] [United Kingdom, Japan] \n",
|
|
"\n",
|
|
" attributes.name attributes.abbreviation \\\n",
|
|
"0 FAIRsharing record for: WDC Sunspot Index and ... WDC-SILSO \n",
|
|
"1 FAIRsharing record for: Biological Magnetic Re... BMRB \n",
|
|
"2 FAIRsharing record for: Fisheries and Oceans C... None \n",
|
|
"3 FAIRsharing record for: Climate Prediction Center CPC \n",
|
|
"4 FAIRsharing record for: Acytostelium Gene Data... None \n",
|
|
"\n",
|
|
" attributes.url \\\n",
|
|
"0 https://fairsharing.org/10.25504/FAIRsharing.d... \n",
|
|
"1 https://fairsharing.org/10.25504/FAIRsharing.p... \n",
|
|
"2 https://fairsharing.org/10.25504/FAIRsharing.8... \n",
|
|
"3 https://fairsharing.org/10.25504/FAIRsharing.e... \n",
|
|
"4 https://fairsharing.org/10.25504/FAIRsharing.m... \n",
|
|
"\n",
|
|
" attributes.doi \\\n",
|
|
"0 10.25504/FAIRsharing.d6423b \n",
|
|
"1 10.25504/FAIRsharing.p06nme \n",
|
|
"2 10.25504/FAIRsharing.8b7a2f \n",
|
|
"3 10.25504/FAIRsharing.e08886 \n",
|
|
"4 10.25504/FAIRsharing.meh9wz \n",
|
|
"\n",
|
|
" attributes.fairsharing-licence \\\n",
|
|
"0 https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
"1 https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
"2 https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
"3 https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
"4 https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
"\n",
|
|
" attributes.description \\\n",
|
|
"0 This FAIRsharing record describes: The WDC-SIL... \n",
|
|
"1 This FAIRsharing record describes: BMRB collec... \n",
|
|
"2 This FAIRsharing record describes: The Institu... \n",
|
|
"3 This FAIRsharing record describes: The Climate... \n",
|
|
"4 This FAIRsharing record describes: Genome and ... \n",
|
|
"\n",
|
|
" attributes.publications \\\n",
|
|
"0 [] \n",
|
|
"1 [{'id': 552, 'pubmed_id': 18288446, 'title': '... \n",
|
|
"2 [] \n",
|
|
"3 [] \n",
|
|
"4 [{'id': 1139, 'pubmed_id': 25758444, 'title': ... \n",
|
|
"\n",
|
|
" attributes.licence-links attributes.url-for-logo \\\n",
|
|
"0 [{'licence-name': 'SILSO legal notices', 'lice... None \n",
|
|
"1 [{'licence-name': 'wwPDB Privacy and Usage Pol... None \n",
|
|
"2 [{'licence-name': 'Fisheries and Oceans Canada... None \n",
|
|
"3 [{'licence-name': 'National Weather Service Di... None \n",
|
|
"4 [] None \n",
|
|
"\n",
|
|
" attributes.metadata.citations \\\n",
|
|
"0 NaN \n",
|
|
"1 [{'doi': '10.1093/nar/gkm957', 'pubmed-id': 17... \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.associated-tools \\\n",
|
|
"0 NaN \n",
|
|
"1 [{'url': 'https://bmrb.io/validate/', 'name': ... \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.deprecation-reason \\\n",
|
|
"0 NaN \n",
|
|
"1 \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 This resource is no longer available at the st... \n",
|
|
"\n",
|
|
" attributes.metadata.data-access-condition.type \\\n",
|
|
"0 NaN \n",
|
|
"1 open \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-contact-information \\\n",
|
|
"0 NaN \n",
|
|
"1 yes \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-deposition-condition.url \\\n",
|
|
"0 NaN \n",
|
|
"1 https://bmrb.io/deposit/ \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-deposition-condition.type \\\n",
|
|
"0 NaN \n",
|
|
"1 open \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.deprecation-date attributes.metadata.access-points \\\n",
|
|
"0 NaN NaN \n",
|
|
"1 NaN NaN \n",
|
|
"2 NaN NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 2021-9-17 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-access-condition.url \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.resource-sustainability.url \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.resource-sustainability.name \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-preservation-policy.url \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-preservation-policy.name \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-access-for-pre-publication-review \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-versioning attributes.metadata.data-curation.type \\\n",
|
|
"0 NaN NaN \n",
|
|
"1 NaN NaN \n",
|
|
"2 NaN NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-curation.url \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.citation-to-related-publications \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" attributes.metadata.tombstone \n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN "
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# The dump is JSON Lines: one FAIRsharing record (a JSON object) per line.\n",
|
|
"# Read it as UTF-8 explicitly so non-ASCII text decodes the same on every platform,\n",
|
|
"# and iterate the file rather than using str.splitlines(), which also splits on\n",
|
|
"# Unicode line boundaries (e.g. U+2028) that may appear unescaped inside JSON strings.\n",
|
|
"with open('../data/raw/fairsharing_dump_api_02_2022.json', encoding='utf-8') as f:\n",
|
|
"    records = [json.loads(line) for line in f if line.strip()]  # skip blank lines\n",
|
|
"\n",
|
|
"# Flatten nested objects into dotted column names (e.g. 'attributes.metadata.doi').\n",
|
|
"fairsharing_df = pd.json_normalize(records)\n",
|
|
"\n",
|
|
"fairsharing_df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>id</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>attributes.created-at</th>\n",
|
|
" <th>attributes.updated-at</th>\n",
|
|
" <th>attributes.metadata.doi</th>\n",
|
|
" <th>attributes.metadata.name</th>\n",
|
|
" <th>attributes.metadata.status</th>\n",
|
|
" <th>attributes.metadata.contacts</th>\n",
|
|
" <th>attributes.metadata.homepage</th>\n",
|
|
" <th>attributes.metadata.identifier</th>\n",
|
|
" <th>attributes.metadata.description</th>\n",
|
|
" <th>attributes.metadata.abbreviation</th>\n",
|
|
" <th>attributes.metadata.support-links</th>\n",
|
|
" <th>attributes.metadata.year-creation</th>\n",
|
|
" <th>attributes.metadata.data-processes</th>\n",
|
|
" <th>attributes.metadata.cross-references</th>\n",
|
|
" <th>attributes.legacy-ids</th>\n",
|
|
" <th>attributes.fairsharing-registry</th>\n",
|
|
" <th>attributes.record-type</th>\n",
|
|
" <th>attributes.subjects</th>\n",
|
|
" <th>attributes.domains</th>\n",
|
|
" <th>attributes.taxonomies</th>\n",
|
|
" <th>attributes.user-defined-tags</th>\n",
|
|
" <th>attributes.countries</th>\n",
|
|
" <th>attributes.name</th>\n",
|
|
" <th>attributes.abbreviation</th>\n",
|
|
" <th>attributes.url</th>\n",
|
|
" <th>attributes.doi</th>\n",
|
|
" <th>attributes.fairsharing-licence</th>\n",
|
|
" <th>attributes.description</th>\n",
|
|
" <th>attributes.publications</th>\n",
|
|
" <th>attributes.licence-links</th>\n",
|
|
" <th>attributes.url-for-logo</th>\n",
|
|
" <th>attributes.metadata.citations</th>\n",
|
|
" <th>attributes.metadata.associated-tools</th>\n",
|
|
" <th>attributes.metadata.deprecation-reason</th>\n",
|
|
" <th>attributes.metadata.data-access-condition.type</th>\n",
|
|
" <th>attributes.metadata.data-contact-information</th>\n",
|
|
" <th>attributes.metadata.data-deposition-condition.url</th>\n",
|
|
" <th>attributes.metadata.data-deposition-condition.type</th>\n",
|
|
" <th>attributes.metadata.deprecation-date</th>\n",
|
|
" <th>attributes.metadata.access-points</th>\n",
|
|
" <th>attributes.metadata.data-access-condition.url</th>\n",
|
|
" <th>attributes.metadata.resource-sustainability.url</th>\n",
|
|
" <th>attributes.metadata.resource-sustainability.name</th>\n",
|
|
" <th>attributes.metadata.data-preservation-policy.url</th>\n",
|
|
" <th>attributes.metadata.data-preservation-policy.name</th>\n",
|
|
" <th>attributes.metadata.data-access-for-pre-publication-review</th>\n",
|
|
" <th>attributes.metadata.data-versioning</th>\n",
|
|
" <th>attributes.metadata.data-curation.type</th>\n",
|
|
" <th>attributes.metadata.data-curation.url</th>\n",
|
|
" <th>attributes.metadata.citation-to-related-publications</th>\n",
|
|
" <th>attributes.metadata.tombstone</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>count</th>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1601</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1764</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853.000000</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1671</td>\n",
|
|
" <td>1663</td>\n",
|
|
" <td>1541.000000</td>\n",
|
|
" <td>1626</td>\n",
|
|
" <td>790</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1671</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1601</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>18</td>\n",
|
|
" <td>621</td>\n",
|
|
" <td>632</td>\n",
|
|
" <td>363</td>\n",
|
|
" <td>42</td>\n",
|
|
" <td>47</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>33</td>\n",
|
|
" <td>238</td>\n",
|
|
" <td>465</td>\n",
|
|
" <td>19</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>10</td>\n",
|
|
" <td>17</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>8</td>\n",
|
|
" <td>35</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>unique</th>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1218</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1601</td>\n",
|
|
" <td>1851</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>1623</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1655</td>\n",
|
|
" <td>1646</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1625</td>\n",
|
|
" <td>790</td>\n",
|
|
" <td>1799</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>935</td>\n",
|
|
" <td>1205</td>\n",
|
|
" <td>385</td>\n",
|
|
" <td>395</td>\n",
|
|
" <td>194</td>\n",
|
|
" <td>1851</td>\n",
|
|
" <td>1655</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1601</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1135</td>\n",
|
|
" <td>1119</td>\n",
|
|
" <td>18</td>\n",
|
|
" <td>331</td>\n",
|
|
" <td>627</td>\n",
|
|
" <td>104</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>71</td>\n",
|
|
" <td>460</td>\n",
|
|
" <td>19</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>8</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>top</th>\n",
|
|
" <td>3226</td>\n",
|
|
" <td>fairsharing-records</td>\n",
|
|
" <td>2014-11-04T15:23:40.000Z</td>\n",
|
|
" <td>2022-02-08T10:42:36.452Z</td>\n",
|
|
" <td>10.25504/FAIRsharing.d6423b</td>\n",
|
|
" <td>iDog</td>\n",
|
|
" <td>ready</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>http://sidc.be/silso/home</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>The WDC-SILSO is an activity of the Operationa...</td>\n",
|
|
" <td>CGD</td>\n",
|
|
" <td>[{'url': 'https://github.com/gbif/ipt/wiki/IPT...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>[{'url': 'https://site.uit.no/dataverseno/abou...</td>\n",
|
|
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>Database</td>\n",
|
|
" <td>repository</td>\n",
|
|
" <td>[Life Science]</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>[All]</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>[United States]</td>\n",
|
|
" <td>FAIRsharing record for: iDog</td>\n",
|
|
" <td>CGD</td>\n",
|
|
" <td>https://fairsharing.org/10.25504/FAIRsharing.d...</td>\n",
|
|
" <td>10.25504/FAIRsharing.d6423b</td>\n",
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
" <td>This FAIRsharing record describes: The WDC-SIL...</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>/rails/active_storage/blobs/redirect/eyJfcmFpb...</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td></td>\n",
|
|
" <td>open</td>\n",
|
|
" <td>yes</td>\n",
|
|
" <td>https://bmrb.io/deposit/</td>\n",
|
|
" <td>controlled</td>\n",
|
|
" <td>2021-9-17</td>\n",
|
|
" <td>[{'url': 'https://heidata.uni-heidelberg.de/oa...</td>\n",
|
|
" <td>https://arch.library.northwestern.edu/about?lo...</td>\n",
|
|
" <td>https://www.library.northwestern.edu/about/adm...</td>\n",
|
|
" <td>Commitment to Sustainability: Level 1</td>\n",
|
|
" <td>http://www.library.northwestern.edu/about/admi...</td>\n",
|
|
" <td>Digital Preservation Policy: Level 1</td>\n",
|
|
" <td>yes</td>\n",
|
|
" <td>yes</td>\n",
|
|
" <td>manual</td>\n",
|
|
" <td>https://www.gbif.org/tools/data-validator/about</td>\n",
|
|
" <td>yes</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>freq</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>636</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>1564</td>\n",
|
|
" <td>40</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>6</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>55</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>954</td>\n",
|
|
" <td>345</td>\n",
|
|
" <td>276</td>\n",
|
|
" <td>528</td>\n",
|
|
" <td>1258</td>\n",
|
|
" <td>607</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1853</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>690</td>\n",
|
|
" <td>735</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>285</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>125</td>\n",
|
|
" <td>38</td>\n",
|
|
" <td>45</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>81</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>11</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>34</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>mean</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2481.862925</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2007.894873</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>std</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>554.072492</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>10.933713</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>min</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1120.000000</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1894.000000</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25%</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2009.000000</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2004.000000</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>50%</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2473.000000</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2010.000000</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>75%</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2938.000000</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2015.000000</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>max</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>3827.000000</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022.000000</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" id type attributes.created-at \\\n",
|
|
"count 1853 1853 1853 \n",
|
|
"unique 1853 1 1218 \n",
|
|
"top 3226 fairsharing-records 2014-11-04T15:23:40.000Z \n",
|
|
"freq 1 1853 636 \n",
|
|
"mean NaN NaN NaN \n",
|
|
"std NaN NaN NaN \n",
|
|
"min NaN NaN NaN \n",
|
|
"25% NaN NaN NaN \n",
|
|
"50% NaN NaN NaN \n",
|
|
"75% NaN NaN NaN \n",
|
|
"max NaN NaN NaN \n",
|
|
"\n",
|
|
" attributes.updated-at attributes.metadata.doi \\\n",
|
|
"count 1853 1601 \n",
|
|
"unique 1853 1601 \n",
|
|
"top 2022-02-08T10:42:36.452Z 10.25504/FAIRsharing.d6423b \n",
|
|
"freq 1 1 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" attributes.metadata.name attributes.metadata.status \\\n",
|
|
"count 1853 1853 \n",
|
|
"unique 1851 4 \n",
|
|
"top iDog ready \n",
|
|
"freq 2 1564 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" attributes.metadata.contacts attributes.metadata.homepage \\\n",
|
|
"count 1764 1853 \n",
|
|
"unique 1623 1853 \n",
|
|
"top [] http://sidc.be/silso/home \n",
|
|
"freq 40 1 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" attributes.metadata.identifier \\\n",
|
|
"count 1853.000000 \n",
|
|
"unique NaN \n",
|
|
"top NaN \n",
|
|
"freq NaN \n",
|
|
"mean 2481.862925 \n",
|
|
"std 554.072492 \n",
|
|
"min 1120.000000 \n",
|
|
"25% 2009.000000 \n",
|
|
"50% 2473.000000 \n",
|
|
"75% 2938.000000 \n",
|
|
"max 3827.000000 \n",
|
|
"\n",
|
|
" attributes.metadata.description \\\n",
|
|
"count 1853 \n",
|
|
"unique 1853 \n",
|
|
"top The WDC-SILSO is an activity of the Operationa... \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.abbreviation \\\n",
|
|
"count 1671 \n",
|
|
"unique 1655 \n",
|
|
"top CGD \n",
|
|
"freq 3 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.support-links \\\n",
|
|
"count 1663 \n",
|
|
"unique 1646 \n",
|
|
"top [{'url': 'https://github.com/gbif/ipt/wiki/IPT... \n",
|
|
"freq 6 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.year-creation \\\n",
|
|
"count 1541.000000 \n",
|
|
"unique NaN \n",
|
|
"top NaN \n",
|
|
"freq NaN \n",
|
|
"mean 2007.894873 \n",
|
|
"std 10.933713 \n",
|
|
"min 1894.000000 \n",
|
|
"25% 2004.000000 \n",
|
|
"50% 2010.000000 \n",
|
|
"75% 2015.000000 \n",
|
|
"max 2022.000000 \n",
|
|
"\n",
|
|
" attributes.metadata.data-processes \\\n",
|
|
"count 1626 \n",
|
|
"unique 1625 \n",
|
|
"top [{'url': 'https://site.uit.no/dataverseno/abou... \n",
|
|
"freq 2 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.cross-references \\\n",
|
|
"count 790 \n",
|
|
"unique 790 \n",
|
|
"top [{'url': 'https://www.re3data.org/repository/r... \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
|
|
"count 1853 1853 \n",
|
|
"unique 1799 1 \n",
|
|
"top [] Database \n",
|
|
"freq 55 1853 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" attributes.record-type attributes.subjects attributes.domains \\\n",
|
|
"count 1853 1853 1853 \n",
|
|
"unique 3 935 1205 \n",
|
|
"top repository [Life Science] [] \n",
|
|
"freq 954 345 276 \n",
|
|
"mean NaN NaN NaN \n",
|
|
"std NaN NaN NaN \n",
|
|
"min NaN NaN NaN \n",
|
|
"25% NaN NaN NaN \n",
|
|
"50% NaN NaN NaN \n",
|
|
"75% NaN NaN NaN \n",
|
|
"max NaN NaN NaN \n",
|
|
"\n",
|
|
" attributes.taxonomies attributes.user-defined-tags \\\n",
|
|
"count 1853 1853 \n",
|
|
"unique 385 395 \n",
|
|
"top [All] [] \n",
|
|
"freq 528 1258 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" attributes.countries attributes.name \\\n",
|
|
"count 1853 1853 \n",
|
|
"unique 194 1851 \n",
|
|
"top [United States] FAIRsharing record for: iDog \n",
|
|
"freq 607 2 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" attributes.abbreviation \\\n",
|
|
"count 1671 \n",
|
|
"unique 1655 \n",
|
|
"top CGD \n",
|
|
"freq 3 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.url \\\n",
|
|
"count 1853 \n",
|
|
"unique 1853 \n",
|
|
"top https://fairsharing.org/10.25504/FAIRsharing.d... \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.doi \\\n",
|
|
"count 1601 \n",
|
|
"unique 1601 \n",
|
|
"top 10.25504/FAIRsharing.d6423b \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.fairsharing-licence \\\n",
|
|
"count 1853 \n",
|
|
"unique 1 \n",
|
|
"top https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
"freq 1853 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.description \\\n",
|
|
"count 1853 \n",
|
|
"unique 1853 \n",
|
|
"top This FAIRsharing record describes: The WDC-SIL... \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.publications attributes.licence-links \\\n",
|
|
"count 1853 1853 \n",
|
|
"unique 1135 1119 \n",
|
|
"top [] [] \n",
|
|
"freq 690 735 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" attributes.url-for-logo \\\n",
|
|
"count 18 \n",
|
|
"unique 18 \n",
|
|
"top /rails/active_storage/blobs/redirect/eyJfcmFpb... \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.citations attributes.metadata.associated-tools \\\n",
|
|
"count 621 632 \n",
|
|
"unique 331 627 \n",
|
|
"top [] [] \n",
|
|
"freq 285 3 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" attributes.metadata.deprecation-reason \\\n",
|
|
"count 363 \n",
|
|
"unique 104 \n",
|
|
"top \n",
|
|
"freq 125 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-access-condition.type \\\n",
|
|
"count 42 \n",
|
|
"unique 2 \n",
|
|
"top open \n",
|
|
"freq 38 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-contact-information \\\n",
|
|
"count 47 \n",
|
|
"unique 2 \n",
|
|
"top yes \n",
|
|
"freq 45 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-deposition-condition.url \\\n",
|
|
"count 22 \n",
|
|
"unique 22 \n",
|
|
"top https://bmrb.io/deposit/ \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-deposition-condition.type \\\n",
|
|
"count 33 \n",
|
|
"unique 2 \n",
|
|
"top controlled \n",
|
|
"freq 21 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.deprecation-date \\\n",
|
|
"count 238 \n",
|
|
"unique 71 \n",
|
|
"top 2021-9-17 \n",
|
|
"freq 81 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.access-points \\\n",
|
|
"count 465 \n",
|
|
"unique 460 \n",
|
|
"top [{'url': 'https://heidata.uni-heidelberg.de/oa... \n",
|
|
"freq 3 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-access-condition.url \\\n",
|
|
"count 19 \n",
|
|
"unique 19 \n",
|
|
"top https://arch.library.northwestern.edu/about?lo... \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.resource-sustainability.url \\\n",
|
|
"count 2 \n",
|
|
"unique 2 \n",
|
|
"top https://www.library.northwestern.edu/about/adm... \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.resource-sustainability.name \\\n",
|
|
"count 2 \n",
|
|
"unique 2 \n",
|
|
"top Commitment to Sustainability: Level 1 \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-preservation-policy.url \\\n",
|
|
"count 3 \n",
|
|
"unique 3 \n",
|
|
"top http://www.library.northwestern.edu/about/admi... \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-preservation-policy.name \\\n",
|
|
"count 3 \n",
|
|
"unique 3 \n",
|
|
"top Digital Preservation Policy: Level 1 \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-access-for-pre-publication-review \\\n",
|
|
"count 10 \n",
|
|
"unique 2 \n",
|
|
"top yes \n",
|
|
"freq 9 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-versioning \\\n",
|
|
"count 17 \n",
|
|
"unique 2 \n",
|
|
"top yes \n",
|
|
"freq 16 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-curation.type \\\n",
|
|
"count 22 \n",
|
|
"unique 4 \n",
|
|
"top manual \n",
|
|
"freq 11 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.data-curation.url \\\n",
|
|
"count 8 \n",
|
|
"unique 8 \n",
|
|
"top https://www.gbif.org/tools/data-validator/about \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.citation-to-related-publications \\\n",
|
|
"count 35 \n",
|
|
"unique 2 \n",
|
|
"top yes \n",
|
|
"freq 34 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" attributes.metadata.tombstone \n",
|
|
"count 1 \n",
|
|
"unique 1 \n",
|
|
"top True \n",
|
|
"freq 1 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN "
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"fairsharing_df.describe(include='all')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"id 0\n",
|
|
"type 0\n",
|
|
"attributes.created-at 0\n",
|
|
"attributes.updated-at 0\n",
|
|
"attributes.metadata.doi 252\n",
|
|
"attributes.metadata.name 0\n",
|
|
"attributes.metadata.status 0\n",
|
|
"attributes.metadata.contacts 89\n",
|
|
"attributes.metadata.homepage 0\n",
|
|
"attributes.metadata.identifier 0\n",
|
|
"attributes.metadata.description 0\n",
|
|
"attributes.metadata.abbreviation 182\n",
|
|
"attributes.metadata.support-links 190\n",
|
|
"attributes.metadata.year-creation 312\n",
|
|
"attributes.metadata.data-processes 227\n",
|
|
"attributes.metadata.cross-references 1063\n",
|
|
"attributes.legacy-ids 0\n",
|
|
"attributes.fairsharing-registry 0\n",
|
|
"attributes.record-type 0\n",
|
|
"attributes.subjects 0\n",
|
|
"attributes.domains 0\n",
|
|
"attributes.taxonomies 0\n",
|
|
"attributes.user-defined-tags 0\n",
|
|
"attributes.countries 0\n",
|
|
"attributes.name 0\n",
|
|
"attributes.abbreviation 182\n",
|
|
"attributes.url 0\n",
|
|
"attributes.doi 252\n",
|
|
"attributes.fairsharing-licence 0\n",
|
|
"attributes.description 0\n",
|
|
"attributes.publications 0\n",
|
|
"attributes.licence-links 0\n",
|
|
"attributes.url-for-logo 1835\n",
|
|
"attributes.metadata.citations 1232\n",
|
|
"attributes.metadata.associated-tools 1221\n",
|
|
"attributes.metadata.deprecation-reason 1490\n",
|
|
"attributes.metadata.data-access-condition.type 1811\n",
|
|
"attributes.metadata.data-contact-information 1806\n",
|
|
"attributes.metadata.data-deposition-condition.url 1831\n",
|
|
"attributes.metadata.data-deposition-condition.type 1820\n",
|
|
"attributes.metadata.deprecation-date 1615\n",
|
|
"attributes.metadata.access-points 1388\n",
|
|
"attributes.metadata.data-access-condition.url 1834\n",
|
|
"attributes.metadata.resource-sustainability.url 1851\n",
|
|
"attributes.metadata.resource-sustainability.name 1851\n",
|
|
"attributes.metadata.data-preservation-policy.url 1850\n",
|
|
"attributes.metadata.data-preservation-policy.name 1850\n",
|
|
"attributes.metadata.data-access-for-pre-publication-review 1843\n",
|
|
"attributes.metadata.data-versioning 1836\n",
|
|
"attributes.metadata.data-curation.type 1831\n",
|
|
"attributes.metadata.data-curation.url 1845\n",
|
|
"attributes.metadata.citation-to-related-publications 1818\n",
|
|
"attributes.metadata.tombstone 1852\n",
|
|
"dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"fairsharing_df.isna().sum()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"attributes.record-type\n",
|
|
"knowledgebase 787\n",
|
|
"knowledgebase_and_repository 112\n",
|
|
"repository 954\n",
|
|
"dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.DataFrame(fairsharing_df['attributes.record-type']).groupby('attributes.record-type').size()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|