registries_analysis/notebooks/01.4-exploration-fairsharin...

1486 lines
67 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import ast\n",
"import csv\n",
"import json\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
"import plotly.express as px\n",
"\n",
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading datasets"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>type</th>\n",
" <th>attributes.created-at</th>\n",
" <th>attributes.updated-at</th>\n",
" <th>attributes.metadata.doi</th>\n",
" <th>attributes.metadata.name</th>\n",
" <th>attributes.metadata.status</th>\n",
" <th>attributes.metadata.contacts</th>\n",
" <th>attributes.metadata.homepage</th>\n",
" <th>attributes.metadata.identifier</th>\n",
" <th>attributes.metadata.description</th>\n",
" <th>attributes.metadata.support-links</th>\n",
" <th>attributes.metadata.year-creation</th>\n",
" <th>attributes.metadata.data-processes</th>\n",
" <th>attributes.legacy-ids</th>\n",
" <th>attributes.fairsharing-registry</th>\n",
" <th>attributes.record-type</th>\n",
" <th>attributes.subjects</th>\n",
" <th>attributes.domains</th>\n",
" <th>attributes.taxonomies</th>\n",
" <th>attributes.user-defined-tags</th>\n",
" <th>attributes.countries</th>\n",
" <th>attributes.name</th>\n",
" <th>attributes.abbreviation</th>\n",
" <th>attributes.url</th>\n",
" <th>attributes.doi</th>\n",
" <th>attributes.fairsharing-licence</th>\n",
" <th>attributes.description</th>\n",
" <th>attributes.publications</th>\n",
" <th>attributes.licence-links</th>\n",
" <th>attributes.metadata.citations</th>\n",
" <th>attributes.metadata.abbreviation</th>\n",
" <th>attributes.metadata.access-points</th>\n",
" <th>attributes.metadata.associated-tools</th>\n",
" <th>attributes.metadata.deprecation-date</th>\n",
" <th>attributes.metadata.deprecation-reason</th>\n",
" <th>attributes.metadata.tombstone</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1723</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2014-11-04T15:23:40.000Z</td>\n",
" <td>2021-09-30T11:39:06.829Z</td>\n",
" <td>10.25504/FAIRsharing.8t18te</td>\n",
" <td>Cell Image Library</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-name': 'David Orloff', 'contact-ema...</td>\n",
" <td>http://www.cellimagelibrary.org</td>\n",
" <td>1723</td>\n",
" <td>This library is a public and easily accessible...</td>\n",
" <td>[{'url': 'http://www.cellimagelibrary.org/page...</td>\n",
" <td>2010.0</td>\n",
" <td>[{'name': 'live update', 'type': 'data release...</td>\n",
" <td>[biodbcore-000180, bsg-d000180]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Cell Biology, Life Science]</td>\n",
" <td>[Cell, Microscopy, Light microscopy, Electron ...</td>\n",
" <td>[All]</td>\n",
" <td>[]</td>\n",
" <td>[United States]</td>\n",
" <td>FAIRsharing record for: Cell Image Library</td>\n",
" <td>None</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.8...</td>\n",
" <td>10.25504/FAIRsharing.8t18te</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: This librar...</td>\n",
" <td>[{'id': 232, 'pubmed_id': 23203874, 'title': '...</td>\n",
" <td>[{'licence-name': 'Cell Image Library Data Pol...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3101</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2020-09-16T08:49:13.000Z</td>\n",
" <td>2021-09-30T11:36:45.452Z</td>\n",
" <td>NaN</td>\n",
" <td>WHOI Ship Data-Grabber System</td>\n",
" <td>ready</td>\n",
" <td>NaN</td>\n",
" <td>http://4dgeo.whoi.edu/shipdata/SDG_shipdata.html</td>\n",
" <td>3101</td>\n",
" <td>The WHOI Ship DataGrabber system provides the ...</td>\n",
" <td>[{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o...</td>\n",
" <td>2004.0</td>\n",
" <td>[{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai...</td>\n",
" <td>[biodbcore-001609, bsg-d001609]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Earth Science, Water Research, Oceanography]</td>\n",
" <td>[]</td>\n",
" <td>[Not applicable]</td>\n",
" <td>[subseafloor environments]</td>\n",
" <td>[United States]</td>\n",
" <td>FAIRsharing record for: WHOI Ship Data-Grabber...</td>\n",
" <td>None</td>\n",
" <td>https://fairsharing.org/fairsharing_records/3101</td>\n",
" <td>None</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: The WHOI Sh...</td>\n",
" <td>[]</td>\n",
" <td>[{'licence-name': 'NDSF Data Archive Policy', ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2649</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2018-08-07T20:23:32.000Z</td>\n",
" <td>2021-09-30T11:39:07.898Z</td>\n",
" <td>NaN</td>\n",
" <td>Electron Microscope Public Image Archive</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-name': 'General contact', 'contact-...</td>\n",
" <td>https://www.ebi.ac.uk/pdbe/emdb/empiar/</td>\n",
" <td>2649</td>\n",
" <td>EMPIAR, the Electron Microscopy Public Image A...</td>\n",
" <td>[{'url': 'https://www.ebi.ac.uk/support/EMPIAR...</td>\n",
" <td>2015.0</td>\n",
" <td>[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...</td>\n",
" <td>[biodbcore-001140, bsg-d001140]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Bioinformatics, Biology]</td>\n",
" <td>[Protein image, Microscopy, Electron microscop...</td>\n",
" <td>[All]</td>\n",
" <td>[]</td>\n",
" <td>[Greece, Czech Republic, United Kingdom, Icela...</td>\n",
" <td>FAIRsharing record for: Electron Microscope Pu...</td>\n",
" <td>EMPIAR</td>\n",
" <td>https://fairsharing.org/fairsharing_records/2649</td>\n",
" <td>None</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: EMPIAR, the...</td>\n",
" <td>[{'id': 2232, 'pubmed_id': 27067018, 'title': ...</td>\n",
" <td>[{'licence-name': 'EMBL-EBI Terms of Use', 'li...</td>\n",
" <td>[{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27...</td>\n",
" <td>EMPIAR</td>\n",
" <td>[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...</td>\n",
" <td>[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2657</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2018-08-13T15:12:11.000Z</td>\n",
" <td>2021-09-30T11:37:28.736Z</td>\n",
" <td>10.25504/FAIRsharing.tnByoG</td>\n",
" <td>ClinicalStudyDataRequest.com</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-email': 'support@clinicalstudydatar...</td>\n",
" <td>https://clinicalstudydatarequest.com/</td>\n",
" <td>2657</td>\n",
" <td>ClinicalStudyDataRequest.com (CSDR) is a conso...</td>\n",
" <td>[{'url': 'https://clinicalstudydatarequest.com...</td>\n",
" <td>2014.0</td>\n",
" <td>[{'url': 'https://clinicalstudydatarequest.com...</td>\n",
" <td>[biodbcore-001149, bsg-d001149]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Preclinical Studies, Biomedical Science]</td>\n",
" <td>[]</td>\n",
" <td>[Homo sapiens]</td>\n",
" <td>[]</td>\n",
" <td>[Worldwide]</td>\n",
" <td>FAIRsharing record for: ClinicalStudyDataReque...</td>\n",
" <td>CSDR</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.t...</td>\n",
" <td>10.25504/FAIRsharing.tnByoG</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: ClinicalStu...</td>\n",
" <td>[]</td>\n",
" <td>[{'licence-name': 'CSDR Data Sharing Agreement...</td>\n",
" <td>NaN</td>\n",
" <td>CSDR</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2078</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2014-11-04T15:23:40.000Z</td>\n",
" <td>2021-09-30T11:34:43.129Z</td>\n",
" <td>10.25504/FAIRsharing.3axym7</td>\n",
" <td>Germplasm Resources Information Network</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-email': 'dbmu@ars-grin.gov'}]</td>\n",
" <td>https://www.ars-grin.gov/</td>\n",
" <td>2078</td>\n",
" <td>GRIN provides National Genetic Resources Progr...</td>\n",
" <td>[{'url': 'https://www.ars-grin.gov/Pages/Colle...</td>\n",
" <td>2010.0</td>\n",
" <td>[{'url': 'https://www.ars-grin.gov/', 'name': ...</td>\n",
" <td>[biodbcore-000546, bsg-d000546]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Life Science]</td>\n",
" <td>[Cell, Cell culture, Germplasm]</td>\n",
" <td>[Bacteria, Metazoa, Viridiplantae]</td>\n",
" <td>[]</td>\n",
" <td>[United States]</td>\n",
" <td>FAIRsharing record for: Germplasm Resources In...</td>\n",
" <td>GRIN</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.3...</td>\n",
" <td>10.25504/FAIRsharing.3axym7</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: GRIN provid...</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
" <td>NaN</td>\n",
" <td>GRIN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id type attributes.created-at \\\n",
"0 1723 fairsharing-records 2014-11-04T15:23:40.000Z \n",
"1 3101 fairsharing-records 2020-09-16T08:49:13.000Z \n",
"2 2649 fairsharing-records 2018-08-07T20:23:32.000Z \n",
"3 2657 fairsharing-records 2018-08-13T15:12:11.000Z \n",
"4 2078 fairsharing-records 2014-11-04T15:23:40.000Z \n",
"\n",
" attributes.updated-at attributes.metadata.doi \\\n",
"0 2021-09-30T11:39:06.829Z 10.25504/FAIRsharing.8t18te \n",
"1 2021-09-30T11:36:45.452Z NaN \n",
"2 2021-09-30T11:39:07.898Z NaN \n",
"3 2021-09-30T11:37:28.736Z 10.25504/FAIRsharing.tnByoG \n",
"4 2021-09-30T11:34:43.129Z 10.25504/FAIRsharing.3axym7 \n",
"\n",
" attributes.metadata.name attributes.metadata.status \\\n",
"0 Cell Image Library ready \n",
"1 WHOI Ship Data-Grabber System ready \n",
"2 Electron Microscope Public Image Archive ready \n",
"3 ClinicalStudyDataRequest.com ready \n",
"4 Germplasm Resources Information Network ready \n",
"\n",
" attributes.metadata.contacts \\\n",
"0 [{'contact-name': 'David Orloff', 'contact-ema... \n",
"1 NaN \n",
"2 [{'contact-name': 'General contact', 'contact-... \n",
"3 [{'contact-email': 'support@clinicalstudydatar... \n",
"4 [{'contact-email': 'dbmu@ars-grin.gov'}] \n",
"\n",
" attributes.metadata.homepage \\\n",
"0 http://www.cellimagelibrary.org \n",
"1 http://4dgeo.whoi.edu/shipdata/SDG_shipdata.html \n",
"2 https://www.ebi.ac.uk/pdbe/emdb/empiar/ \n",
"3 https://clinicalstudydatarequest.com/ \n",
"4 https://www.ars-grin.gov/ \n",
"\n",
" attributes.metadata.identifier \\\n",
"0 1723 \n",
"1 3101 \n",
"2 2649 \n",
"3 2657 \n",
"4 2078 \n",
"\n",
" attributes.metadata.description \\\n",
"0 This library is a public and easily accessible... \n",
"1 The WHOI Ship DataGrabber system provides the ... \n",
"2 EMPIAR, the Electron Microscopy Public Image A... \n",
"3 ClinicalStudyDataRequest.com (CSDR) is a conso... \n",
"4 GRIN provides National Genetic Resources Progr... \n",
"\n",
" attributes.metadata.support-links \\\n",
"0 [{'url': 'http://www.cellimagelibrary.org/page... \n",
"1 [{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o... \n",
"2 [{'url': 'https://www.ebi.ac.uk/support/EMPIAR... \n",
"3 [{'url': 'https://clinicalstudydatarequest.com... \n",
"4 [{'url': 'https://www.ars-grin.gov/Pages/Colle... \n",
"\n",
" attributes.metadata.year-creation \\\n",
"0 2010.0 \n",
"1 2004.0 \n",
"2 2015.0 \n",
"3 2014.0 \n",
"4 2010.0 \n",
"\n",
" attributes.metadata.data-processes \\\n",
"0 [{'name': 'live update', 'type': 'data release... \n",
"1 [{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai... \n",
"2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
"3 [{'url': 'https://clinicalstudydatarequest.com... \n",
"4 [{'url': 'https://www.ars-grin.gov/', 'name': ... \n",
"\n",
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
"0 [biodbcore-000180, bsg-d000180] Database \n",
"1 [biodbcore-001609, bsg-d001609] Database \n",
"2 [biodbcore-001140, bsg-d001140] Database \n",
"3 [biodbcore-001149, bsg-d001149] Database \n",
"4 [biodbcore-000546, bsg-d000546] Database \n",
"\n",
" attributes.record-type attributes.subjects \\\n",
"0 repository [Cell Biology, Life Science] \n",
"1 repository [Earth Science, Water Research, Oceanography] \n",
"2 repository [Bioinformatics, Biology] \n",
"3 repository [Preclinical Studies, Biomedical Science] \n",
"4 repository [Life Science] \n",
"\n",
" attributes.domains \\\n",
"0 [Cell, Microscopy, Light microscopy, Electron ... \n",
"1 [] \n",
"2 [Protein image, Microscopy, Electron microscop... \n",
"3 [] \n",
"4 [Cell, Cell culture, Germplasm] \n",
"\n",
" attributes.taxonomies attributes.user-defined-tags \\\n",
"0 [All] [] \n",
"1 [Not applicable] [subseafloor environments] \n",
"2 [All] [] \n",
"3 [Homo sapiens] [] \n",
"4 [Bacteria, Metazoa, Viridiplantae] [] \n",
"\n",
" attributes.countries \\\n",
"0 [United States] \n",
"1 [United States] \n",
"2 [Greece, Czech Republic, United Kingdom, Icela... \n",
"3 [Worldwide] \n",
"4 [United States] \n",
"\n",
" attributes.name attributes.abbreviation \\\n",
"0 FAIRsharing record for: Cell Image Library None \n",
"1 FAIRsharing record for: WHOI Ship Data-Grabber... None \n",
"2 FAIRsharing record for: Electron Microscope Pu... EMPIAR \n",
"3 FAIRsharing record for: ClinicalStudyDataReque... CSDR \n",
"4 FAIRsharing record for: Germplasm Resources In... GRIN \n",
"\n",
" attributes.url \\\n",
"0 https://fairsharing.org/10.25504/FAIRsharing.8... \n",
"1 https://fairsharing.org/fairsharing_records/3101 \n",
"2 https://fairsharing.org/fairsharing_records/2649 \n",
"3 https://fairsharing.org/10.25504/FAIRsharing.t... \n",
"4 https://fairsharing.org/10.25504/FAIRsharing.3... \n",
"\n",
" attributes.doi \\\n",
"0 10.25504/FAIRsharing.8t18te \n",
"1 None \n",
"2 None \n",
"3 10.25504/FAIRsharing.tnByoG \n",
"4 10.25504/FAIRsharing.3axym7 \n",
"\n",
" attributes.fairsharing-licence \\\n",
"0 https://creativecommons.org/licenses/by-sa/4.0... \n",
"1 https://creativecommons.org/licenses/by-sa/4.0... \n",
"2 https://creativecommons.org/licenses/by-sa/4.0... \n",
"3 https://creativecommons.org/licenses/by-sa/4.0... \n",
"4 https://creativecommons.org/licenses/by-sa/4.0... \n",
"\n",
" attributes.description \\\n",
"0 This FAIRsharing record describes: This librar... \n",
"1 This FAIRsharing record describes: The WHOI Sh... \n",
"2 This FAIRsharing record describes: EMPIAR, the... \n",
"3 This FAIRsharing record describes: ClinicalStu... \n",
"4 This FAIRsharing record describes: GRIN provid... \n",
"\n",
" attributes.publications \\\n",
"0 [{'id': 232, 'pubmed_id': 23203874, 'title': '... \n",
"1 [] \n",
"2 [{'id': 2232, 'pubmed_id': 27067018, 'title': ... \n",
"3 [] \n",
"4 [] \n",
"\n",
" attributes.licence-links \\\n",
"0 [{'licence-name': 'Cell Image Library Data Pol... \n",
"1 [{'licence-name': 'NDSF Data Archive Policy', ... \n",
"2 [{'licence-name': 'EMBL-EBI Terms of Use', 'li... \n",
"3 [{'licence-name': 'CSDR Data Sharing Agreement... \n",
"4 [] \n",
"\n",
" attributes.metadata.citations \\\n",
"0 NaN \n",
"1 NaN \n",
"2 [{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27... \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.abbreviation \\\n",
"0 NaN \n",
"1 NaN \n",
"2 EMPIAR \n",
"3 CSDR \n",
"4 GRIN \n",
"\n",
" attributes.metadata.access-points \\\n",
"0 NaN \n",
"1 NaN \n",
"2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.associated-tools \\\n",
"0 NaN \n",
"1 NaN \n",
"2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.deprecation-date attributes.metadata.deprecation-reason \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" attributes.metadata.tombstone \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open('../data/raw/fairsharing_dump_api_09_2021.json') as f:\n",
" lines = f.read().splitlines()\n",
" \n",
"fairsharing_df = pd.DataFrame(lines)\n",
"fairsharing_df.columns = ['json_element']\n",
"fairsharing_df['json_element'].apply(json.loads)\n",
"fairsharing_df = pd.json_normalize(fairsharing_df['json_element'].apply(json.loads))\n",
"\n",
"fairsharing_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>type</th>\n",
" <th>attributes.created-at</th>\n",
" <th>attributes.updated-at</th>\n",
" <th>attributes.metadata.doi</th>\n",
" <th>attributes.metadata.name</th>\n",
" <th>attributes.metadata.status</th>\n",
" <th>attributes.metadata.contacts</th>\n",
" <th>attributes.metadata.homepage</th>\n",
" <th>attributes.metadata.identifier</th>\n",
" <th>attributes.metadata.description</th>\n",
" <th>attributes.metadata.support-links</th>\n",
" <th>attributes.metadata.year-creation</th>\n",
" <th>attributes.metadata.data-processes</th>\n",
" <th>attributes.legacy-ids</th>\n",
" <th>attributes.fairsharing-registry</th>\n",
" <th>attributes.record-type</th>\n",
" <th>attributes.subjects</th>\n",
" <th>attributes.domains</th>\n",
" <th>attributes.taxonomies</th>\n",
" <th>attributes.user-defined-tags</th>\n",
" <th>attributes.countries</th>\n",
" <th>attributes.name</th>\n",
" <th>attributes.abbreviation</th>\n",
" <th>attributes.url</th>\n",
" <th>attributes.doi</th>\n",
" <th>attributes.fairsharing-licence</th>\n",
" <th>attributes.description</th>\n",
" <th>attributes.publications</th>\n",
" <th>attributes.licence-links</th>\n",
" <th>attributes.metadata.citations</th>\n",
" <th>attributes.metadata.abbreviation</th>\n",
" <th>attributes.metadata.access-points</th>\n",
" <th>attributes.metadata.associated-tools</th>\n",
" <th>attributes.metadata.deprecation-date</th>\n",
" <th>attributes.metadata.deprecation-reason</th>\n",
" <th>attributes.metadata.tombstone</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1354</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1678</td>\n",
" <td>1797</td>\n",
" <td>1797.000000</td>\n",
" <td>1797</td>\n",
" <td>1608</td>\n",
" <td>1492.000000</td>\n",
" <td>1565</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1638</td>\n",
" <td>1797</td>\n",
" <td>1354</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>1797</td>\n",
" <td>326</td>\n",
" <td>1638</td>\n",
" <td>449</td>\n",
" <td>618</td>\n",
" <td>217</td>\n",
" <td>217</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>1797</td>\n",
" <td>1</td>\n",
" <td>1162</td>\n",
" <td>1797</td>\n",
" <td>1354</td>\n",
" <td>1796</td>\n",
" <td>4</td>\n",
" <td>1576</td>\n",
" <td>1797</td>\n",
" <td>NaN</td>\n",
" <td>1797</td>\n",
" <td>1594</td>\n",
" <td>NaN</td>\n",
" <td>1563</td>\n",
" <td>1797</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>888</td>\n",
" <td>1163</td>\n",
" <td>378</td>\n",
" <td>384</td>\n",
" <td>185</td>\n",
" <td>1796</td>\n",
" <td>1626</td>\n",
" <td>1797</td>\n",
" <td>1354</td>\n",
" <td>1</td>\n",
" <td>1797</td>\n",
" <td>1109</td>\n",
" <td>1082</td>\n",
" <td>320</td>\n",
" <td>1626</td>\n",
" <td>444</td>\n",
" <td>615</td>\n",
" <td>55</td>\n",
" <td>86</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>1723</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2014-11-04T15:23:40.000Z</td>\n",
" <td>2021-09-30T11:39:06.829Z</td>\n",
" <td>10.25504/FAIRsharing.8t18te</td>\n",
" <td>OmicsDB</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-name': 'Sam Hokin', 'contact-email'...</td>\n",
" <td>http://www.cellimagelibrary.org</td>\n",
" <td>NaN</td>\n",
" <td>This library is a public and easily accessible...</td>\n",
" <td>[{'url': 'https://github.com/gbif/ipt/wiki/IPT...</td>\n",
" <td>NaN</td>\n",
" <td>[{'url': 'http://qf.iodp.tamu.edu/qfsearch/sea...</td>\n",
" <td>[biodbcore-000180, bsg-d000180]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Life Science]</td>\n",
" <td>[]</td>\n",
" <td>[All]</td>\n",
" <td>[]</td>\n",
" <td>[United States]</td>\n",
" <td>FAIRsharing record for: OmicsDB</td>\n",
" <td>CGD</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.8...</td>\n",
" <td>10.25504/FAIRsharing.8t18te</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: This librar...</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
" <td>[{'doi': '10.1093/nar/gkz890', 'pubmed-id': 31...</td>\n",
" <td>CGD</td>\n",
" <td>[{'url': 'https://github.com/Ensembl', 'name':...</td>\n",
" <td>[{'url': 'http://www.h-invitational.jp/hinv/bl...</td>\n",
" <td>2021-9-17</td>\n",
" <td>This resource is no longer available at the st...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
" <td>1797</td>\n",
" <td>636</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1540</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1797</td>\n",
" <td>926</td>\n",
" <td>350</td>\n",
" <td>265</td>\n",
" <td>502</td>\n",
" <td>1193</td>\n",
" <td>594</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1797</td>\n",
" <td>1</td>\n",
" <td>661</td>\n",
" <td>716</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>84</td>\n",
" <td>113</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2446.100167</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2007.636059</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>520.058757</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10.953269</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1547.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1894.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1996.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2004.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2445.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2010.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2897.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2014.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3346.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2021.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id type attributes.created-at \\\n",
"count 1797 1797 1797 \n",
"unique 1797 1 1162 \n",
"top 1723 fairsharing-records 2014-11-04T15:23:40.000Z \n",
"freq 1 1797 636 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" attributes.updated-at attributes.metadata.doi \\\n",
"count 1797 1354 \n",
"unique 1797 1354 \n",
"top 2021-09-30T11:39:06.829Z 10.25504/FAIRsharing.8t18te \n",
"freq 1 1 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.metadata.name attributes.metadata.status \\\n",
"count 1797 1797 \n",
"unique 1796 4 \n",
"top OmicsDB ready \n",
"freq 2 1540 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.metadata.contacts \\\n",
"count 1678 \n",
"unique 1576 \n",
"top [{'contact-name': 'Sam Hokin', 'contact-email'... \n",
"freq 6 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.homepage attributes.metadata.identifier \\\n",
"count 1797 1797.000000 \n",
"unique 1797 NaN \n",
"top http://www.cellimagelibrary.org NaN \n",
"freq 1 NaN \n",
"mean NaN 2446.100167 \n",
"std NaN 520.058757 \n",
"min NaN 1547.000000 \n",
"25% NaN 1996.000000 \n",
"50% NaN 2445.000000 \n",
"75% NaN 2897.000000 \n",
"max NaN 3346.000000 \n",
"\n",
" attributes.metadata.description \\\n",
"count 1797 \n",
"unique 1797 \n",
"top This library is a public and easily accessible... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.support-links \\\n",
"count 1608 \n",
"unique 1594 \n",
"top [{'url': 'https://github.com/gbif/ipt/wiki/IPT... \n",
"freq 6 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.year-creation \\\n",
"count 1492.000000 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
"mean 2007.636059 \n",
"std 10.953269 \n",
"min 1894.000000 \n",
"25% 2004.000000 \n",
"50% 2010.000000 \n",
"75% 2014.000000 \n",
"max 2021.000000 \n",
"\n",
" attributes.metadata.data-processes \\\n",
"count 1565 \n",
"unique 1563 \n",
"top [{'url': 'http://qf.iodp.tamu.edu/qfsearch/sea... \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
"count 1797 1797 \n",
"unique 1797 1 \n",
"top [biodbcore-000180, bsg-d000180] Database \n",
"freq 1 1797 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.record-type attributes.subjects attributes.domains \\\n",
"count 1797 1797 1797 \n",
"unique 3 888 1163 \n",
"top repository [Life Science] [] \n",
"freq 926 350 265 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" attributes.taxonomies attributes.user-defined-tags \\\n",
"count 1797 1797 \n",
"unique 378 384 \n",
"top [All] [] \n",
"freq 502 1193 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.countries attributes.name \\\n",
"count 1797 1797 \n",
"unique 185 1796 \n",
"top [United States] FAIRsharing record for: OmicsDB \n",
"freq 594 2 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.abbreviation \\\n",
"count 1638 \n",
"unique 1626 \n",
"top CGD \n",
"freq 3 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.url \\\n",
"count 1797 \n",
"unique 1797 \n",
"top https://fairsharing.org/10.25504/FAIRsharing.8... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.doi \\\n",
"count 1354 \n",
"unique 1354 \n",
"top 10.25504/FAIRsharing.8t18te \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.fairsharing-licence \\\n",
"count 1797 \n",
"unique 1 \n",
"top https://creativecommons.org/licenses/by-sa/4.0... \n",
"freq 1797 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.description \\\n",
"count 1797 \n",
"unique 1797 \n",
"top This FAIRsharing record describes: This librar... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.publications attributes.licence-links \\\n",
"count 1797 1797 \n",
"unique 1109 1082 \n",
"top [] [] \n",
"freq 661 716 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.metadata.citations \\\n",
"count 326 \n",
"unique 320 \n",
"top [{'doi': '10.1093/nar/gkz890', 'pubmed-id': 31... \n",
"freq 6 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.abbreviation \\\n",
"count 1638 \n",
"unique 1626 \n",
"top CGD \n",
"freq 3 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.access-points \\\n",
"count 449 \n",
"unique 444 \n",
"top [{'url': 'https://github.com/Ensembl', 'name':... \n",
"freq 3 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.associated-tools \\\n",
"count 618 \n",
"unique 615 \n",
"top [{'url': 'http://www.h-invitational.jp/hinv/bl... \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.deprecation-date \\\n",
"count 217 \n",
"unique 55 \n",
"top 2021-9-17 \n",
"freq 84 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.deprecation-reason \\\n",
"count 217 \n",
"unique 86 \n",
"top This resource is no longer available at the st... \n",
"freq 113 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.tombstone \n",
"count 1 \n",
"unique 1 \n",
"top True \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fairsharing_df.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0\n",
"type 0\n",
"attributes.created-at 0\n",
"attributes.updated-at 0\n",
"attributes.metadata.doi 443\n",
"attributes.metadata.name 0\n",
"attributes.metadata.status 0\n",
"attributes.metadata.contacts 119\n",
"attributes.metadata.homepage 0\n",
"attributes.metadata.identifier 0\n",
"attributes.metadata.description 0\n",
"attributes.metadata.support-links 189\n",
"attributes.metadata.year-creation 305\n",
"attributes.metadata.data-processes 232\n",
"attributes.legacy-ids 0\n",
"attributes.fairsharing-registry 0\n",
"attributes.record-type 0\n",
"attributes.subjects 0\n",
"attributes.domains 0\n",
"attributes.taxonomies 0\n",
"attributes.user-defined-tags 0\n",
"attributes.countries 0\n",
"attributes.name 0\n",
"attributes.abbreviation 159\n",
"attributes.url 0\n",
"attributes.doi 443\n",
"attributes.fairsharing-licence 0\n",
"attributes.description 0\n",
"attributes.publications 0\n",
"attributes.licence-links 0\n",
"attributes.metadata.citations 1471\n",
"attributes.metadata.abbreviation 159\n",
"attributes.metadata.access-points 1348\n",
"attributes.metadata.associated-tools 1179\n",
"attributes.metadata.deprecation-date 1580\n",
"attributes.metadata.deprecation-reason 1580\n",
"attributes.metadata.tombstone 1796\n",
"dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fairsharing_df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"attributes.record-type\n",
"knowledgebase 774\n",
"knowledgebase_and_repository 97\n",
"repository 926\n",
"dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(fairsharing_df['attributes.record-type']).groupby('attributes.record-type').size()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}