2021-07-22 11:35:40 +02:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2021-10-08 12:46:14 +02:00
|
|
|
"execution_count": 2,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import ast\n",
|
|
|
|
"import csv\n",
|
|
|
|
"import json\n",
|
|
|
|
"\n",
|
|
|
|
"import numpy as np\n",
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"\n",
|
|
|
|
"import plotly\n",
|
|
|
|
"from plotly.offline import iplot, init_notebook_mode\n",
|
|
|
|
"import plotly.graph_objs as go\n",
|
|
|
|
"import plotly.express as px\n",
|
|
|
|
"\n",
|
|
|
|
"pd.set_option('display.max_columns', None)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"## Loading datasets"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2021-10-08 12:46:14 +02:00
|
|
|
"execution_count": 6,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <th>id</th>\n",
|
|
|
|
" <th>type</th>\n",
|
|
|
|
" <th>attributes.created-at</th>\n",
|
|
|
|
" <th>attributes.updated-at</th>\n",
|
|
|
|
" <th>attributes.metadata.doi</th>\n",
|
|
|
|
" <th>attributes.metadata.name</th>\n",
|
|
|
|
" <th>attributes.metadata.status</th>\n",
|
|
|
|
" <th>attributes.metadata.contacts</th>\n",
|
|
|
|
" <th>attributes.metadata.homepage</th>\n",
|
|
|
|
" <th>attributes.metadata.identifier</th>\n",
|
|
|
|
" <th>attributes.metadata.description</th>\n",
|
|
|
|
" <th>attributes.metadata.support-links</th>\n",
|
|
|
|
" <th>attributes.metadata.year-creation</th>\n",
|
|
|
|
" <th>attributes.metadata.data-processes</th>\n",
|
|
|
|
" <th>attributes.legacy-ids</th>\n",
|
|
|
|
" <th>attributes.fairsharing-registry</th>\n",
|
|
|
|
" <th>attributes.record-type</th>\n",
|
|
|
|
" <th>attributes.subjects</th>\n",
|
|
|
|
" <th>attributes.domains</th>\n",
|
|
|
|
" <th>attributes.taxonomies</th>\n",
|
|
|
|
" <th>attributes.user-defined-tags</th>\n",
|
|
|
|
" <th>attributes.countries</th>\n",
|
|
|
|
" <th>attributes.name</th>\n",
|
|
|
|
" <th>attributes.abbreviation</th>\n",
|
|
|
|
" <th>attributes.url</th>\n",
|
|
|
|
" <th>attributes.doi</th>\n",
|
|
|
|
" <th>attributes.fairsharing-licence</th>\n",
|
|
|
|
" <th>attributes.description</th>\n",
|
|
|
|
" <th>attributes.publications</th>\n",
|
|
|
|
" <th>attributes.licence-links</th>\n",
|
|
|
|
" <th>attributes.metadata.citations</th>\n",
|
|
|
|
" <th>attributes.metadata.abbreviation</th>\n",
|
|
|
|
" <th>attributes.metadata.access-points</th>\n",
|
|
|
|
" <th>attributes.metadata.associated-tools</th>\n",
|
|
|
|
" <th>attributes.metadata.deprecation-date</th>\n",
|
|
|
|
" <th>attributes.metadata.deprecation-reason</th>\n",
|
|
|
|
" <th>attributes.metadata.tombstone</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <td>1723</td>\n",
|
|
|
|
" <td>fairsharing-records</td>\n",
|
|
|
|
" <td>2014-11-04T15:23:40.000Z</td>\n",
|
|
|
|
" <td>2021-09-30T11:39:06.829Z</td>\n",
|
|
|
|
" <td>10.25504/FAIRsharing.8t18te</td>\n",
|
|
|
|
" <td>Cell Image Library</td>\n",
|
|
|
|
" <td>ready</td>\n",
|
|
|
|
" <td>[{'contact-name': 'David Orloff', 'contact-ema...</td>\n",
|
|
|
|
" <td>http://www.cellimagelibrary.org</td>\n",
|
|
|
|
" <td>1723</td>\n",
|
|
|
|
" <td>This library is a public and easily accessible...</td>\n",
|
|
|
|
" <td>[{'url': 'http://www.cellimagelibrary.org/page...</td>\n",
|
|
|
|
" <td>2010.0</td>\n",
|
|
|
|
" <td>[{'name': 'live update', 'type': 'data release...</td>\n",
|
|
|
|
" <td>[biodbcore-000180, bsg-d000180]</td>\n",
|
|
|
|
" <td>Database</td>\n",
|
|
|
|
" <td>repository</td>\n",
|
|
|
|
" <td>[Cell Biology, Life Science]</td>\n",
|
|
|
|
" <td>[Cell, Microscopy, Light microscopy, Electron ...</td>\n",
|
|
|
|
" <td>[All]</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[United States]</td>\n",
|
|
|
|
" <td>FAIRsharing record for: Cell Image Library</td>\n",
|
|
|
|
" <td>None</td>\n",
|
|
|
|
" <td>https://fairsharing.org/10.25504/FAIRsharing.8...</td>\n",
|
|
|
|
" <td>10.25504/FAIRsharing.8t18te</td>\n",
|
|
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
|
|
" <td>This FAIRsharing record describes: This librar...</td>\n",
|
|
|
|
" <td>[{'id': 232, 'pubmed_id': 23203874, 'title': '...</td>\n",
|
|
|
|
" <td>[{'licence-name': 'Cell Image Library Data Pol...</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>1</th>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <td>3101</td>\n",
|
|
|
|
" <td>fairsharing-records</td>\n",
|
|
|
|
" <td>2020-09-16T08:49:13.000Z</td>\n",
|
|
|
|
" <td>2021-09-30T11:36:45.452Z</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>WHOI Ship Data-Grabber System</td>\n",
|
|
|
|
" <td>ready</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>http://4dgeo.whoi.edu/shipdata/SDG_shipdata.html</td>\n",
|
|
|
|
" <td>3101</td>\n",
|
|
|
|
" <td>The WHOI Ship DataGrabber system provides the ...</td>\n",
|
|
|
|
" <td>[{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o...</td>\n",
|
|
|
|
" <td>2004.0</td>\n",
|
|
|
|
" <td>[{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai...</td>\n",
|
|
|
|
" <td>[biodbcore-001609, bsg-d001609]</td>\n",
|
|
|
|
" <td>Database</td>\n",
|
|
|
|
" <td>repository</td>\n",
|
|
|
|
" <td>[Earth Science, Water Research, Oceanography]</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[Not applicable]</td>\n",
|
|
|
|
" <td>[subseafloor environments]</td>\n",
|
|
|
|
" <td>[United States]</td>\n",
|
|
|
|
" <td>FAIRsharing record for: WHOI Ship Data-Grabber...</td>\n",
|
|
|
|
" <td>None</td>\n",
|
|
|
|
" <td>https://fairsharing.org/fairsharing_records/3101</td>\n",
|
|
|
|
" <td>None</td>\n",
|
|
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
|
|
" <td>This FAIRsharing record describes: The WHOI Sh...</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[{'licence-name': 'NDSF Data Archive Policy', ...</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>2</th>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <td>2649</td>\n",
|
|
|
|
" <td>fairsharing-records</td>\n",
|
|
|
|
" <td>2018-08-07T20:23:32.000Z</td>\n",
|
|
|
|
" <td>2021-09-30T11:39:07.898Z</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>Electron Microscope Public Image Archive</td>\n",
|
|
|
|
" <td>ready</td>\n",
|
|
|
|
" <td>[{'contact-name': 'General contact', 'contact-...</td>\n",
|
|
|
|
" <td>https://www.ebi.ac.uk/pdbe/emdb/empiar/</td>\n",
|
|
|
|
" <td>2649</td>\n",
|
|
|
|
" <td>EMPIAR, the Electron Microscopy Public Image A...</td>\n",
|
|
|
|
" <td>[{'url': 'https://www.ebi.ac.uk/support/EMPIAR...</td>\n",
|
|
|
|
" <td>2015.0</td>\n",
|
|
|
|
" <td>[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...</td>\n",
|
|
|
|
" <td>[biodbcore-001140, bsg-d001140]</td>\n",
|
|
|
|
" <td>Database</td>\n",
|
|
|
|
" <td>repository</td>\n",
|
|
|
|
" <td>[Bioinformatics, Biology]</td>\n",
|
|
|
|
" <td>[Protein image, Microscopy, Electron microscop...</td>\n",
|
|
|
|
" <td>[All]</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[Greece, Czech Republic, United Kingdom, Icela...</td>\n",
|
|
|
|
" <td>FAIRsharing record for: Electron Microscope Pu...</td>\n",
|
|
|
|
" <td>EMPIAR</td>\n",
|
|
|
|
" <td>https://fairsharing.org/fairsharing_records/2649</td>\n",
|
|
|
|
" <td>None</td>\n",
|
|
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
|
|
" <td>This FAIRsharing record describes: EMPIAR, the...</td>\n",
|
|
|
|
" <td>[{'id': 2232, 'pubmed_id': 27067018, 'title': ...</td>\n",
|
|
|
|
" <td>[{'licence-name': 'EMBL-EBI Terms of Use', 'li...</td>\n",
|
|
|
|
" <td>[{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27...</td>\n",
|
|
|
|
" <td>EMPIAR</td>\n",
|
|
|
|
" <td>[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...</td>\n",
|
|
|
|
" <td>[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>3</th>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <td>2657</td>\n",
|
|
|
|
" <td>fairsharing-records</td>\n",
|
|
|
|
" <td>2018-08-13T15:12:11.000Z</td>\n",
|
|
|
|
" <td>2021-09-30T11:37:28.736Z</td>\n",
|
|
|
|
" <td>10.25504/FAIRsharing.tnByoG</td>\n",
|
|
|
|
" <td>ClinicalStudyDataRequest.com</td>\n",
|
|
|
|
" <td>ready</td>\n",
|
|
|
|
" <td>[{'contact-email': 'support@clinicalstudydatar...</td>\n",
|
|
|
|
" <td>https://clinicalstudydatarequest.com/</td>\n",
|
|
|
|
" <td>2657</td>\n",
|
|
|
|
" <td>ClinicalStudyDataRequest.com (CSDR) is a conso...</td>\n",
|
|
|
|
" <td>[{'url': 'https://clinicalstudydatarequest.com...</td>\n",
|
|
|
|
" <td>2014.0</td>\n",
|
|
|
|
" <td>[{'url': 'https://clinicalstudydatarequest.com...</td>\n",
|
|
|
|
" <td>[biodbcore-001149, bsg-d001149]</td>\n",
|
|
|
|
" <td>Database</td>\n",
|
|
|
|
" <td>repository</td>\n",
|
|
|
|
" <td>[Preclinical Studies, Biomedical Science]</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[Homo sapiens]</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[Worldwide]</td>\n",
|
|
|
|
" <td>FAIRsharing record for: ClinicalStudyDataReque...</td>\n",
|
|
|
|
" <td>CSDR</td>\n",
|
|
|
|
" <td>https://fairsharing.org/10.25504/FAIRsharing.t...</td>\n",
|
|
|
|
" <td>10.25504/FAIRsharing.tnByoG</td>\n",
|
|
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
|
|
" <td>This FAIRsharing record describes: ClinicalStu...</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[{'licence-name': 'CSDR Data Sharing Agreement...</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>CSDR</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>4</th>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <td>2078</td>\n",
|
|
|
|
" <td>fairsharing-records</td>\n",
|
|
|
|
" <td>2014-11-04T15:23:40.000Z</td>\n",
|
|
|
|
" <td>2021-09-30T11:34:43.129Z</td>\n",
|
|
|
|
" <td>10.25504/FAIRsharing.3axym7</td>\n",
|
|
|
|
" <td>Germplasm Resources Information Network</td>\n",
|
|
|
|
" <td>ready</td>\n",
|
|
|
|
" <td>[{'contact-email': 'dbmu@ars-grin.gov'}]</td>\n",
|
|
|
|
" <td>https://www.ars-grin.gov/</td>\n",
|
|
|
|
" <td>2078</td>\n",
|
|
|
|
" <td>GRIN provides National Genetic Resources Progr...</td>\n",
|
|
|
|
" <td>[{'url': 'https://www.ars-grin.gov/Pages/Colle...</td>\n",
|
|
|
|
" <td>2010.0</td>\n",
|
|
|
|
" <td>[{'url': 'https://www.ars-grin.gov/', 'name': ...</td>\n",
|
|
|
|
" <td>[biodbcore-000546, bsg-d000546]</td>\n",
|
|
|
|
" <td>Database</td>\n",
|
|
|
|
" <td>repository</td>\n",
|
|
|
|
" <td>[Life Science]</td>\n",
|
|
|
|
" <td>[Cell, Cell culture, Germplasm]</td>\n",
|
|
|
|
" <td>[Bacteria, Metazoa, Viridiplantae]</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[United States]</td>\n",
|
|
|
|
" <td>FAIRsharing record for: Germplasm Resources In...</td>\n",
|
|
|
|
" <td>GRIN</td>\n",
|
|
|
|
" <td>https://fairsharing.org/10.25504/FAIRsharing.3...</td>\n",
|
|
|
|
" <td>10.25504/FAIRsharing.3axym7</td>\n",
|
|
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
|
|
" <td>This FAIRsharing record describes: GRIN provid...</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>GRIN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
2021-10-08 12:46:14 +02:00
|
|
|
" id type attributes.created-at \\\n",
|
|
|
|
"0 1723 fairsharing-records 2014-11-04T15:23:40.000Z \n",
|
|
|
|
"1 3101 fairsharing-records 2020-09-16T08:49:13.000Z \n",
|
|
|
|
"2 2649 fairsharing-records 2018-08-07T20:23:32.000Z \n",
|
|
|
|
"3 2657 fairsharing-records 2018-08-13T15:12:11.000Z \n",
|
|
|
|
"4 2078 fairsharing-records 2014-11-04T15:23:40.000Z \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.updated-at attributes.metadata.doi \\\n",
|
|
|
|
"0 2021-09-30T11:39:06.829Z 10.25504/FAIRsharing.8t18te \n",
|
|
|
|
"1 2021-09-30T11:36:45.452Z NaN \n",
|
|
|
|
"2 2021-09-30T11:39:07.898Z NaN \n",
|
|
|
|
"3 2021-09-30T11:37:28.736Z 10.25504/FAIRsharing.tnByoG \n",
|
|
|
|
"4 2021-09-30T11:34:43.129Z 10.25504/FAIRsharing.3axym7 \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.name attributes.metadata.status \\\n",
|
|
|
|
"0 Cell Image Library ready \n",
|
|
|
|
"1 WHOI Ship Data-Grabber System ready \n",
|
|
|
|
"2 Electron Microscope Public Image Archive ready \n",
|
|
|
|
"3 ClinicalStudyDataRequest.com ready \n",
|
|
|
|
"4 Germplasm Resources Information Network ready \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.contacts \\\n",
|
|
|
|
"0 [{'contact-name': 'David Orloff', 'contact-ema... \n",
|
|
|
|
"1 NaN \n",
|
|
|
|
"2 [{'contact-name': 'General contact', 'contact-... \n",
|
|
|
|
"3 [{'contact-email': 'support@clinicalstudydatar... \n",
|
|
|
|
"4 [{'contact-email': 'dbmu@ars-grin.gov'}] \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.homepage \\\n",
|
|
|
|
"0 http://www.cellimagelibrary.org \n",
|
|
|
|
"1 http://4dgeo.whoi.edu/shipdata/SDG_shipdata.html \n",
|
|
|
|
"2 https://www.ebi.ac.uk/pdbe/emdb/empiar/ \n",
|
|
|
|
"3 https://clinicalstudydatarequest.com/ \n",
|
|
|
|
"4 https://www.ars-grin.gov/ \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.identifier \\\n",
|
|
|
|
"0 1723 \n",
|
|
|
|
"1 3101 \n",
|
|
|
|
"2 2649 \n",
|
|
|
|
"3 2657 \n",
|
|
|
|
"4 2078 \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.description \\\n",
|
|
|
|
"0 This library is a public and easily accessible... \n",
|
|
|
|
"1 The WHOI Ship DataGrabber system provides the ... \n",
|
|
|
|
"2 EMPIAR, the Electron Microscopy Public Image A... \n",
|
|
|
|
"3 ClinicalStudyDataRequest.com (CSDR) is a conso... \n",
|
|
|
|
"4 GRIN provides National Genetic Resources Progr... \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.support-links \\\n",
|
|
|
|
"0 [{'url': 'http://www.cellimagelibrary.org/page... \n",
|
|
|
|
"1 [{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o... \n",
|
|
|
|
"2 [{'url': 'https://www.ebi.ac.uk/support/EMPIAR... \n",
|
|
|
|
"3 [{'url': 'https://clinicalstudydatarequest.com... \n",
|
|
|
|
"4 [{'url': 'https://www.ars-grin.gov/Pages/Colle... \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.year-creation \\\n",
|
|
|
|
"0 2010.0 \n",
|
|
|
|
"1 2004.0 \n",
|
|
|
|
"2 2015.0 \n",
|
|
|
|
"3 2014.0 \n",
|
|
|
|
"4 2010.0 \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.data-processes \\\n",
|
|
|
|
"0 [{'name': 'live update', 'type': 'data release... \n",
|
|
|
|
"1 [{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai... \n",
|
|
|
|
"2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
|
|
|
|
"3 [{'url': 'https://clinicalstudydatarequest.com... \n",
|
|
|
|
"4 [{'url': 'https://www.ars-grin.gov/', 'name': ... \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
|
|
|
|
"0 [biodbcore-000180, bsg-d000180] Database \n",
|
|
|
|
"1 [biodbcore-001609, bsg-d001609] Database \n",
|
|
|
|
"2 [biodbcore-001140, bsg-d001140] Database \n",
|
|
|
|
"3 [biodbcore-001149, bsg-d001149] Database \n",
|
|
|
|
"4 [biodbcore-000546, bsg-d000546] Database \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.record-type attributes.subjects \\\n",
|
|
|
|
"0 repository [Cell Biology, Life Science] \n",
|
|
|
|
"1 repository [Earth Science, Water Research, Oceanography] \n",
|
|
|
|
"2 repository [Bioinformatics, Biology] \n",
|
|
|
|
"3 repository [Preclinical Studies, Biomedical Science] \n",
|
|
|
|
"4 repository [Life Science] \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.domains \\\n",
|
|
|
|
"0 [Cell, Microscopy, Light microscopy, Electron ... \n",
|
|
|
|
"1 [] \n",
|
|
|
|
"2 [Protein image, Microscopy, Electron microscop... \n",
|
|
|
|
"3 [] \n",
|
|
|
|
"4 [Cell, Cell culture, Germplasm] \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.taxonomies attributes.user-defined-tags \\\n",
|
|
|
|
"0 [All] [] \n",
|
|
|
|
"1 [Not applicable] [subseafloor environments] \n",
|
|
|
|
"2 [All] [] \n",
|
|
|
|
"3 [Homo sapiens] [] \n",
|
|
|
|
"4 [Bacteria, Metazoa, Viridiplantae] [] \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.countries \\\n",
|
|
|
|
"0 [United States] \n",
|
|
|
|
"1 [United States] \n",
|
|
|
|
"2 [Greece, Czech Republic, United Kingdom, Icela... \n",
|
|
|
|
"3 [Worldwide] \n",
|
|
|
|
"4 [United States] \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.name attributes.abbreviation \\\n",
|
|
|
|
"0 FAIRsharing record for: Cell Image Library None \n",
|
|
|
|
"1 FAIRsharing record for: WHOI Ship Data-Grabber... None \n",
|
|
|
|
"2 FAIRsharing record for: Electron Microscope Pu... EMPIAR \n",
|
|
|
|
"3 FAIRsharing record for: ClinicalStudyDataReque... CSDR \n",
|
|
|
|
"4 FAIRsharing record for: Germplasm Resources In... GRIN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.url \\\n",
|
|
|
|
"0 https://fairsharing.org/10.25504/FAIRsharing.8... \n",
|
|
|
|
"1 https://fairsharing.org/fairsharing_records/3101 \n",
|
|
|
|
"2 https://fairsharing.org/fairsharing_records/2649 \n",
|
|
|
|
"3 https://fairsharing.org/10.25504/FAIRsharing.t... \n",
|
|
|
|
"4 https://fairsharing.org/10.25504/FAIRsharing.3... \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.doi \\\n",
|
|
|
|
"0 10.25504/FAIRsharing.8t18te \n",
|
|
|
|
"1 None \n",
|
|
|
|
"2 None \n",
|
|
|
|
"3 10.25504/FAIRsharing.tnByoG \n",
|
|
|
|
"4 10.25504/FAIRsharing.3axym7 \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.fairsharing-licence \\\n",
|
|
|
|
"0 https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
|
|
"1 https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
|
|
"2 https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
|
|
"3 https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
|
|
"4 https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.description \\\n",
|
|
|
|
"0 This FAIRsharing record describes: This librar... \n",
|
|
|
|
"1 This FAIRsharing record describes: The WHOI Sh... \n",
|
|
|
|
"2 This FAIRsharing record describes: EMPIAR, the... \n",
|
|
|
|
"3 This FAIRsharing record describes: ClinicalStu... \n",
|
|
|
|
"4 This FAIRsharing record describes: GRIN provid... \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.publications \\\n",
|
|
|
|
"0 [{'id': 232, 'pubmed_id': 23203874, 'title': '... \n",
|
|
|
|
"1 [] \n",
|
|
|
|
"2 [{'id': 2232, 'pubmed_id': 27067018, 'title': ... \n",
|
|
|
|
"3 [] \n",
|
|
|
|
"4 [] \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.licence-links \\\n",
|
|
|
|
"0 [{'licence-name': 'Cell Image Library Data Pol... \n",
|
|
|
|
"1 [{'licence-name': 'NDSF Data Archive Policy', ... \n",
|
|
|
|
"2 [{'licence-name': 'EMBL-EBI Terms of Use', 'li... \n",
|
|
|
|
"3 [{'licence-name': 'CSDR Data Sharing Agreement... \n",
|
|
|
|
"4 [] \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.citations \\\n",
|
|
|
|
"0 NaN \n",
|
|
|
|
"1 NaN \n",
|
|
|
|
"2 [{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27... \n",
|
|
|
|
"3 NaN \n",
|
|
|
|
"4 NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.abbreviation \\\n",
|
|
|
|
"0 NaN \n",
|
|
|
|
"1 NaN \n",
|
|
|
|
"2 EMPIAR \n",
|
|
|
|
"3 CSDR \n",
|
|
|
|
"4 GRIN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.access-points \\\n",
|
|
|
|
"0 NaN \n",
|
|
|
|
"1 NaN \n",
|
|
|
|
"2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
|
|
|
|
"3 NaN \n",
|
|
|
|
"4 NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.associated-tools \\\n",
|
|
|
|
"0 NaN \n",
|
|
|
|
"1 NaN \n",
|
|
|
|
"2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
|
|
|
|
"3 NaN \n",
|
|
|
|
"4 NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.deprecation-date attributes.metadata.deprecation-reason \\\n",
|
|
|
|
"0 NaN NaN \n",
|
|
|
|
"1 NaN NaN \n",
|
|
|
|
"2 NaN NaN \n",
|
|
|
|
"3 NaN NaN \n",
|
|
|
|
"4 NaN NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.tombstone \n",
|
|
|
|
"0 NaN \n",
|
|
|
|
"1 NaN \n",
|
|
|
|
"2 NaN \n",
|
|
|
|
"3 NaN \n",
|
|
|
|
"4 NaN "
|
2021-07-22 11:35:40 +02:00
|
|
|
]
|
|
|
|
},
|
2021-10-08 12:46:14 +02:00
|
|
|
"execution_count": 6,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
2021-10-08 12:46:14 +02:00
|
|
|
"# The dump is newline-delimited JSON: one FAIRsharing record per line.\n",
"# Read it as UTF-8 explicitly (assumed encoding of the dump; the platform default\n",
"# may differ) and iterate the file handle instead of calling str.splitlines(),\n",
"# which also splits on characters such as U+2028 that can occur inside JSON strings.\n",
"with open('../data/raw/fairsharing_dump_api_09_2021.json', encoding='utf-8') as f:\n",
"    lines = [line for line in map(str.strip, f) if line]  # skip blank lines\n",
"\n",
"# Parse every record exactly once and flatten nested objects into dotted\n",
"# column names (e.g. attributes.metadata.name).\n",
"fairsharing_df = pd.json_normalize([json.loads(line) for line in lines])\n",
"\n",
"fairsharing_df.head()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2021-10-08 12:46:14 +02:00
|
|
|
"execution_count": 7,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <th>id</th>\n",
|
|
|
|
" <th>type</th>\n",
|
|
|
|
" <th>attributes.created-at</th>\n",
|
|
|
|
" <th>attributes.updated-at</th>\n",
|
|
|
|
" <th>attributes.metadata.doi</th>\n",
|
|
|
|
" <th>attributes.metadata.name</th>\n",
|
|
|
|
" <th>attributes.metadata.status</th>\n",
|
|
|
|
" <th>attributes.metadata.contacts</th>\n",
|
|
|
|
" <th>attributes.metadata.homepage</th>\n",
|
|
|
|
" <th>attributes.metadata.identifier</th>\n",
|
|
|
|
" <th>attributes.metadata.description</th>\n",
|
|
|
|
" <th>attributes.metadata.support-links</th>\n",
|
|
|
|
" <th>attributes.metadata.year-creation</th>\n",
|
|
|
|
" <th>attributes.metadata.data-processes</th>\n",
|
|
|
|
" <th>attributes.legacy-ids</th>\n",
|
|
|
|
" <th>attributes.fairsharing-registry</th>\n",
|
|
|
|
" <th>attributes.record-type</th>\n",
|
|
|
|
" <th>attributes.subjects</th>\n",
|
|
|
|
" <th>attributes.domains</th>\n",
|
|
|
|
" <th>attributes.taxonomies</th>\n",
|
|
|
|
" <th>attributes.user-defined-tags</th>\n",
|
|
|
|
" <th>attributes.countries</th>\n",
|
|
|
|
" <th>attributes.name</th>\n",
|
|
|
|
" <th>attributes.abbreviation</th>\n",
|
|
|
|
" <th>attributes.url</th>\n",
|
|
|
|
" <th>attributes.doi</th>\n",
|
|
|
|
" <th>attributes.fairsharing-licence</th>\n",
|
|
|
|
" <th>attributes.description</th>\n",
|
|
|
|
" <th>attributes.publications</th>\n",
|
|
|
|
" <th>attributes.licence-links</th>\n",
|
|
|
|
" <th>attributes.metadata.citations</th>\n",
|
|
|
|
" <th>attributes.metadata.abbreviation</th>\n",
|
|
|
|
" <th>attributes.metadata.access-points</th>\n",
|
|
|
|
" <th>attributes.metadata.associated-tools</th>\n",
|
|
|
|
" <th>attributes.metadata.deprecation-date</th>\n",
|
|
|
|
" <th>attributes.metadata.deprecation-reason</th>\n",
|
|
|
|
" <th>attributes.metadata.tombstone</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>count</th>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1354</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1678</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797.000000</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1608</td>\n",
|
|
|
|
" <td>1492.000000</td>\n",
|
|
|
|
" <td>1565</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1638</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1354</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>326</td>\n",
|
|
|
|
" <td>1638</td>\n",
|
|
|
|
" <td>449</td>\n",
|
|
|
|
" <td>618</td>\n",
|
|
|
|
" <td>217</td>\n",
|
|
|
|
" <td>217</td>\n",
|
|
|
|
" <td>1</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>unique</th>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>1162</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1354</td>\n",
|
|
|
|
" <td>1796</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>1576</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1594</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>1563</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>888</td>\n",
|
|
|
|
" <td>1163</td>\n",
|
|
|
|
" <td>378</td>\n",
|
|
|
|
" <td>384</td>\n",
|
|
|
|
" <td>185</td>\n",
|
|
|
|
" <td>1796</td>\n",
|
|
|
|
" <td>1626</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1354</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1109</td>\n",
|
|
|
|
" <td>1082</td>\n",
|
|
|
|
" <td>320</td>\n",
|
|
|
|
" <td>1626</td>\n",
|
|
|
|
" <td>444</td>\n",
|
|
|
|
" <td>615</td>\n",
|
|
|
|
" <td>55</td>\n",
|
|
|
|
" <td>86</td>\n",
|
|
|
|
" <td>1</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>top</th>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <td>1723</td>\n",
|
|
|
|
" <td>fairsharing-records</td>\n",
|
|
|
|
" <td>2014-11-04T15:23:40.000Z</td>\n",
|
|
|
|
" <td>2021-09-30T11:39:06.829Z</td>\n",
|
|
|
|
" <td>10.25504/FAIRsharing.8t18te</td>\n",
|
|
|
|
" <td>OmicsDB</td>\n",
|
|
|
|
" <td>ready</td>\n",
|
|
|
|
" <td>[{'contact-name': 'Sam Hokin', 'contact-email'...</td>\n",
|
|
|
|
" <td>http://www.cellimagelibrary.org</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>This library is a public and easily accessible...</td>\n",
|
|
|
|
" <td>[{'url': 'https://github.com/gbif/ipt/wiki/IPT...</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>[{'url': 'http://qf.iodp.tamu.edu/qfsearch/sea...</td>\n",
|
|
|
|
" <td>[biodbcore-000180, bsg-d000180]</td>\n",
|
|
|
|
" <td>Database</td>\n",
|
|
|
|
" <td>repository</td>\n",
|
|
|
|
" <td>[Life Science]</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[All]</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[United States]</td>\n",
|
|
|
|
" <td>FAIRsharing record for: OmicsDB</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>CGD</td>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <td>https://fairsharing.org/10.25504/FAIRsharing.8...</td>\n",
|
|
|
|
" <td>10.25504/FAIRsharing.8t18te</td>\n",
|
|
|
|
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
|
|
|
|
" <td>This FAIRsharing record describes: This librar...</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>[{'doi': '10.1093/nar/gkz890', 'pubmed-id': 31...</td>\n",
|
|
|
|
" <td>CGD</td>\n",
|
|
|
|
" <td>[{'url': 'https://github.com/Ensembl', 'name':...</td>\n",
|
|
|
|
" <td>[{'url': 'http://www.h-invitational.jp/hinv/bl...</td>\n",
|
|
|
|
" <td>2021-9-17</td>\n",
|
|
|
|
" <td>This resource is no longer available at the st...</td>\n",
|
|
|
|
" <td>True</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>freq</th>\n",
|
|
|
|
" <td>1</td>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>636</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>1540</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>926</td>\n",
|
|
|
|
" <td>350</td>\n",
|
|
|
|
" <td>265</td>\n",
|
|
|
|
" <td>502</td>\n",
|
|
|
|
" <td>1193</td>\n",
|
|
|
|
" <td>594</td>\n",
|
|
|
|
" <td>2</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>1</td>\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" <td>1797</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>661</td>\n",
|
|
|
|
" <td>716</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>84</td>\n",
|
|
|
|
" <td>113</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>mean</th>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>2446.100167</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>2007.636059</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>std</th>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>520.058757</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>10.953269</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>min</th>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>1547.000000</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>1894.000000</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>25%</th>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>1996.000000</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>2004.000000</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>50%</th>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>2445.000000</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>2010.000000</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>75%</th>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>2897.000000</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>2014.000000</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>max</th>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>3346.000000</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>2021.000000</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
2021-10-08 12:46:14 +02:00
|
|
|
" id type attributes.created-at \\\n",
|
|
|
|
"count 1797 1797 1797 \n",
|
|
|
|
"unique 1797 1 1162 \n",
|
|
|
|
"top 1723 fairsharing-records 2014-11-04T15:23:40.000Z \n",
|
|
|
|
"freq 1 1797 636 \n",
|
|
|
|
"mean NaN NaN NaN \n",
|
|
|
|
"std NaN NaN NaN \n",
|
|
|
|
"min NaN NaN NaN \n",
|
|
|
|
"25% NaN NaN NaN \n",
|
|
|
|
"50% NaN NaN NaN \n",
|
|
|
|
"75% NaN NaN NaN \n",
|
|
|
|
"max NaN NaN NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.updated-at attributes.metadata.doi \\\n",
|
|
|
|
"count 1797 1354 \n",
|
|
|
|
"unique 1797 1354 \n",
|
|
|
|
"top 2021-09-30T11:39:06.829Z 10.25504/FAIRsharing.8t18te \n",
|
|
|
|
"freq 1 1 \n",
|
|
|
|
"mean NaN NaN \n",
|
|
|
|
"std NaN NaN \n",
|
|
|
|
"min NaN NaN \n",
|
|
|
|
"25% NaN NaN \n",
|
|
|
|
"50% NaN NaN \n",
|
|
|
|
"75% NaN NaN \n",
|
|
|
|
"max NaN NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.name attributes.metadata.status \\\n",
|
|
|
|
"count 1797 1797 \n",
|
|
|
|
"unique 1796 4 \n",
|
|
|
|
"top OmicsDB ready \n",
|
|
|
|
"freq 2 1540 \n",
|
|
|
|
"mean NaN NaN \n",
|
|
|
|
"std NaN NaN \n",
|
|
|
|
"min NaN NaN \n",
|
|
|
|
"25% NaN NaN \n",
|
|
|
|
"50% NaN NaN \n",
|
|
|
|
"75% NaN NaN \n",
|
|
|
|
"max NaN NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.contacts \\\n",
|
|
|
|
"count 1678 \n",
|
|
|
|
"unique 1576 \n",
|
|
|
|
"top [{'contact-name': 'Sam Hokin', 'contact-email'... \n",
|
|
|
|
"freq 6 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.homepage attributes.metadata.identifier \\\n",
|
|
|
|
"count 1797 1797.000000 \n",
|
|
|
|
"unique 1797 NaN \n",
|
|
|
|
"top http://www.cellimagelibrary.org NaN \n",
|
|
|
|
"freq 1 NaN \n",
|
|
|
|
"mean NaN 2446.100167 \n",
|
|
|
|
"std NaN 520.058757 \n",
|
|
|
|
"min NaN 1547.000000 \n",
|
|
|
|
"25% NaN 1996.000000 \n",
|
|
|
|
"50% NaN 2445.000000 \n",
|
|
|
|
"75% NaN 2897.000000 \n",
|
|
|
|
"max NaN 3346.000000 \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.description \\\n",
|
|
|
|
"count 1797 \n",
|
|
|
|
"unique 1797 \n",
|
|
|
|
"top This library is a public and easily accessible... \n",
|
2021-07-22 11:35:40 +02:00
|
|
|
"freq 1 \n",
|
2021-10-08 12:46:14 +02:00
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
2021-07-22 11:35:40 +02:00
|
|
|
"\n",
|
2021-10-08 12:46:14 +02:00
|
|
|
" attributes.metadata.support-links \\\n",
|
|
|
|
"count 1608 \n",
|
|
|
|
"unique 1594 \n",
|
|
|
|
"top [{'url': 'https://github.com/gbif/ipt/wiki/IPT... \n",
|
|
|
|
"freq 6 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.year-creation \\\n",
|
|
|
|
"count 1492.000000 \n",
|
|
|
|
"unique NaN \n",
|
|
|
|
"top NaN \n",
|
|
|
|
"freq NaN \n",
|
|
|
|
"mean 2007.636059 \n",
|
|
|
|
"std 10.953269 \n",
|
|
|
|
"min 1894.000000 \n",
|
|
|
|
"25% 2004.000000 \n",
|
|
|
|
"50% 2010.000000 \n",
|
|
|
|
"75% 2014.000000 \n",
|
|
|
|
"max 2021.000000 \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.data-processes \\\n",
|
|
|
|
"count 1565 \n",
|
|
|
|
"unique 1563 \n",
|
|
|
|
"top [{'url': 'http://qf.iodp.tamu.edu/qfsearch/sea... \n",
|
|
|
|
"freq 2 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
|
|
|
|
"count 1797 1797 \n",
|
|
|
|
"unique 1797 1 \n",
|
|
|
|
"top [biodbcore-000180, bsg-d000180] Database \n",
|
|
|
|
"freq 1 1797 \n",
|
|
|
|
"mean NaN NaN \n",
|
|
|
|
"std NaN NaN \n",
|
|
|
|
"min NaN NaN \n",
|
|
|
|
"25% NaN NaN \n",
|
|
|
|
"50% NaN NaN \n",
|
|
|
|
"75% NaN NaN \n",
|
|
|
|
"max NaN NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.record-type attributes.subjects attributes.domains \\\n",
|
|
|
|
"count 1797 1797 1797 \n",
|
|
|
|
"unique 3 888 1163 \n",
|
|
|
|
"top repository [Life Science] [] \n",
|
|
|
|
"freq 926 350 265 \n",
|
|
|
|
"mean NaN NaN NaN \n",
|
|
|
|
"std NaN NaN NaN \n",
|
|
|
|
"min NaN NaN NaN \n",
|
|
|
|
"25% NaN NaN NaN \n",
|
|
|
|
"50% NaN NaN NaN \n",
|
|
|
|
"75% NaN NaN NaN \n",
|
|
|
|
"max NaN NaN NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.taxonomies attributes.user-defined-tags \\\n",
|
|
|
|
"count 1797 1797 \n",
|
|
|
|
"unique 378 384 \n",
|
|
|
|
"top [All] [] \n",
|
|
|
|
"freq 502 1193 \n",
|
|
|
|
"mean NaN NaN \n",
|
|
|
|
"std NaN NaN \n",
|
|
|
|
"min NaN NaN \n",
|
|
|
|
"25% NaN NaN \n",
|
|
|
|
"50% NaN NaN \n",
|
|
|
|
"75% NaN NaN \n",
|
|
|
|
"max NaN NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.countries attributes.name \\\n",
|
|
|
|
"count 1797 1797 \n",
|
|
|
|
"unique 185 1796 \n",
|
|
|
|
"top [United States] FAIRsharing record for: OmicsDB \n",
|
|
|
|
"freq 594 2 \n",
|
|
|
|
"mean NaN NaN \n",
|
|
|
|
"std NaN NaN \n",
|
|
|
|
"min NaN NaN \n",
|
|
|
|
"25% NaN NaN \n",
|
|
|
|
"50% NaN NaN \n",
|
|
|
|
"75% NaN NaN \n",
|
|
|
|
"max NaN NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.abbreviation \\\n",
|
|
|
|
"count 1638 \n",
|
|
|
|
"unique 1626 \n",
|
|
|
|
"top CGD \n",
|
|
|
|
"freq 3 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.url \\\n",
|
|
|
|
"count 1797 \n",
|
|
|
|
"unique 1797 \n",
|
|
|
|
"top https://fairsharing.org/10.25504/FAIRsharing.8... \n",
|
|
|
|
"freq 1 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.doi \\\n",
|
|
|
|
"count 1354 \n",
|
|
|
|
"unique 1354 \n",
|
|
|
|
"top 10.25504/FAIRsharing.8t18te \n",
|
|
|
|
"freq 1 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.fairsharing-licence \\\n",
|
|
|
|
"count 1797 \n",
|
|
|
|
"unique 1 \n",
|
|
|
|
"top https://creativecommons.org/licenses/by-sa/4.0... \n",
|
|
|
|
"freq 1797 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.description \\\n",
|
|
|
|
"count 1797 \n",
|
|
|
|
"unique 1797 \n",
|
|
|
|
"top This FAIRsharing record describes: This librar... \n",
|
|
|
|
"freq 1 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.publications attributes.licence-links \\\n",
|
|
|
|
"count 1797 1797 \n",
|
|
|
|
"unique 1109 1082 \n",
|
|
|
|
"top [] [] \n",
|
|
|
|
"freq 661 716 \n",
|
|
|
|
"mean NaN NaN \n",
|
|
|
|
"std NaN NaN \n",
|
|
|
|
"min NaN NaN \n",
|
|
|
|
"25% NaN NaN \n",
|
|
|
|
"50% NaN NaN \n",
|
|
|
|
"75% NaN NaN \n",
|
|
|
|
"max NaN NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.citations \\\n",
|
|
|
|
"count 326 \n",
|
|
|
|
"unique 320 \n",
|
|
|
|
"top [{'doi': '10.1093/nar/gkz890', 'pubmed-id': 31... \n",
|
|
|
|
"freq 6 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.abbreviation \\\n",
|
|
|
|
"count 1638 \n",
|
|
|
|
"unique 1626 \n",
|
|
|
|
"top CGD \n",
|
|
|
|
"freq 3 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.access-points \\\n",
|
|
|
|
"count 449 \n",
|
|
|
|
"unique 444 \n",
|
|
|
|
"top [{'url': 'https://github.com/Ensembl', 'name':... \n",
|
|
|
|
"freq 3 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.associated-tools \\\n",
|
|
|
|
"count 618 \n",
|
|
|
|
"unique 615 \n",
|
|
|
|
"top [{'url': 'http://www.h-invitational.jp/hinv/bl... \n",
|
|
|
|
"freq 2 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.deprecation-date \\\n",
|
|
|
|
"count 217 \n",
|
|
|
|
"unique 55 \n",
|
|
|
|
"top 2021-9-17 \n",
|
|
|
|
"freq 84 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.deprecation-reason \\\n",
|
|
|
|
"count 217 \n",
|
|
|
|
"unique 86 \n",
|
|
|
|
"top This resource is no longer available at the st... \n",
|
|
|
|
"freq 113 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN \n",
|
|
|
|
"\n",
|
|
|
|
" attributes.metadata.tombstone \n",
|
|
|
|
"count 1 \n",
|
|
|
|
"unique 1 \n",
|
|
|
|
"top True \n",
|
|
|
|
"freq 1 \n",
|
|
|
|
"mean NaN \n",
|
|
|
|
"std NaN \n",
|
|
|
|
"min NaN \n",
|
|
|
|
"25% NaN \n",
|
|
|
|
"50% NaN \n",
|
|
|
|
"75% NaN \n",
|
|
|
|
"max NaN "
|
2021-07-22 11:35:40 +02:00
|
|
|
]
|
|
|
|
},
|
2021-10-08 12:46:14 +02:00
|
|
|
"execution_count": 7,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# Summary statistics for every column, numeric and non-numeric alike.\n",
"fairsharing_df.describe(\n",
"    include='all',\n",
")"
|
|
|
|
]
|
2021-07-23 12:41:17 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2021-10-08 12:46:14 +02:00
|
|
|
"execution_count": 8,
|
2021-07-23 12:41:17 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
2021-10-08 12:46:14 +02:00
|
|
|
"id 0\n",
|
|
|
|
"type 0\n",
|
|
|
|
"attributes.created-at 0\n",
|
|
|
|
"attributes.updated-at 0\n",
|
|
|
|
"attributes.metadata.doi 443\n",
|
|
|
|
"attributes.metadata.name 0\n",
|
|
|
|
"attributes.metadata.status 0\n",
|
|
|
|
"attributes.metadata.contacts 119\n",
|
|
|
|
"attributes.metadata.homepage 0\n",
|
|
|
|
"attributes.metadata.identifier 0\n",
|
|
|
|
"attributes.metadata.description 0\n",
|
|
|
|
"attributes.metadata.support-links 189\n",
|
|
|
|
"attributes.metadata.year-creation 305\n",
|
|
|
|
"attributes.metadata.data-processes 232\n",
|
|
|
|
"attributes.legacy-ids 0\n",
|
|
|
|
"attributes.fairsharing-registry 0\n",
|
|
|
|
"attributes.record-type 0\n",
|
|
|
|
"attributes.subjects 0\n",
|
|
|
|
"attributes.domains 0\n",
|
|
|
|
"attributes.taxonomies 0\n",
|
|
|
|
"attributes.user-defined-tags 0\n",
|
|
|
|
"attributes.countries 0\n",
|
|
|
|
"attributes.name 0\n",
|
|
|
|
"attributes.abbreviation 159\n",
|
|
|
|
"attributes.url 0\n",
|
|
|
|
"attributes.doi 443\n",
|
|
|
|
"attributes.fairsharing-licence 0\n",
|
|
|
|
"attributes.description 0\n",
|
|
|
|
"attributes.publications 0\n",
|
|
|
|
"attributes.licence-links 0\n",
|
|
|
|
"attributes.metadata.citations 1471\n",
|
|
|
|
"attributes.metadata.abbreviation 159\n",
|
|
|
|
"attributes.metadata.access-points 1348\n",
|
|
|
|
"attributes.metadata.associated-tools 1179\n",
|
|
|
|
"attributes.metadata.deprecation-date 1580\n",
|
|
|
|
"attributes.metadata.deprecation-reason 1580\n",
|
|
|
|
"attributes.metadata.tombstone 1796\n",
|
2021-07-23 12:41:17 +02:00
|
|
|
"dtype: int64"
|
|
|
|
]
|
|
|
|
},
|
2021-10-08 12:46:14 +02:00
|
|
|
"execution_count": 8,
|
2021-07-23 12:41:17 +02:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# Number of missing values in each column.\n",
"fairsharing_df.isna().sum(axis=0)"
|
|
|
|
]
|
|
|
|
},
|
2021-10-08 12:46:14 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 12,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"attributes.record-type\n",
|
|
|
|
"knowledgebase 774\n",
|
|
|
|
"knowledgebase_and_repository 97\n",
|
|
|
|
"repository 926\n",
|
|
|
|
"dtype: int64"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 12,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# Number of records per FAIRsharing record type.\n",
"# Group the frame directly: wrapping the column in a new single-column\n",
"# DataFrame first only adds a copy and yields the same result.\n",
"fairsharing_df.groupby('attributes.record-type').size()"
|
|
|
|
]
|
|
|
|
},
|
2021-07-23 12:41:17 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": []
|
2021-07-22 11:35:40 +02:00
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.8.3"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 4
|
|
|
|
}
|