{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading datasets" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "type | \n", "attributes.created-at | \n", "attributes.updated-at | \n", "attributes.metadata.doi | \n", "attributes.metadata.name | \n", "attributes.metadata.status | \n", "attributes.metadata.contacts | \n", "attributes.metadata.homepage | \n", "attributes.metadata.identifier | \n", "attributes.metadata.description | \n", "attributes.metadata.support-links | \n", "attributes.metadata.year-creation | \n", "attributes.metadata.data-processes | \n", "attributes.legacy-ids | \n", "attributes.fairsharing-registry | \n", "attributes.record-type | \n", "attributes.subjects | \n", "attributes.domains | \n", "attributes.taxonomies | \n", "attributes.user-defined-tags | \n", "attributes.countries | \n", "attributes.name | \n", "attributes.abbreviation | \n", "attributes.url | \n", "attributes.doi | \n", "attributes.fairsharing-licence | \n", "attributes.description | \n", "attributes.publications | \n", "attributes.licence-links | \n", "attributes.metadata.citations | \n", "attributes.metadata.abbreviation | \n", "attributes.metadata.access-points | \n", "attributes.metadata.associated-tools | \n", "attributes.metadata.deprecation-date | \n", "attributes.metadata.deprecation-reason | \n", "attributes.metadata.tombstone | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1723 | \n", "fairsharing-records | \n", "2014-11-04T15:23:40.000Z | \n", "2021-09-30T11:39:06.829Z | \n", "10.25504/FAIRsharing.8t18te | \n", "Cell Image Library | \n", "ready | \n", "[{'contact-name': 'David Orloff', 'contact-ema... | \n", "http://www.cellimagelibrary.org | \n", "1723 | \n", "This library is a public and easily accessible... | \n", "[{'url': 'http://www.cellimagelibrary.org/page... | \n", "2010.0 | \n", "[{'name': 'live update', 'type': 'data release... | \n", "[biodbcore-000180, bsg-d000180] | \n", "Database | \n", "repository | \n", "[Cell Biology, Life Science] | \n", "[Cell, Microscopy, Light microscopy, Electron ... | \n", "[All] | \n", "[] | \n", "[United States] | \n", "FAIRsharing record for: Cell Image Library | \n", "None | \n", "https://fairsharing.org/10.25504/FAIRsharing.8... | \n", "10.25504/FAIRsharing.8t18te | \n", "https://creativecommons.org/licenses/by-sa/4.0... | \n", "This FAIRsharing record describes: This librar... | \n", "[{'id': 232, 'pubmed_id': 23203874, 'title': '... | \n", "[{'licence-name': 'Cell Image Library Data Pol... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 | \n", "3101 | \n", "fairsharing-records | \n", "2020-09-16T08:49:13.000Z | \n", "2021-09-30T11:36:45.452Z | \n", "NaN | \n", "WHOI Ship Data-Grabber System | \n", "ready | \n", "NaN | \n", "http://4dgeo.whoi.edu/shipdata/SDG_shipdata.html | \n", "3101 | \n", "The WHOI Ship DataGrabber system provides the ... | \n", "[{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o... | \n", "2004.0 | \n", "[{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai... | \n", "[biodbcore-001609, bsg-d001609] | \n", "Database | \n", "repository | \n", "[Earth Science, Water Research, Oceanography] | \n", "[] | \n", "[Not applicable] | \n", "[subseafloor environments] | \n", "[United States] | \n", "FAIRsharing record for: WHOI Ship Data-Grabber... | \n", "None | \n", "https://fairsharing.org/fairsharing_records/3101 | \n", "None | \n", "https://creativecommons.org/licenses/by-sa/4.0... | \n", "This FAIRsharing record describes: The WHOI Sh... | \n", "[] | \n", "[{'licence-name': 'NDSF Data Archive Policy', ... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2 | \n", "2649 | \n", "fairsharing-records | \n", "2018-08-07T20:23:32.000Z | \n", "2021-09-30T11:39:07.898Z | \n", "NaN | \n", "Electron Microscope Public Image Archive | \n", "ready | \n", "[{'contact-name': 'General contact', 'contact-... | \n", "https://www.ebi.ac.uk/pdbe/emdb/empiar/ | \n", "2649 | \n", "EMPIAR, the Electron Microscopy Public Image A... | \n", "[{'url': 'https://www.ebi.ac.uk/support/EMPIAR... | \n", "2015.0 | \n", "[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... | \n", "[biodbcore-001140, bsg-d001140] | \n", "Database | \n", "repository | \n", "[Bioinformatics, Biology] | \n", "[Protein image, Microscopy, Electron microscop... | \n", "[All] | \n", "[] | \n", "[Greece, Czech Republic, United Kingdom, Icela... | \n", "FAIRsharing record for: Electron Microscope Pu... | \n", "EMPIAR | \n", "https://fairsharing.org/fairsharing_records/2649 | \n", "None | \n", "https://creativecommons.org/licenses/by-sa/4.0... | \n", "This FAIRsharing record describes: EMPIAR, the... | \n", "[{'id': 2232, 'pubmed_id': 27067018, 'title': ... | \n", "[{'licence-name': 'EMBL-EBI Terms of Use', 'li... | \n", "[{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27... | \n", "EMPIAR | \n", "[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... | \n", "[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "2657 | \n", "fairsharing-records | \n", "2018-08-13T15:12:11.000Z | \n", "2021-09-30T11:37:28.736Z | \n", "10.25504/FAIRsharing.tnByoG | \n", "ClinicalStudyDataRequest.com | \n", "ready | \n", "[{'contact-email': 'support@clinicalstudydatar... | \n", "https://clinicalstudydatarequest.com/ | \n", "2657 | \n", "ClinicalStudyDataRequest.com (CSDR) is a conso... | \n", "[{'url': 'https://clinicalstudydatarequest.com... | \n", "2014.0 | \n", "[{'url': 'https://clinicalstudydatarequest.com... | \n", "[biodbcore-001149, bsg-d001149] | \n", "Database | \n", "repository | \n", "[Preclinical Studies, Biomedical Science] | \n", "[] | \n", "[Homo sapiens] | \n", "[] | \n", "[Worldwide] | \n", "FAIRsharing record for: ClinicalStudyDataReque... | \n", "CSDR | \n", "https://fairsharing.org/10.25504/FAIRsharing.t... | \n", "10.25504/FAIRsharing.tnByoG | \n", "https://creativecommons.org/licenses/by-sa/4.0... | \n", "This FAIRsharing record describes: ClinicalStu... | \n", "[] | \n", "[{'licence-name': 'CSDR Data Sharing Agreement... | \n", "NaN | \n", "CSDR | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "2078 | \n", "fairsharing-records | \n", "2014-11-04T15:23:40.000Z | \n", "2021-09-30T11:34:43.129Z | \n", "10.25504/FAIRsharing.3axym7 | \n", "Germplasm Resources Information Network | \n", "ready | \n", "[{'contact-email': 'dbmu@ars-grin.gov'}] | \n", "https://www.ars-grin.gov/ | \n", "2078 | \n", "GRIN provides National Genetic Resources Progr... | \n", "[{'url': 'https://www.ars-grin.gov/Pages/Colle... | \n", "2010.0 | \n", "[{'url': 'https://www.ars-grin.gov/', 'name': ... | \n", "[biodbcore-000546, bsg-d000546] | \n", "Database | \n", "repository | \n", "[Life Science] | \n", "[Cell, Cell culture, Germplasm] | \n", "[Bacteria, Metazoa, Viridiplantae] | \n", "[] | \n", "[United States] | \n", "FAIRsharing record for: Germplasm Resources In... | \n", "GRIN | \n", "https://fairsharing.org/10.25504/FAIRsharing.3... | \n", "10.25504/FAIRsharing.3axym7 | \n", "https://creativecommons.org/licenses/by-sa/4.0... | \n", "This FAIRsharing record describes: GRIN provid... | \n", "[] | \n", "[] | \n", "NaN | \n", "GRIN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
\n", " | id | \n", "type | \n", "attributes.created-at | \n", "attributes.updated-at | \n", "attributes.metadata.doi | \n", "attributes.metadata.name | \n", "attributes.metadata.status | \n", "attributes.metadata.contacts | \n", "attributes.metadata.homepage | \n", "attributes.metadata.identifier | \n", "attributes.metadata.description | \n", "attributes.metadata.support-links | \n", "attributes.metadata.year-creation | \n", "attributes.metadata.data-processes | \n", "attributes.legacy-ids | \n", "attributes.fairsharing-registry | \n", "attributes.record-type | \n", "attributes.subjects | \n", "attributes.domains | \n", "attributes.taxonomies | \n", "attributes.user-defined-tags | \n", "attributes.countries | \n", "attributes.name | \n", "attributes.abbreviation | \n", "attributes.url | \n", "attributes.doi | \n", "attributes.fairsharing-licence | \n", "attributes.description | \n", "attributes.publications | \n", "attributes.licence-links | \n", "attributes.metadata.citations | \n", "attributes.metadata.abbreviation | \n", "attributes.metadata.access-points | \n", "attributes.metadata.associated-tools | \n", "attributes.metadata.deprecation-date | \n", "attributes.metadata.deprecation-reason | \n", "attributes.metadata.tombstone | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "1797 | \n", "1797 | \n", "1797 | \n", "1797 | \n", "1354 | \n", "1797 | \n", "1797 | \n", "1678 | \n", "1797 | \n", "1797.000000 | \n", "1797 | \n", "1608 | \n", "1492.000000 | \n", "1565 | \n", "1797 | \n", "1797 | \n", "1797 | \n", "1797 | \n", "1797 | \n", "1797 | \n", "1797 | \n", "1797 | \n", "1797 | \n", "1638 | \n", "1797 | \n", "1354 | \n", "1797 | \n", "1797 | \n", "1797 | \n", "1797 | \n", "326 | \n", "1638 | \n", "449 | \n", "618 | \n", "217 | \n", "217 | \n", "1 | \n", "
unique | \n", "1797 | \n", "1 | \n", "1162 | \n", "1797 | \n", "1354 | \n", "1796 | \n", "4 | \n", "1576 | \n", "1797 | \n", "NaN | \n", "1797 | \n", "1594 | \n", "NaN | \n", "1563 | \n", "1797 | \n", "1 | \n", "3 | \n", "888 | \n", "1163 | \n", "378 | \n", "384 | \n", "185 | \n", "1796 | \n", "1626 | \n", "1797 | \n", "1354 | \n", "1 | \n", "1797 | \n", "1109 | \n", "1082 | \n", "320 | \n", "1626 | \n", "444 | \n", "615 | \n", "55 | \n", "86 | \n", "1 | \n", "
top | \n", "1723 | \n", "fairsharing-records | \n", "2014-11-04T15:23:40.000Z | \n", "2021-09-30T11:39:06.829Z | \n", "10.25504/FAIRsharing.8t18te | \n", "OmicsDB | \n", "ready | \n", "[{'contact-name': 'Sam Hokin', 'contact-email'... | \n", "http://www.cellimagelibrary.org | \n", "NaN | \n", "This library is a public and easily accessible... | \n", "[{'url': 'https://github.com/gbif/ipt/wiki/IPT... | \n", "NaN | \n", "[{'url': 'http://qf.iodp.tamu.edu/qfsearch/sea... | \n", "[biodbcore-000180, bsg-d000180] | \n", "Database | \n", "repository | \n", "[Life Science] | \n", "[] | \n", "[All] | \n", "[] | \n", "[United States] | \n", "FAIRsharing record for: OmicsDB | \n", "CGD | \n", "https://fairsharing.org/10.25504/FAIRsharing.8... | \n", "10.25504/FAIRsharing.8t18te | \n", "https://creativecommons.org/licenses/by-sa/4.0... | \n", "This FAIRsharing record describes: This librar... | \n", "[] | \n", "[] | \n", "[{'doi': '10.1093/nar/gkz890', 'pubmed-id': 31... | \n", "CGD | \n", "[{'url': 'https://github.com/Ensembl', 'name':... | \n", "[{'url': 'http://www.h-invitational.jp/hinv/bl... | \n", "2021-9-17 | \n", "This resource is no longer available at the st... | \n", "True | \n", "
freq | \n", "1 | \n", "1797 | \n", "636 | \n", "1 | \n", "1 | \n", "2 | \n", "1540 | \n", "6 | \n", "1 | \n", "NaN | \n", "1 | \n", "6 | \n", "NaN | \n", "2 | \n", "1 | \n", "1797 | \n", "926 | \n", "350 | \n", "265 | \n", "502 | \n", "1193 | \n", "594 | \n", "2 | \n", "3 | \n", "1 | \n", "1 | \n", "1797 | \n", "1 | \n", "661 | \n", "716 | \n", "6 | \n", "3 | \n", "3 | \n", "2 | \n", "84 | \n", "113 | \n", "1 | \n", "
mean | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2446.100167 | \n", "NaN | \n", "NaN | \n", "2007.636059 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
std | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "520.058757 | \n", "NaN | \n", "NaN | \n", "10.953269 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
min | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1547.000000 | \n", "NaN | \n", "NaN | \n", "1894.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
25% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1996.000000 | \n", "NaN | \n", "NaN | \n", "2004.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
50% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2445.000000 | \n", "NaN | \n", "NaN | \n", "2010.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
75% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2897.000000 | \n", "NaN | \n", "NaN | \n", "2014.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
max | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "3346.000000 | \n", "NaN | \n", "NaN | \n", "2021.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "