{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading datasets" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtypeattributes.created-atattributes.updated-atattributes.metadata.doiattributes.metadata.nameattributes.metadata.statusattributes.metadata.contactsattributes.metadata.homepageattributes.metadata.identifierattributes.metadata.descriptionattributes.metadata.support-linksattributes.metadata.year-creationattributes.metadata.data-processesattributes.legacy-idsattributes.fairsharing-registryattributes.record-typeattributes.subjectsattributes.domainsattributes.taxonomiesattributes.user-defined-tagsattributes.countriesattributes.nameattributes.abbreviationattributes.urlattributes.doiattributes.fairsharing-licenceattributes.descriptionattributes.publicationsattributes.licence-linksattributes.metadata.citationsattributes.metadata.abbreviationattributes.metadata.access-pointsattributes.metadata.associated-toolsattributes.metadata.deprecation-dateattributes.metadata.deprecation-reasonattributes.metadata.tombstone
01723fairsharing-records2014-11-04T15:23:40.000Z2021-09-30T11:39:06.829Z10.25504/FAIRsharing.8t18teCell Image Libraryready[{'contact-name': 'David Orloff', 'contact-ema...http://www.cellimagelibrary.org1723This library is a public and easily accessible...[{'url': 'http://www.cellimagelibrary.org/page...2010.0[{'name': 'live update', 'type': 'data release...[biodbcore-000180, bsg-d000180]Databaserepository[Cell Biology, Life Science][Cell, Microscopy, Light microscopy, Electron ...[All][][United States]FAIRsharing record for: Cell Image LibraryNonehttps://fairsharing.org/10.25504/FAIRsharing.8...10.25504/FAIRsharing.8t18tehttps://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: This librar...[{'id': 232, 'pubmed_id': 23203874, 'title': '...[{'licence-name': 'Cell Image Library Data Pol...NaNNaNNaNNaNNaNNaNNaN
13101fairsharing-records2020-09-16T08:49:13.000Z2021-09-30T11:36:45.452ZNaNWHOI Ship Data-Grabber SystemreadyNaNhttp://4dgeo.whoi.edu/shipdata/SDG_shipdata.html3101The WHOI Ship DataGrabber system provides the ...[{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o...2004.0[{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai...[biodbcore-001609, bsg-d001609]Databaserepository[Earth Science, Water Research, Oceanography][][Not applicable][subseafloor environments][United States]FAIRsharing record for: WHOI Ship Data-Grabber...Nonehttps://fairsharing.org/fairsharing_records/3101Nonehttps://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: The WHOI Sh...[][{'licence-name': 'NDSF Data Archive Policy', ...NaNNaNNaNNaNNaNNaNNaN
22649fairsharing-records2018-08-07T20:23:32.000Z2021-09-30T11:39:07.898ZNaNElectron Microscope Public Image Archiveready[{'contact-name': 'General contact', 'contact-...https://www.ebi.ac.uk/pdbe/emdb/empiar/2649EMPIAR, the Electron Microscopy Public Image A...[{'url': 'https://www.ebi.ac.uk/support/EMPIAR...2015.0[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...[biodbcore-001140, bsg-d001140]Databaserepository[Bioinformatics, Biology][Protein image, Microscopy, Electron microscop...[All][][Greece, Czech Republic, United Kingdom, Icela...FAIRsharing record for: Electron Microscope Pu...EMPIARhttps://fairsharing.org/fairsharing_records/2649Nonehttps://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: EMPIAR, the...[{'id': 2232, 'pubmed_id': 27067018, 'title': ...[{'licence-name': 'EMBL-EBI Terms of Use', 'li...[{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27...EMPIAR[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...NaNNaNNaN
32657fairsharing-records2018-08-13T15:12:11.000Z2021-09-30T11:37:28.736Z10.25504/FAIRsharing.tnByoGClinicalStudyDataRequest.comready[{'contact-email': 'support@clinicalstudydatar...https://clinicalstudydatarequest.com/2657ClinicalStudyDataRequest.com (CSDR) is a conso...[{'url': 'https://clinicalstudydatarequest.com...2014.0[{'url': 'https://clinicalstudydatarequest.com...[biodbcore-001149, bsg-d001149]Databaserepository[Preclinical Studies, Biomedical Science][][Homo sapiens][][Worldwide]FAIRsharing record for: ClinicalStudyDataReque...CSDRhttps://fairsharing.org/10.25504/FAIRsharing.t...10.25504/FAIRsharing.tnByoGhttps://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: ClinicalStu...[][{'licence-name': 'CSDR Data Sharing Agreement...NaNCSDRNaNNaNNaNNaNNaN
42078fairsharing-records2014-11-04T15:23:40.000Z2021-09-30T11:34:43.129Z10.25504/FAIRsharing.3axym7Germplasm Resources Information Networkready[{'contact-email': 'dbmu@ars-grin.gov'}]https://www.ars-grin.gov/2078GRIN provides National Genetic Resources Progr...[{'url': 'https://www.ars-grin.gov/Pages/Colle...2010.0[{'url': 'https://www.ars-grin.gov/', 'name': ...[biodbcore-000546, bsg-d000546]Databaserepository[Life Science][Cell, Cell culture, Germplasm][Bacteria, Metazoa, Viridiplantae][][United States]FAIRsharing record for: Germplasm Resources In...GRINhttps://fairsharing.org/10.25504/FAIRsharing.3...10.25504/FAIRsharing.3axym7https://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: GRIN provid...[][]NaNGRINNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " id type attributes.created-at \\\n", "0 1723 fairsharing-records 2014-11-04T15:23:40.000Z \n", "1 3101 fairsharing-records 2020-09-16T08:49:13.000Z \n", "2 2649 fairsharing-records 2018-08-07T20:23:32.000Z \n", "3 2657 fairsharing-records 2018-08-13T15:12:11.000Z \n", "4 2078 fairsharing-records 2014-11-04T15:23:40.000Z \n", "\n", " attributes.updated-at attributes.metadata.doi \\\n", "0 2021-09-30T11:39:06.829Z 10.25504/FAIRsharing.8t18te \n", "1 2021-09-30T11:36:45.452Z NaN \n", "2 2021-09-30T11:39:07.898Z NaN \n", "3 2021-09-30T11:37:28.736Z 10.25504/FAIRsharing.tnByoG \n", "4 2021-09-30T11:34:43.129Z 10.25504/FAIRsharing.3axym7 \n", "\n", " attributes.metadata.name attributes.metadata.status \\\n", "0 Cell Image Library ready \n", "1 WHOI Ship Data-Grabber System ready \n", "2 Electron Microscope Public Image Archive ready \n", "3 ClinicalStudyDataRequest.com ready \n", "4 Germplasm Resources Information Network ready \n", "\n", " attributes.metadata.contacts \\\n", "0 [{'contact-name': 'David Orloff', 'contact-ema... \n", "1 NaN \n", "2 [{'contact-name': 'General contact', 'contact-... \n", "3 [{'contact-email': 'support@clinicalstudydatar... \n", "4 [{'contact-email': 'dbmu@ars-grin.gov'}] \n", "\n", " attributes.metadata.homepage \\\n", "0 http://www.cellimagelibrary.org \n", "1 http://4dgeo.whoi.edu/shipdata/SDG_shipdata.html \n", "2 https://www.ebi.ac.uk/pdbe/emdb/empiar/ \n", "3 https://clinicalstudydatarequest.com/ \n", "4 https://www.ars-grin.gov/ \n", "\n", " attributes.metadata.identifier \\\n", "0 1723 \n", "1 3101 \n", "2 2649 \n", "3 2657 \n", "4 2078 \n", "\n", " attributes.metadata.description \\\n", "0 This library is a public and easily accessible... \n", "1 The WHOI Ship DataGrabber system provides the ... \n", "2 EMPIAR, the Electron Microscopy Public Image A... \n", "3 ClinicalStudyDataRequest.com (CSDR) is a conso... \n", "4 GRIN provides National Genetic Resources Progr... 
\n", "\n", " attributes.metadata.support-links \\\n", "0 [{'url': 'http://www.cellimagelibrary.org/page... \n", "1 [{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o... \n", "2 [{'url': 'https://www.ebi.ac.uk/support/EMPIAR... \n", "3 [{'url': 'https://clinicalstudydatarequest.com... \n", "4 [{'url': 'https://www.ars-grin.gov/Pages/Colle... \n", "\n", " attributes.metadata.year-creation \\\n", "0 2010.0 \n", "1 2004.0 \n", "2 2015.0 \n", "3 2014.0 \n", "4 2010.0 \n", "\n", " attributes.metadata.data-processes \\\n", "0 [{'name': 'live update', 'type': 'data release... \n", "1 [{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai... \n", "2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n", "3 [{'url': 'https://clinicalstudydatarequest.com... \n", "4 [{'url': 'https://www.ars-grin.gov/', 'name': ... \n", "\n", " attributes.legacy-ids attributes.fairsharing-registry \\\n", "0 [biodbcore-000180, bsg-d000180] Database \n", "1 [biodbcore-001609, bsg-d001609] Database \n", "2 [biodbcore-001140, bsg-d001140] Database \n", "3 [biodbcore-001149, bsg-d001149] Database \n", "4 [biodbcore-000546, bsg-d000546] Database \n", "\n", " attributes.record-type attributes.subjects \\\n", "0 repository [Cell Biology, Life Science] \n", "1 repository [Earth Science, Water Research, Oceanography] \n", "2 repository [Bioinformatics, Biology] \n", "3 repository [Preclinical Studies, Biomedical Science] \n", "4 repository [Life Science] \n", "\n", " attributes.domains \\\n", "0 [Cell, Microscopy, Light microscopy, Electron ... \n", "1 [] \n", "2 [Protein image, Microscopy, Electron microscop... 
\n", "3 [] \n", "4 [Cell, Cell culture, Germplasm] \n", "\n", " attributes.taxonomies attributes.user-defined-tags \\\n", "0 [All] [] \n", "1 [Not applicable] [subseafloor environments] \n", "2 [All] [] \n", "3 [Homo sapiens] [] \n", "4 [Bacteria, Metazoa, Viridiplantae] [] \n", "\n", " attributes.countries \\\n", "0 [United States] \n", "1 [United States] \n", "2 [Greece, Czech Republic, United Kingdom, Icela... \n", "3 [Worldwide] \n", "4 [United States] \n", "\n", " attributes.name attributes.abbreviation \\\n", "0 FAIRsharing record for: Cell Image Library None \n", "1 FAIRsharing record for: WHOI Ship Data-Grabber... None \n", "2 FAIRsharing record for: Electron Microscope Pu... EMPIAR \n", "3 FAIRsharing record for: ClinicalStudyDataReque... CSDR \n", "4 FAIRsharing record for: Germplasm Resources In... GRIN \n", "\n", " attributes.url \\\n", "0 https://fairsharing.org/10.25504/FAIRsharing.8... \n", "1 https://fairsharing.org/fairsharing_records/3101 \n", "2 https://fairsharing.org/fairsharing_records/2649 \n", "3 https://fairsharing.org/10.25504/FAIRsharing.t... \n", "4 https://fairsharing.org/10.25504/FAIRsharing.3... \n", "\n", " attributes.doi \\\n", "0 10.25504/FAIRsharing.8t18te \n", "1 None \n", "2 None \n", "3 10.25504/FAIRsharing.tnByoG \n", "4 10.25504/FAIRsharing.3axym7 \n", "\n", " attributes.fairsharing-licence \\\n", "0 https://creativecommons.org/licenses/by-sa/4.0... \n", "1 https://creativecommons.org/licenses/by-sa/4.0... \n", "2 https://creativecommons.org/licenses/by-sa/4.0... \n", "3 https://creativecommons.org/licenses/by-sa/4.0... \n", "4 https://creativecommons.org/licenses/by-sa/4.0... \n", "\n", " attributes.description \\\n", "0 This FAIRsharing record describes: This librar... \n", "1 This FAIRsharing record describes: The WHOI Sh... \n", "2 This FAIRsharing record describes: EMPIAR, the... \n", "3 This FAIRsharing record describes: ClinicalStu... \n", "4 This FAIRsharing record describes: GRIN provid... 
\n", "\n", " attributes.publications \\\n", "0 [{'id': 232, 'pubmed_id': 23203874, 'title': '... \n", "1 [] \n", "2 [{'id': 2232, 'pubmed_id': 27067018, 'title': ... \n", "3 [] \n", "4 [] \n", "\n", " attributes.licence-links \\\n", "0 [{'licence-name': 'Cell Image Library Data Pol... \n", "1 [{'licence-name': 'NDSF Data Archive Policy', ... \n", "2 [{'licence-name': 'EMBL-EBI Terms of Use', 'li... \n", "3 [{'licence-name': 'CSDR Data Sharing Agreement... \n", "4 [] \n", "\n", " attributes.metadata.citations \\\n", "0 NaN \n", "1 NaN \n", "2 [{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27... \n", "3 NaN \n", "4 NaN \n", "\n", " attributes.metadata.abbreviation \\\n", "0 NaN \n", "1 NaN \n", "2 EMPIAR \n", "3 CSDR \n", "4 GRIN \n", "\n", " attributes.metadata.access-points \\\n", "0 NaN \n", "1 NaN \n", "2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n", "3 NaN \n", "4 NaN \n", "\n", " attributes.metadata.associated-tools \\\n", "0 NaN \n", "1 NaN \n", "2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n", "3 NaN \n", "4 NaN \n", "\n", " attributes.metadata.deprecation-date attributes.metadata.deprecation-reason \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " attributes.metadata.tombstone \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The dump is JSON Lines: each line holds one FAIRsharing record as a JSON object.\n", "with open('../data/raw/fairsharing_dump_api_09_2021.json', encoding='utf-8') as f:\n", "    # Parse each line exactly once; skip blank lines, which json.loads would reject.\n", "    records = [json.loads(line) for line in f if line.strip()]\n", "\n", "# Flatten nested objects into dotted column names (e.g. attributes.metadata.doi).\n", "fairsharing_df = pd.json_normalize(records)\n", "\n", "fairsharing_df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtypeattributes.created-atattributes.updated-atattributes.metadata.doiattributes.metadata.nameattributes.metadata.statusattributes.metadata.contactsattributes.metadata.homepageattributes.metadata.identifierattributes.metadata.descriptionattributes.metadata.support-linksattributes.metadata.year-creationattributes.metadata.data-processesattributes.legacy-idsattributes.fairsharing-registryattributes.record-typeattributes.subjectsattributes.domainsattributes.taxonomiesattributes.user-defined-tagsattributes.countriesattributes.nameattributes.abbreviationattributes.urlattributes.doiattributes.fairsharing-licenceattributes.descriptionattributes.publicationsattributes.licence-linksattributes.metadata.citationsattributes.metadata.abbreviationattributes.metadata.access-pointsattributes.metadata.associated-toolsattributes.metadata.deprecation-dateattributes.metadata.deprecation-reasonattributes.metadata.tombstone
count1797179717971797135417971797167817971797.000000179716081492.0000001565179717971797179717971797179717971797163817971354179717971797179732616384496182172171
unique179711162179713541796415761797NaN17971594NaN1563179713888116337838418517961626179713541179711091082320162644461555861
top1723fairsharing-records2014-11-04T15:23:40.000Z2021-09-30T11:39:06.829Z10.25504/FAIRsharing.8t18teOmicsDBready[{'contact-name': 'Sam Hokin', 'contact-email'...http://www.cellimagelibrary.orgNaNThis library is a public and easily accessible...[{'url': 'https://github.com/gbif/ipt/wiki/IPT...NaN[{'url': 'http://qf.iodp.tamu.edu/qfsearch/sea...[biodbcore-000180, bsg-d000180]Databaserepository[Life Science][][All][][United States]FAIRsharing record for: OmicsDBCGDhttps://fairsharing.org/10.25504/FAIRsharing.8...10.25504/FAIRsharing.8t18tehttps://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: This librar...[][][{'doi': '10.1093/nar/gkz890', 'pubmed-id': 31...CGD[{'url': 'https://github.com/Ensembl', 'name':...[{'url': 'http://www.h-invitational.jp/hinv/bl...2021-9-17This resource is no longer available at the st...True
freq11797636112154061NaN16NaN21179792635026550211935942311179716617166332841131
meanNaNNaNNaNNaNNaNNaNNaNNaNNaN2446.100167NaNNaN2007.636059NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
stdNaNNaNNaNNaNNaNNaNNaNNaNNaN520.058757NaNNaN10.953269NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
minNaNNaNNaNNaNNaNNaNNaNNaNNaN1547.000000NaNNaN1894.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
25%NaNNaNNaNNaNNaNNaNNaNNaNNaN1996.000000NaNNaN2004.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
50%NaNNaNNaNNaNNaNNaNNaNNaNNaN2445.000000NaNNaN2010.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
75%NaNNaNNaNNaNNaNNaNNaNNaNNaN2897.000000NaNNaN2014.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
maxNaNNaNNaNNaNNaNNaNNaNNaNNaN3346.000000NaNNaN2021.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " id type attributes.created-at \\\n", "count 1797 1797 1797 \n", "unique 1797 1 1162 \n", "top 1723 fairsharing-records 2014-11-04T15:23:40.000Z \n", "freq 1 1797 636 \n", "mean NaN NaN NaN \n", "std NaN NaN NaN \n", "min NaN NaN NaN \n", "25% NaN NaN NaN \n", "50% NaN NaN NaN \n", "75% NaN NaN NaN \n", "max NaN NaN NaN \n", "\n", " attributes.updated-at attributes.metadata.doi \\\n", "count 1797 1354 \n", "unique 1797 1354 \n", "top 2021-09-30T11:39:06.829Z 10.25504/FAIRsharing.8t18te \n", "freq 1 1 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.metadata.name attributes.metadata.status \\\n", "count 1797 1797 \n", "unique 1796 4 \n", "top OmicsDB ready \n", "freq 2 1540 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.metadata.contacts \\\n", "count 1678 \n", "unique 1576 \n", "top [{'contact-name': 'Sam Hokin', 'contact-email'... \n", "freq 6 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.homepage attributes.metadata.identifier \\\n", "count 1797 1797.000000 \n", "unique 1797 NaN \n", "top http://www.cellimagelibrary.org NaN \n", "freq 1 NaN \n", "mean NaN 2446.100167 \n", "std NaN 520.058757 \n", "min NaN 1547.000000 \n", "25% NaN 1996.000000 \n", "50% NaN 2445.000000 \n", "75% NaN 2897.000000 \n", "max NaN 3346.000000 \n", "\n", " attributes.metadata.description \\\n", "count 1797 \n", "unique 1797 \n", "top This library is a public and easily accessible... \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.support-links \\\n", "count 1608 \n", "unique 1594 \n", "top [{'url': 'https://github.com/gbif/ipt/wiki/IPT... 
\n", "freq 6 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.year-creation \\\n", "count 1492.000000 \n", "unique NaN \n", "top NaN \n", "freq NaN \n", "mean 2007.636059 \n", "std 10.953269 \n", "min 1894.000000 \n", "25% 2004.000000 \n", "50% 2010.000000 \n", "75% 2014.000000 \n", "max 2021.000000 \n", "\n", " attributes.metadata.data-processes \\\n", "count 1565 \n", "unique 1563 \n", "top [{'url': 'http://qf.iodp.tamu.edu/qfsearch/sea... \n", "freq 2 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.legacy-ids attributes.fairsharing-registry \\\n", "count 1797 1797 \n", "unique 1797 1 \n", "top [biodbcore-000180, bsg-d000180] Database \n", "freq 1 1797 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.record-type attributes.subjects attributes.domains \\\n", "count 1797 1797 1797 \n", "unique 3 888 1163 \n", "top repository [Life Science] [] \n", "freq 926 350 265 \n", "mean NaN NaN NaN \n", "std NaN NaN NaN \n", "min NaN NaN NaN \n", "25% NaN NaN NaN \n", "50% NaN NaN NaN \n", "75% NaN NaN NaN \n", "max NaN NaN NaN \n", "\n", " attributes.taxonomies attributes.user-defined-tags \\\n", "count 1797 1797 \n", "unique 378 384 \n", "top [All] [] \n", "freq 502 1193 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.countries attributes.name \\\n", "count 1797 1797 \n", "unique 185 1796 \n", "top [United States] FAIRsharing record for: OmicsDB \n", "freq 594 2 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.abbreviation \\\n", "count 1638 \n", "unique 1626 \n", "top CGD \n", "freq 3 \n", "mean NaN \n", "std NaN 
\n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.url \\\n", "count 1797 \n", "unique 1797 \n", "top https://fairsharing.org/10.25504/FAIRsharing.8... \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.doi \\\n", "count 1354 \n", "unique 1354 \n", "top 10.25504/FAIRsharing.8t18te \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.fairsharing-licence \\\n", "count 1797 \n", "unique 1 \n", "top https://creativecommons.org/licenses/by-sa/4.0... \n", "freq 1797 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.description \\\n", "count 1797 \n", "unique 1797 \n", "top This FAIRsharing record describes: This librar... \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.publications attributes.licence-links \\\n", "count 1797 1797 \n", "unique 1109 1082 \n", "top [] [] \n", "freq 661 716 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.metadata.citations \\\n", "count 326 \n", "unique 320 \n", "top [{'doi': '10.1093/nar/gkz890', 'pubmed-id': 31... \n", "freq 6 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.abbreviation \\\n", "count 1638 \n", "unique 1626 \n", "top CGD \n", "freq 3 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.access-points \\\n", "count 449 \n", "unique 444 \n", "top [{'url': 'https://github.com/Ensembl', 'name':... 
\n", "freq 3 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.associated-tools \\\n", "count 618 \n", "unique 615 \n", "top [{'url': 'http://www.h-invitational.jp/hinv/bl... \n", "freq 2 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.deprecation-date \\\n", "count 217 \n", "unique 55 \n", "top 2021-9-17 \n", "freq 84 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.deprecation-reason \\\n", "count 217 \n", "unique 86 \n", "top This resource is no longer available at the st... \n", "freq 113 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.tombstone \n", "count 1 \n", "unique 1 \n", "top True \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_df.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "id 0\n", "type 0\n", "attributes.created-at 0\n", "attributes.updated-at 0\n", "attributes.metadata.doi 443\n", "attributes.metadata.name 0\n", "attributes.metadata.status 0\n", "attributes.metadata.contacts 119\n", "attributes.metadata.homepage 0\n", "attributes.metadata.identifier 0\n", "attributes.metadata.description 0\n", "attributes.metadata.support-links 189\n", "attributes.metadata.year-creation 305\n", "attributes.metadata.data-processes 232\n", "attributes.legacy-ids 0\n", "attributes.fairsharing-registry 0\n", "attributes.record-type 0\n", "attributes.subjects 0\n", "attributes.domains 0\n", "attributes.taxonomies 0\n", "attributes.user-defined-tags 0\n", "attributes.countries 0\n", 
"attributes.name 0\n", "attributes.abbreviation 159\n", "attributes.url 0\n", "attributes.doi 443\n", "attributes.fairsharing-licence 0\n", "attributes.description 0\n", "attributes.publications 0\n", "attributes.licence-links 0\n", "attributes.metadata.citations 1471\n", "attributes.metadata.abbreviation 159\n", "attributes.metadata.access-points 1348\n", "attributes.metadata.associated-tools 1179\n", "attributes.metadata.deprecation-date 1580\n", "attributes.metadata.deprecation-reason 1580\n", "attributes.metadata.tombstone 1796\n", "dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_df.isna().sum()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "attributes.record-type\n", "knowledgebase 774\n", "knowledgebase_and_repository 97\n", "repository 926\n", "dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of records per value of attributes.record-type.\n", "fairsharing_df.groupby('attributes.record-type').size()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }