{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "import reverse_geocoder as rg\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import pycountry_convert\n", "\n", "import matplotlib.pyplot as plt\n", "from matplotlib_venn import venn2, venn2_circles\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading datasets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**re3data**" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
re3data_idrepository_nametypesubjectprovider_typekeywordinstitution
0r3d100000001Odum Institute Archive Dataverse[disciplinary][1 Humanities and Social Sciences, 111 Social ...[dataProvider][FAIR, Middle East, crime, demography, economy...[[Odum Institute for Research in Social Scienc...
1r3d100000002Access to Archival Databases[disciplinary][1 Humanities and Social Sciences, 102 History...[dataProvider][US History][[The U.S. National Archives and Records Admin...
2r3d100000004Datenbank Gesprochenes Deutsch[disciplinary][1 Humanities and Social Sciences, 104 Linguis...[dataProvider, serviceProvider][Australian German, FOLK, German dialects, Pfe...[[Institut für Deutsche Sprache, Archiv für Ge...
3r3d100000005UNC Dataverse[institutional][1 Humanities and Social Sciences, 111 Social ...[dataProvider, serviceProvider][FAIR, census, demographic survey, demography,...[[Odum Institute for Research in Social Scienc...
4r3d100000006Archaeology Data Service[disciplinary][1 Humanities and Social Sciences, 101 Ancient...[dataProvider, serviceProvider][FAIR, archaeology, cultural heritage, prehist...[[Arts and Humanities Research Council, [AHRC]...
\n", "
" ], "text/plain": [ " re3data_id repository_name type \\\n", "0 r3d100000001 Odum Institute Archive Dataverse [disciplinary] \n", "1 r3d100000002 Access to Archival Databases [disciplinary] \n", "2 r3d100000004 Datenbank Gesprochenes Deutsch [disciplinary] \n", "3 r3d100000005 UNC Dataverse [institutional] \n", "4 r3d100000006 Archaeology Data Service [disciplinary] \n", "\n", " subject \\\n", "0 [1 Humanities and Social Sciences, 111 Social ... \n", "1 [1 Humanities and Social Sciences, 102 History... \n", "2 [1 Humanities and Social Sciences, 104 Linguis... \n", "3 [1 Humanities and Social Sciences, 111 Social ... \n", "4 [1 Humanities and Social Sciences, 101 Ancient... \n", "\n", " provider_type \\\n", "0 [dataProvider] \n", "1 [dataProvider] \n", "2 [dataProvider, serviceProvider] \n", "3 [dataProvider, serviceProvider] \n", "4 [dataProvider, serviceProvider] \n", "\n", " keyword \\\n", "0 [FAIR, Middle East, crime, demography, economy... \n", "1 [US History] \n", "2 [Australian German, FOLK, German dialects, Pfe... \n", "3 [FAIR, census, demographic survey, demography,... \n", "4 [FAIR, archaeology, cultural heritage, prehist... \n", "\n", " institution \n", "0 [[Odum Institute for Research in Social Scienc... \n", "1 [[The U.S. National Archives and Records Admin... \n", "2 [[Institut für Deutsche Sprache, Archiv für Ge... \n", "3 [[Odum Institute for Research in Social Scienc... \n", "4 [[Arts and Humanities Research Council, [AHRC]... 
" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Load the re3data registry export (tab-separated).\n",
 "# The list-valued columns are parsed back into Python lists with\n",
 "# ast.literal_eval. Converters are declared only for columns that are\n",
 "# actually loaded through usecols.\n",
 "re3data_df = pd.read_csv(\n",
 "    '../data/raw/re3data.tsv',\n",
 "    sep='\\t',\n",
 "    usecols=['re3data_id', 'repository_name', 'subject', 'keyword',\n",
 "             'type', 'provider_type', 'institution'],\n",
 "    converters={\n",
 "        'subject': ast.literal_eval,\n",
 "        'keyword': ast.literal_eval,\n",
 "        'type': ast.literal_eval,\n",
 "        'provider_type': ast.literal_eval,\n",
 "        'institution': ast.literal_eval,\n",
 "    },\n",
 ")\n",
 "re3data_df.head()"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [
 "**Note:** rows whose provider type is `serviceProvider` are filtered out here. ",
 "Repositories listed as both data and service provider are kept through their `dataProvider` row."
 ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
 "# One row per (repository, provider type), without the service-provider rows.\n",
 "# - explode() repeats the index label for every element of a list, so the\n",
 "#   index is rebuilt at the end.\n",
 "# - Rows whose provider_type is missing (NaN after explode) are kept,\n",
 "#   because NaN != 'serviceProvider' evaluates to True.\n",
 "# - Repeated (re3data_id, provider_type) pairs are collapsed so that a\n",
 "#   repository is not counted twice for the same provider type.\n",
 "re3data_df = (\n",
 "    re3data_df\n",
 "    .explode('provider_type')\n",
 "    .loc[lambda df: df['provider_type'] != 'serviceProvider']\n",
 "    .drop_duplicates(subset=['re3data_id', 'provider_type'])\n",
 "    .reset_index(drop=True)\n",
 ")"
 ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
re3data_idrepository_nametypesubjectprovider_typekeywordinstitution
count2467246724672467245924672467
unique2466246391282122482447
topr3d100011987Landmap[disciplinary][1 Humanities and Social Sciences, 2 Life Scie...dataProvider[multidisciplinary][[National Center for Biotechnology Informatio...
freq22157320024591816
\n", "
" ], "text/plain": [ " re3data_id repository_name type \\\n", "count 2467 2467 2467 \n", "unique 2466 2463 9 \n", "top r3d100011987 Landmap [disciplinary] \n", "freq 2 2 1573 \n", "\n", " subject provider_type \\\n", "count 2467 2459 \n", "unique 1282 1 \n", "top [1 Humanities and Social Sciences, 2 Life Scie... dataProvider \n", "freq 200 2459 \n", "\n", " keyword institution \n", "count 2467 2467 \n", "unique 2248 2447 \n", "top [multidisciplinary] [[National Center for Biotechnology Informatio... \n", "freq 181 6 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_df.describe(include='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**openDOAR**" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
opendoar_idrepository_nametypesubjectinstitution
0101utrecht university repositoryinstitutional[multidisciplinary][[university of utrecht, [universiteit utrecht...
1115dspace at indian institute of management kozhi...institutional[ecology and environment, social sciences gene...[[indian institute of management kozhikode, [i...
241caltech engineering and science onlineinstitutional[biology and biochemistry, chemistry and chemi...[[california institute of technology, [caltech...
3119dcu online research access serviceinstitutional[multidisciplinary][[dublin city university, [dcu], ie, [], , htt...
4129earth-prints repositorydisciplinary[earth and planetary sciences][[istituto nazionale di geofisica e vulcanolog...
\n", "
" ], "text/plain": [
 " opendoar_id repository_name \\\n",
 "0 101 utrecht university repository \n",
 "1 115 dspace at indian institute of management kozhi... \n",
 "2 41 caltech engineering and science online \n",
 "3 119 dcu online research access service \n",
 "4 129 earth-prints repository \n",
 "\n",
 " type subject \\\n",
 "0 institutional [multidisciplinary] \n",
 "1 institutional [ecology and environment, social sciences gene... \n",
 "2 institutional [biology and biochemistry, chemistry and chemi... \n",
 "3 institutional [multidisciplinary] \n",
 "4 disciplinary [earth and planetary sciences] \n",
 "\n",
 " institution \n",
 "0 [[university of utrecht, [universiteit utrecht... \n",
 "1 [[indian institute of management kozhikode, [i... \n",
 "2 [[california institute of technology, [caltech... \n",
 "3 [[dublin city university, [dcu], ie, [], , htt... \n",
 "4 [[istituto nazionale di geofisica e vulcanolog... " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Load the OpenDOAR export (tab-separated).\n",
 "# Columns stored as Python literals are decoded with ast.literal_eval;\n",
 "# 'type' and 'repository_name' are read as plain strings.\n",
 "opendoar_df = pd.read_csv(\n",
 "    '../data/raw/openDoar.tsv',\n",
 "    sep='\\t',\n",
 "    usecols=['opendoar_id', 'repository_name', 'subject', 'type', 'institution'],\n",
 "    converters={\n",
 "        'opendoar_id': ast.literal_eval,\n",
 "        'subject': ast.literal_eval,\n",
 "        'institution': ast.literal_eval,\n",
 "    },\n",
 ")\n",
 "opendoar_df.head()"
 ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
opendoar_idrepository_nametypesubjectinstitution
count5707.0000005707570757075707
uniqueNaN567048205098
topNaNarchinstitutional[multidisciplinary][[rijksuniversiteit groningen, [rug], nl, [], ...
freqNaN35067321226
mean4008.118801NaNNaNNaNNaN
std2869.948770NaNNaNNaNNaN
min2.000000NaNNaNNaNNaN
25%1823.000000NaNNaNNaNNaN
50%3361.000000NaNNaNNaNNaN
75%5095.000000NaNNaNNaNNaN
max10175.000000NaNNaNNaNNaN
\n", "
" ], "text/plain": [ " opendoar_id repository_name type subject \\\n", "count 5707.000000 5707 5707 5707 \n", "unique NaN 5670 4 820 \n", "top NaN arch institutional [multidisciplinary] \n", "freq NaN 3 5067 3212 \n", "mean 4008.118801 NaN NaN NaN \n", "std 2869.948770 NaN NaN NaN \n", "min 2.000000 NaN NaN NaN \n", "25% 1823.000000 NaN NaN NaN \n", "50% 3361.000000 NaN NaN NaN \n", "75% 5095.000000 NaN NaN NaN \n", "max 10175.000000 NaN NaN NaN \n", "\n", " institution \n", "count 5707 \n", "unique 5098 \n", "top [[rijksuniversiteit groningen, [rug], nl, [], ... \n", "freq 26 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_df.describe(include='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**ROAR**" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidhome_pagetitlelocation_countrysubjects
0921http://alcme.oclc.org/ndltd/index.htmlNetworked Digital Library of Theses and Disser...usNaN
11489http://prensahistorica.mcu.es/prensahistorica/...Virtual Library of Historical PressesNaN
2606http://hal.archives-ouvertes.fr/HAL: Hyper Article en LignefrNaN
3606NaNNaNNaNNaN
4606NaNNaNNaNNaN
\n", "
" ], "text/plain": [
 " eprintid home_page \\\n",
 "0 921 http://alcme.oclc.org/ndltd/index.html \n",
 "1 1489 http://prensahistorica.mcu.es/prensahistorica/... \n",
 "2 606 http://hal.archives-ouvertes.fr/ \n",
 "3 606 NaN \n",
 "4 606 NaN \n",
 "\n",
 " title location_country subjects \n",
 "0 Networked Digital Library of Theses and Disser... us NaN \n",
 "1 Virtual Library of Historical Press es NaN \n",
 "2 HAL: Hyper Article en Ligne fr NaN \n",
 "3 NaN NaN NaN \n",
 "4 NaN NaN NaN " ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Load the ROAR export, restricted to the five columns used in this notebook.\n",
 "# NOTE(review): the saved previews show several rows repeating one eprintid\n",
 "# with NaN in most other columns (e.g. eprintid 606 and 2303) - presumably\n",
 "# continuation rows of multi-valued fields; confirm against the export format\n",
 "# before counting rows as repositories.\n",
 "roar_df = pd.read_csv('../data/raw/export_roar_CSV.csv',\n", " usecols=['eprintid', 'home_page', 'title', 'location_country', 'subjects'])\n", "roar_df.head()" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [
 "# Disabled de-duplication step, kept for reference.\n",
 "# NOTE(review): with keep=False every row that shares these four values with\n",
 "# another row is removed (no representative is kept), and drop_duplicates\n",
 "# treats NaN values as equal - so rows that carry only a subject code would\n",
 "# be dropped whenever that code occurs more than once. Confirm the intended\n",
 "# clean-up before re-enabling.\n",
 "# roar_df.drop_duplicates(subset=['home_page', 'title' , 'location_country', 'subjects'], keep=False, inplace=True)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidhome_pagetitlelocation_countrysubjects
1062303NaNFaculty Scholarship at The Claremont CollegesusAS
1072303NaNNaNNaNBF
1082303NaNNaNNaNBL
1092303NaNNaNNaNCC
1102303NaNNaNNaNGN
1112303NaNNaNNaNH1
1122303NaNNaNNaNHB
1132303NaNNaNNaNJA
1142303NaNNaNNaNLB
1152303NaNNaNNaNNX
1162303NaNNaNNaNPQ
1172303NaNNaNNaNQA
\n", "
" ], "text/plain": [ " eprintid home_page title \\\n", "106 2303 NaN Faculty Scholarship at The Claremont Colleges \n", "107 2303 NaN NaN \n", "108 2303 NaN NaN \n", "109 2303 NaN NaN \n", "110 2303 NaN NaN \n", "111 2303 NaN NaN \n", "112 2303 NaN NaN \n", "113 2303 NaN NaN \n", "114 2303 NaN NaN \n", "115 2303 NaN NaN \n", "116 2303 NaN NaN \n", "117 2303 NaN NaN \n", "\n", " location_country subjects \n", "106 us AS \n", "107 NaN BF \n", "108 NaN BL \n", "109 NaN CC \n", "110 NaN GN \n", "111 NaN H1 \n", "112 NaN HB \n", "113 NaN JA \n", "114 NaN LB \n", "115 NaN NX \n", "116 NaN PQ \n", "117 NaN QA " ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df[roar_df.eprintid == 2303]" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidhome_pagetitlelocation_countrysubjects
count5314.0000005263526850241225
uniqueNaN51565027134123
topNaNhttp://ir.lib.isu.edu.tw/Repositorio InstitucionalusH1
freqNaN37877147
mean6389.464434NaNNaNNaNNaN
std5159.573937NaNNaNNaNNaN
min1.000000NaNNaNNaNNaN
25%1490.250000NaNNaNNaNNaN
50%4990.500000NaNNaNNaNNaN
75%10452.750000NaNNaNNaNNaN
max17302.000000NaNNaNNaNNaN
\n", "
" ], "text/plain": [ " eprintid home_page title \\\n", "count 5314.000000 5263 5268 \n", "unique NaN 5156 5027 \n", "top NaN http://ir.lib.isu.edu.tw/ Repositorio Institucional \n", "freq NaN 3 7 \n", "mean 6389.464434 NaN NaN \n", "std 5159.573937 NaN NaN \n", "min 1.000000 NaN NaN \n", "25% 1490.250000 NaN NaN \n", "50% 4990.500000 NaN NaN \n", "75% 10452.750000 NaN NaN \n", "max 17302.000000 NaN NaN \n", "\n", " location_country subjects \n", "count 5024 1225 \n", "unique 134 123 \n", "top us H1 \n", "freq 877 147 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN " ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df.describe(include='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**FAIRsharing**" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
full_nameshort_namefs_urlurlcountriessubjects
0GenBankGenBankhttps://fairsharing.org/10.25504/FAIRsharing.9...https://www.ncbi.nlm.nih.gov/genbank/European Union,Japan,United StatesBioinformatics,Data Management,Data Submission...
1GlycoNAVIGlycoNAVIhttps://fairsharing.org/10.25504/FAIRsharing.w...https://glyconavi.org/JapanChemistry,Glycomics,Life Science,Organic Chemi...
2ADHDgeneADHDgenehttps://fairsharing.org/10.25504/FAIRsharing.m...http://adhd.psych.ac.cn/ChinaBiomedical Science,Genetics
3Allele frequency resource for research and tea...ALFREDhttps://fairsharing.org/10.25504/FAIRsharing.y...http://alfred.med.yale.eduUnited StatesLife Science
4Animal Transcription Factor DatabaseAnimalTFDBhttps://fairsharing.org/10.25504/FAIRsharing.e...http://bioinfo.life.hust.edu.cn/AnimalTFDB/ChinaLife Science
\n", "
" ], "text/plain": [
 " full_name short_name \\\n",
 "0 GenBank GenBank \n",
 "1 GlycoNAVI GlycoNAVI \n",
 "2 ADHDgene ADHDgene \n",
 "3 Allele frequency resource for research and tea... ALFRED \n",
 "4 Animal Transcription Factor Database AnimalTFDB \n",
 "\n",
 " fs_url \\\n",
 "0 https://fairsharing.org/10.25504/FAIRsharing.9... \n",
 "1 https://fairsharing.org/10.25504/FAIRsharing.w... \n",
 "2 https://fairsharing.org/10.25504/FAIRsharing.m... \n",
 "3 https://fairsharing.org/10.25504/FAIRsharing.y... \n",
 "4 https://fairsharing.org/10.25504/FAIRsharing.e... \n",
 "\n",
 " url \\\n",
 "0 https://www.ncbi.nlm.nih.gov/genbank/ \n",
 "1 https://glyconavi.org/ \n",
 "2 http://adhd.psych.ac.cn/ \n",
 "3 http://alfred.med.yale.edu \n",
 "4 http://bioinfo.life.hust.edu.cn/AnimalTFDB/ \n",
 "\n",
 " countries \\\n",
 "0 European Union,Japan,United States \n",
 "1 Japan \n",
 "2 China \n",
 "3 United States \n",
 "4 China \n",
 "\n",
 " subjects \n",
 "0 Bioinformatics,Data Management,Data Submission... \n",
 "1 Chemistry,Glycomics,Life Science,Organic Chemi... \n",
 "2 Biomedical Science,Genetics \n",
 "3 Life Science \n",
 "4 Life Science " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Load the FAIRsharing database summary (pipe-separated).\n",
 "# header=0 consumes the file's own header row, and names supplies the\n",
 "# column labels used throughout this notebook instead.\n",
 "fairsharing_df = pd.read_csv(\n",
 "    '../data/raw/FAIRsharingDBrec_summary20210304.csv',\n",
 "    sep='|',\n",
 "    header=0,\n",
 "    names=['full_name', 'short_name', 'fs_url', 'url', 'countries', 'subjects'],\n",
 ")\n",
 "fairsharing_df.head()"
 ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
full_nameshort_namefs_urlurlcountriessubjects
count175217521752175217491690
unique1752174117521752178834
topCiteAbCGDhttps://fairsharing.org/10.25504/FAIRsharing.1...http://www.plexdb.org/United StatesLife Science
freq1311588367
\n", "
" ], "text/plain": [ " full_name short_name \\\n", "count 1752 1752 \n", "unique 1752 1741 \n", "top CiteAb CGD \n", "freq 1 3 \n", "\n", " fs_url \\\n", "count 1752 \n", "unique 1752 \n", "top https://fairsharing.org/10.25504/FAIRsharing.1... \n", "freq 1 \n", "\n", " url countries subjects \n", "count 1752 1749 1690 \n", "unique 1752 178 834 \n", "top http://www.plexdb.org/ United States Life Science \n", "freq 1 588 367 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_df.describe(include='all')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }