{ "cells": [ { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "import reverse_geocoder as rg\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import pycountry_convert\n", "\n", "import matplotlib.pyplot as plt\n", "from matplotlib_venn import venn2, venn2_circles\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def country_to_countrycode(country):\n", "    \"\"\"Map a country name to its alpha-2 country code.\n", "\n", "    Returns NaN when `country` is missing or when the pycountry_convert lookup raises.\n", "    \"\"\"\n", "    if pd.isna(country):\n", "        return np.nan\n", "    try:\n", "        return pycountry_convert.country_name_to_country_alpha2(country)\n", "    except Exception:\n", "        # Best-effort lookup: a failed conversion becomes NaN. Unlike a bare `except:`,\n", "        # this does not swallow KeyboardInterrupt or SystemExit.\n", "        return np.nan\n", "\n", "\n", "def countrycode_to_continent(country_code):\n", "    \"\"\"Map an alpha-2 country code to its continent code.\n", "\n", "    Returns NaN when `country_code` is missing or when the pycountry_convert lookup raises.\n", "    \"\"\"\n", "    if pd.isna(country_code):\n", "        return np.nan\n", "    try:\n", "        return pycountry_convert.country_alpha2_to_continent_code(country_code)\n", "    except Exception:\n", "        # Same best-effort policy as country_to_countrycode: failures become NaN,\n", "        # interpreter-exit signals still propagate.\n", "        return np.nan" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading datasets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**FAIRsharing**" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
full_nameshort_namefs_urlurlcountriessubjects
0GenBankGenBankhttps://fairsharing.org/10.25504/FAIRsharing.9...https://www.ncbi.nlm.nih.gov/genbank/[European Union, Japan, United States][Bioinformatics, Data Management, Data Submiss...
1GlycoNAVIGlycoNAVIhttps://fairsharing.org/10.25504/FAIRsharing.w...https://glyconavi.org/[Japan][Chemistry, Glycomics, Life Science, Organic C...
2ADHDgeneADHDgenehttps://fairsharing.org/10.25504/FAIRsharing.m...http://adhd.psych.ac.cn/[China][Biomedical Science, Genetics]
3Allele frequency resource for research and tea...ALFREDhttps://fairsharing.org/10.25504/FAIRsharing.y...http://alfred.med.yale.edu[United States][Life Science]
4Animal Transcription Factor DatabaseAnimalTFDBhttps://fairsharing.org/10.25504/FAIRsharing.e...http://bioinfo.life.hust.edu.cn/AnimalTFDB/[China][Life Science]
\n", "
" ], "text/plain": [ " full_name short_name \\\n", "0 GenBank GenBank \n", "1 GlycoNAVI GlycoNAVI \n", "2 ADHDgene ADHDgene \n", "3 Allele frequency resource for research and tea... ALFRED \n", "4 Animal Transcription Factor Database AnimalTFDB \n", "\n", " fs_url \\\n", "0 https://fairsharing.org/10.25504/FAIRsharing.9... \n", "1 https://fairsharing.org/10.25504/FAIRsharing.w... \n", "2 https://fairsharing.org/10.25504/FAIRsharing.m... \n", "3 https://fairsharing.org/10.25504/FAIRsharing.y... \n", "4 https://fairsharing.org/10.25504/FAIRsharing.e... \n", "\n", " url \\\n", "0 https://www.ncbi.nlm.nih.gov/genbank/ \n", "1 https://glyconavi.org/ \n", "2 http://adhd.psych.ac.cn/ \n", "3 http://alfred.med.yale.edu \n", "4 http://bioinfo.life.hust.edu.cn/AnimalTFDB/ \n", "\n", " countries \\\n", "0 [European Union, Japan, United States] \n", "1 [Japan] \n", "2 [China] \n", "3 [United States] \n", "4 [China] \n", "\n", " subjects \n", "0 [Bioinformatics, Data Management, Data Submiss... \n", "1 [Chemistry, Glycomics, Life Science, Organic C... \n", "2 [Biomedical Science, Genetics] \n", "3 [Life Science] \n", "4 [Life Science] " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pipe-delimited export; header=0 together with `names` replaces the file's own header row.\n", "fairsharing_df = pd.read_csv(\n", "    '../data/raw/FAIRsharingDBrec_summary20210304.csv',\n", "    delimiter='|', header=0,\n", "    names=['full_name', 'short_name', 'fs_url', 'url', 'countries', 'subjects'])\n", "\n", "# Turn the comma-separated fields into lists. Whitespace around each item is trimmed so that\n", "# a value such as ' Annotation and Curation' is not counted as a separate, space-prefixed label.\n", "# The multi-character pattern is interpreted by pandas as a regular expression.\n", "for list_column in ('subjects', 'countries'):\n", "    fairsharing_df[list_column] = fairsharing_df[list_column].str.strip().str.split(pat=r'\\s*,\\s*')\n", "fairsharing_df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
full_nameshort_namefs_urlurlcountriessubjects
count175217521752175217491690
unique1752174117521752178834
topFunTree: A Resource For Exploring The Function...CGDhttps://fairsharing.org/10.25504/FAIRsharing.5...https://idn.ceos.org[United States][Life Science]
freq1311588367
\n", "
" ], "text/plain": [ " full_name short_name \\\n", "count 1752 1752 \n", "unique 1752 1741 \n", "top FunTree: A Resource For Exploring The Function... CGD \n", "freq 1 3 \n", "\n", " fs_url \\\n", "count 1752 \n", "unique 1752 \n", "top https://fairsharing.org/10.25504/FAIRsharing.5... \n", "freq 1 \n", "\n", " url countries subjects \n", "count 1752 1749 1690 \n", "unique 1752 178 834 \n", "top https://idn.ceos.org [United States] [Life Science] \n", "freq 1 588 367 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_df.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**re3data**" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexidurlofficial_nameenglish_namedescriptionlatitudelongitudesubjects
0410|re3data_____::3f2e20af26ead0432f5470d8b739638dhttp://planttfdb.cbi.pku.edu.cn/Plant Transcription Factor DatabasePlantTFDBNaN0.00.0['Life Sciences', 'Basic Biological and Medica...
1710|re3data_____::e1db3f9d2fa6c8d8067bc471ab50bdfchttps://spdf.gsfc.nasa.gov/Space Physics Data FacilityNASA's Space Physics Data Facility SPDFNaN0.00.0['Natural Sciences', 'Astrophysics and Astrono...
21310|re3data_____::59521daca59ac29b811343cc4cd370cfhttp://card.westgis.ac.cn/Cold and Arid Regions Science Data Center at L...CARD WDC for Glaciology and Geocryology World ...NaN0.00.0['Natural Sciences', 'Geosciences (including G...
31410|re3data_____::ec1ba1674c852466c266acb64c618d15https://www.psycharchives.org/PsycharchivesNaNNaN0.00.0['Humanities and Social Sciences', 'Psychology...
41910|re3data_____::2ada591fb1bc9aee72a6d3e0c1ae8a76https://www.ihfc-iugg.org/products/global-heat...The Global Heat Flow Database of the Internati...International Heat-flow DatabaseNaN0.00.0['Natural Sciences', 'Geology and Palaeontolog...
\n", "
" ], "text/plain": [ " index id \\\n", "0 4 10|re3data_____::3f2e20af26ead0432f5470d8b739638d \n", "1 7 10|re3data_____::e1db3f9d2fa6c8d8067bc471ab50bdfc \n", "2 13 10|re3data_____::59521daca59ac29b811343cc4cd370cf \n", "3 14 10|re3data_____::ec1ba1674c852466c266acb64c618d15 \n", "4 19 10|re3data_____::2ada591fb1bc9aee72a6d3e0c1ae8a76 \n", "\n", " url \\\n", "0 http://planttfdb.cbi.pku.edu.cn/ \n", "1 https://spdf.gsfc.nasa.gov/ \n", "2 http://card.westgis.ac.cn/ \n", "3 https://www.psycharchives.org/ \n", "4 https://www.ihfc-iugg.org/products/global-heat... \n", "\n", " official_name \\\n", "0 Plant Transcription Factor Database \n", "1 Space Physics Data Facility \n", "2 Cold and Arid Regions Science Data Center at L... \n", "3 Psycharchives \n", "4 The Global Heat Flow Database of the Internati... \n", "\n", " english_name description latitude \\\n", "0 PlantTFDB NaN 0.0 \n", "1 NASA's Space Physics Data Facility SPDF NaN 0.0 \n", "2 CARD WDC for Glaciology and Geocryology World ... NaN 0.0 \n", "3 NaN NaN 0.0 \n", "4 International Heat-flow Database NaN 0.0 \n", "\n", " longitude subjects \n", "0 0.0 ['Life Sciences', 'Basic Biological and Medica... \n", "1 0.0 ['Natural Sciences', 'Astrophysics and Astrono... \n", "2 0.0 ['Natural Sciences', 'Geosciences (including G... \n", "3 0.0 ['Humanities and Social Sciences', 'Psychology... \n", "4 0.0 ['Natural Sciences', 'Geology and Palaeontolog... " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The CSV holds re3data and OpenDOAR rows together; keep the rows whose id mentions re3data.\n", "# reset_index() without drop=True keeps the original row labels in an 'index' column.\n", "re3data_df = (\n", "    pd.read_csv('../data/raw/re3data_opendoar.csv')\n", "    .loc[lambda records: records['id'].str.contains('re3data')]\n", "    .reset_index()\n", ")\n", "re3data_df.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexidurlofficial_nameenglish_namedescriptionlatitudelongitudesubjects
count2693.0000002693267326932034382693.0000002693.0000002693
uniqueNaN269326612668201038NaNNaN1427
topNaN10|re3data_____::fc8141eebc533cb225498718479f4e66http://wdcpc.org/European Climate Assessment & Dataset projectECA&DThe Atmospheric Science Data Center (ASDC) at ...NaNNaN['Humanities and Social Sciences', 'Life Scien...
freqNaN12221NaNNaN209
mean4443.650947NaNNaNNaNNaNNaN0.1144970.067998NaN
std2518.294468NaNNaNNaNNaNNaN4.5854692.447173NaN
min4.000000NaNNaNNaNNaNNaN0.0000000.000000NaN
25%2266.000000NaNNaNNaNNaNNaN0.0000000.000000NaN
50%4506.000000NaNNaNNaNNaNNaN0.0000000.000000NaN
75%6660.000000NaNNaNNaNNaNNaN0.0000000.000000NaN
max8705.000000NaNNaNNaNNaNNaN234.000000123.000000NaN
\n", "
" ], "text/plain": [ " index id \\\n", "count 2693.000000 2693 \n", "unique NaN 2693 \n", "top NaN 10|re3data_____::fc8141eebc533cb225498718479f4e66 \n", "freq NaN 1 \n", "mean 4443.650947 NaN \n", "std 2518.294468 NaN \n", "min 4.000000 NaN \n", "25% 2266.000000 NaN \n", "50% 4506.000000 NaN \n", "75% 6660.000000 NaN \n", "max 8705.000000 NaN \n", "\n", " url official_name \\\n", "count 2673 2693 \n", "unique 2661 2668 \n", "top http://wdcpc.org/ European Climate Assessment & Dataset project \n", "freq 2 2 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " english_name description \\\n", "count 2034 38 \n", "unique 2010 38 \n", "top ECA&D The Atmospheric Science Data Center (ASDC) at ... \n", "freq 2 1 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " latitude longitude \\\n", "count 2693.000000 2693.000000 \n", "unique NaN NaN \n", "top NaN NaN \n", "freq NaN NaN \n", "mean 0.114497 0.067998 \n", "std 4.585469 2.447173 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 0.000000 0.000000 \n", "max 234.000000 123.000000 \n", "\n", " subjects \n", "count 2693 \n", "unique 1427 \n", "top ['Humanities and Social Sciences', 'Life Scien... \n", "freq 209 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_df.describe(include='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**OpenDOAR**" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexidurlofficial_nameenglish_namedescriptionlatitudelongitudesubjects
0010|opendoar____::e833e042f509c996b1b25324d56659fbhttp://www.bilbao.net/bldBLD - Bilboko Liburutegi DigitalaBLD - Bilboko Liburutegi DigitalaBLD is a repository of digital documents, desi...43.256699-2.924100[]
1110|opendoar____::f621585df244e9596dc70a39b579efb1https://researchdirect.westernsydney.edu.au/Western Sydney ResearchDirectWestern Sydney ResearchDirectNaN0.0000000.000000[]
2210|opendoar____::437d7d1d97917cd627a34a6a0fb41136http://redress.lancs.ac.uk/Learning_Space/Learning Space CatalogueNaNThis repository is a Social Science e-Science ...54.010760-2.784990['Social Sciences General', 'Science General',...
3310|opendoar____::d840cc5d906c3e9c84374c8919d2074ehttp://digitallibrary.usc.edu/search/controlle...USC Digital LibraryUSC Digital LibraryThis is an institutional repository providing ...34.052200-118.242996[]
4510|opendoar____::4ba3c163cd1efd4c14e3a415fa0a3010http://www.ufgd.edu.br:8080/jspui/Repositório de Divulgação das Produções Cientí...Repositório de Divulgação das Produções Cientí...This site provides access to the research outp...-22.221800-54.806400[]
\n", "
" ], "text/plain": [ " index id \\\n", "0 0 10|opendoar____::e833e042f509c996b1b25324d56659fb \n", "1 1 10|opendoar____::f621585df244e9596dc70a39b579efb1 \n", "2 2 10|opendoar____::437d7d1d97917cd627a34a6a0fb41136 \n", "3 3 10|opendoar____::d840cc5d906c3e9c84374c8919d2074e \n", "4 5 10|opendoar____::4ba3c163cd1efd4c14e3a415fa0a3010 \n", "\n", " url \\\n", "0 http://www.bilbao.net/bld \n", "1 https://researchdirect.westernsydney.edu.au/ \n", "2 http://redress.lancs.ac.uk/Learning_Space/ \n", "3 http://digitallibrary.usc.edu/search/controlle... \n", "4 http://www.ufgd.edu.br:8080/jspui/ \n", "\n", " official_name \\\n", "0 BLD - Bilboko Liburutegi Digitala \n", "1 Western Sydney ResearchDirect \n", "2 Learning Space Catalogue \n", "3 USC Digital Library \n", "4 Repositório de Divulgação das Produções Cientí... \n", "\n", " english_name \\\n", "0 BLD - Bilboko Liburutegi Digitala \n", "1 Western Sydney ResearchDirect \n", "2 NaN \n", "3 USC Digital Library \n", "4 Repositório de Divulgação das Produções Cientí... \n", "\n", " description latitude longitude \\\n", "0 BLD is a repository of digital documents, desi... 43.256699 -2.924100 \n", "1 NaN 0.000000 0.000000 \n", "2 This repository is a Social Science e-Science ... 54.010760 -2.784990 \n", "3 This is an institutional repository providing ... 34.052200 -118.242996 \n", "4 This site provides access to the research outp... -22.221800 -54.806400 \n", "\n", " subjects \n", "0 [] \n", "1 [] \n", "2 ['Social Sciences General', 'Science General',... \n", "3 [] \n", "4 [] " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Same CSV as the re3data cell; here the rows whose id mentions opendoar are kept.\n", "# reset_index() without drop=True keeps the original row labels in an 'index' column.\n", "opendoar_df = (\n", "    pd.read_csv('../data/raw/re3data_opendoar.csv')\n", "    .loc[lambda records: records['id'].str.contains('opendoar')]\n", "    .reset_index()\n", ")\n", "opendoar_df.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexidurlofficial_nameenglish_namedescriptionlatitudelongitudesubjects
count6014.000000601460136014550057766014.0000006014.0000006014
uniqueNaN60145953594654134920NaNNaN201
topNaN10|opendoar____::17256f049f1e3fede17c7a313f7657f4http://harp.lib.hiroshima-u.ac.jp/Hiroshima Associated Repository PortalAURAThis site provides access to the research outp...NaNNaN[]
freqNaN133498NaNNaN5273
mean4312.407549NaNNaNNaNNaNNaN38.6493937.810948NaN
std2510.699848NaNNaNNaNNaNNaN788.40617371.689788NaN
min0.000000NaNNaNNaNNaNNaN-79.029999-683.103027NaN
25%2129.250000NaNNaNNaNNaNNaN4.644632-49.273300NaN
50%4297.000000NaNNaNNaNNaNNaN37.9304494.788870NaN
75%6476.750000NaNNaNNaNNaNNaN47.29440030.685501NaN
max8706.000000NaNNaNNaNNaNNaN61138.800781178.438995NaN
\n", "
" ], "text/plain": [ " index id \\\n", "count 6014.000000 6014 \n", "unique NaN 6014 \n", "top NaN 10|opendoar____::17256f049f1e3fede17c7a313f7657f4 \n", "freq NaN 1 \n", "mean 4312.407549 NaN \n", "std 2510.699848 NaN \n", "min 0.000000 NaN \n", "25% 2129.250000 NaN \n", "50% 4297.000000 NaN \n", "75% 6476.750000 NaN \n", "max 8706.000000 NaN \n", "\n", " url \\\n", "count 6013 \n", "unique 5953 \n", "top http://harp.lib.hiroshima-u.ac.jp/ \n", "freq 3 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " official_name english_name \\\n", "count 6014 5500 \n", "unique 5946 5413 \n", "top Hiroshima Associated Repository Portal AURA \n", "freq 3 4 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " description latitude \\\n", "count 5776 6014.000000 \n", "unique 4920 NaN \n", "top This site provides access to the research outp... NaN \n", "freq 98 NaN \n", "mean NaN 38.649393 \n", "std NaN 788.406173 \n", "min NaN -79.029999 \n", "25% NaN 4.644632 \n", "50% NaN 37.930449 \n", "75% NaN 47.294400 \n", "max NaN 61138.800781 \n", "\n", " longitude subjects \n", "count 6014.000000 6014 \n", "unique NaN 201 \n", "top NaN [] \n", "freq NaN 5273 \n", "mean 7.810948 NaN \n", "std 71.689788 NaN \n", "min -683.103027 NaN \n", "25% -49.273300 NaN \n", "50% 4.788870 NaN \n", "75% 30.685501 NaN \n", "max 178.438995 NaN " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_df.describe(include='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Basic cleaning" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**re3data**" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Rows located at exactly (0.0, 0.0) are blanked out to NaN in both coordinate columns.\n", "re3data_df.loc[(re3data_df.latitude == 0.0) & (re3data_df.longitude == 0.0), ['latitude', 'longitude']] = [np.nan, np.nan]" ] }, { "cell_type": 
"code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 ['Life Sciences', 'Basic Biological and Medica...\n", "1 ['Natural Sciences', 'Astrophysics and Astrono...\n", "2 ['Natural Sciences', 'Geosciences (including G...\n", "3 ['Humanities and Social Sciences', 'Psychology...\n", "4 ['Natural Sciences', 'Geology and Palaeontolog...\n", " ... \n", "2688 ['Life Sciences', 'Basic Biological and Medica...\n", "2689 ['Natural Sciences', 'Atmospheric Science and ...\n", "2690 ['Natural Sciences', 'Atmospheric Science and ...\n", "2691 ['Natural Sciences', 'Atmospheric Science and ...\n", "2692 ['Life Sciences', 'Plant Sciences', 'Plant Gen...\n", "Name: subjects, Length: 2693, dtype: object" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_df.subjects" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# The CSV stores each subject list as its Python repr; parse it back into a real list.\n", "# Only values that are still strings are parsed, so re-running this cell is harmless.\n", "re3data_df['subjects'] = re3data_df.subjects.apply(\n", "    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def merge_lists(lists):\n", "    \"\"\"Concatenate an iterable of lists into one flat list, preserving order.\"\"\"\n", "    merged = []\n", "    for chunk in lists:\n", "        # extend() grows the result in place; repeated `res + l` copied it on every step.\n", "        merged.extend(chunk)\n", "    return merged\n", "\n", "\n", "# Break compound subject names on commas and on the word 'and', trimming the whitespace\n", "# around every piece (the old ',| and ' pattern produced items such as ' Horticulture'),\n", "# then gather the pieces back into one list per original row label.\n", "re3data_cleaned_subjects = (\n", "    re3data_df.explode('subjects')\n", "    .subjects.str.strip()\n", "    .str.split(r'\\s*,\\s*|\\s+and\\s+', expand=True)\n", "    .apply(lambda row: row.dropna().tolist(), axis=1)\n", "    .reset_index()\n", "    .groupby('index')[0]\n", "    .apply(merge_lists)\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "index\n", "0 [Life Sciences, Basic Biological, Medical Rese...\n", "1 [Natural Sciences, Astrophysics, Astronomy, Ph...\n", "2 [Natural Sciences, Geosciences (including Geog...\n", "3 [Humanities, Social Sciences, Psychology, Soci...\n", "4 [Natural Sciences, Geology, Palaeontology, Geo...\n", " ... 
\n", "2688 [Life Sciences, Basic Biological, Medical Rese...\n", "2689 [Natural Sciences, Atmospheric Science, Oceano...\n", "2690 [Natural Sciences, Atmospheric Science, Oceano...\n", "2691 [Natural Sciences, Atmospheric Science, Oceano...\n", "2692 [Life Sciences, Plant Sciences, Plant Genetics...\n", "Name: 0, Length: 2693, dtype: object" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_cleaned_subjects" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# The cleaned series is named 0, so the join appends it as a column labelled 0.\n", "re3data_df = re3data_df.join(re3data_cleaned_subjects)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Swap the raw subjects column for the cleaned one that the join added as column 0.\n", "re3data_df = (\n", "    re3data_df\n", "    .drop(columns=['subjects'])\n", "    .rename(columns={0: 'subjects'})\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**OpenDOAR**" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "0 []\n", "1 []\n", "2 ['Social Sciences General', 'Science General',...\n", "3 []\n", "4 []\n", " ... 
\n", "6009 ['Multidisciplinary']\n", "6010 []\n", "6011 ['Business and Economics']\n", "6012 ['Earth and Planetary Sciences', 'Ecology and ...\n", "6013 []\n", "Name: subjects, Length: 6014, dtype: object" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_df.subjects" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# The CSV stores each subject list as its Python repr; parse it back into a real list.\n", "# Only values that are still strings are parsed, so re-running this cell is harmless.\n", "opendoar_df['subjects'] = opendoar_df.subjects.apply(\n", "    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# Break compound subject names on commas and on the word 'and', trimming the whitespace\n", "# around every piece so no item keeps a leading space, then gather the pieces back into\n", "# one list per original row label with merge_lists.\n", "opendoar_cleaned_subjects = (\n", "    opendoar_df.explode('subjects')\n", "    .subjects.str.strip()\n", "    .str.split(r'\\s*,\\s*|\\s+and\\s+', expand=True)\n", "    .apply(lambda row: row.dropna().tolist(), axis=1)\n", "    .reset_index()\n", "    .groupby('index')[0]\n", "    .apply(merge_lists)\n", ")" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "index\n", "0 []\n", "1 []\n", "2 [Social Sciences General, Science General, Com...\n", "3 []\n", "4 []\n", " ... 
\n", "6009 [Multidisciplinary]\n", "6010 []\n", "6011 [Business, Economics]\n", "6012 [Earth, Planetary Sciences, Ecology, Environme...\n", "6013 []\n", "Name: 0, Length: 6014, dtype: object" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_cleaned_subjects" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# The cleaned series is named 0, so the join appends it as a column labelled 0.\n", "opendoar_df = opendoar_df.join(opendoar_cleaned_subjects)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# Swap the raw subjects column for the cleaned one that the join added as column 0.\n", "opendoar_df = (\n", "    opendoar_df\n", "    .drop(columns=['subjects'])\n", "    .rename(columns={0: 'subjects'})\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Subjects analysis" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "# One row per (record, subject) pair for each of the three registries.\n", "fairsharing_subjects, re3data_subjects, opendoar_subjects = (\n", "    registry.explode('subjects')\n", "    for registry in (fairsharing_df, re3data_df, opendoar_df)\n", ")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "name": "FAIRsharing", "type": "bar", "x": [ "Life Science", "Biomedical Science", "Earth Science", "Genomics", "Environmental Science", "Oceanography", "Biodiversity", "Atmospheric Science", "Epidemiology", "Genetics", "Health Science", "Virology", "Biology", "Proteomics", "Bioinformatics", "Agriculture", "Geology", "Preclinical Studies", "Transcriptomics", "Chemistry", "Comparative Genomics", "Data Management", "Clinical Studies", "Botany", "Functional Genomics", "Medicine", "Geophysics", "Meteorology", "Humanities and Social Sciences", "Natural Science", "Social Science", "Systems Biology", "Geography", "Ecology", "Data Submission", " Annotation and Curation", "Metabolomics", 
"Engineering Science", "Marine Biology", "Physics", "Economics", "Hydrology", "Ontology and Terminology", "Biochemistry", "Astrophysics and Astronomy", "Phylogenetics", "Molecular biology", "Epigenetics", "Medical Virology", "Remote Sensing", "Infectious Disease Medicine", "Immunology", "Humanities", "Anatomy", "Computational Biology", "Structural Biology", "Neurobiology", "Plant Genetics", "Computer Science", "Public Health", "Knowledge and Information Systems", "Microbiology", "Demographics", "Social and Behavioural Science", "Data Visualization", "Oncology", "Developmental Biology", "Critical Care Medicine", "Hydrogeology", "Data Integration", "Glycomics", "Ecosystem Science", "Soil Science", "Geochemistry", "Population Genetics", "Drug Discovery", "Materials Science", "Water Research", "Neuroscience", "Forest Management", "Plant Breeding", "Metagenomics", "Energy Engineering", "Water Management", "Paleontology", "Software Engineering", "Geodesy", "Taxonomy", "Cell Biology", "Phylogenomics", "Immunogenetics", "Pharmacology", "Mineralogy", "Freshwater Science", "Medical Informatics", "Statistics", "Epigenomics", "Human Genetics", "Phylogeny", "Global Health", "Animal Genetics", "Cheminformatics", "Evolutionary Biology", "Zoology", "Mathematics", "Microbial Ecology", "Population Dynamics", "Political Science", "Nanotechnology", "Psychology", "Physical Geography", "Education Science", "Drug Development", "Culture", "Translational Medicine", "Pathology", "Food Security", "Informatics", "Neurophysiology", "Natural History", "Phenomics", "Nutritional Science", "Computational Neuroscience", "Biotechnology", "Bioengineering", "Geoinformatics", "Data Governance", "Cartography", "History", "Analytical Chemistry", "Organic Chemistry", "Urban Planning", "Plant Anatomy", "Enzymology", "Classical Archaeology", "Animal Husbandry", "Maritime Engineering", "Materials Engineering", "Database Management", "Cardiology", "Anthropology", "Architecture", "Transportation Planning", 
"Criminology", "Primary Health Care", "Molecular Genetics", "Toxicology", "Omics", "Communication Science", "Agronomy", "Physiology", "Art", "Endocrinology", "Fisheries Science", "Economic and Social History", "Drug Metabolism", "Thermodynamics", "Plant Ecology", "Tropical Medicine", "Aerospace Engineering", "Data Quality", "Chemical Engineering", "Data Mining", "Health Services Research", "Linguistics", "Medicinal Chemistry", "Agricultural Engineering", "Geriatric Medicine", "Toxicogenomics", "Drug Repositioning", "Reproductive Health", "Materials Informatics", "Construction Engineering", "Entomology", "Aquaculture", "Pediatrics", "Agroecology", "Civil Engineering", "Inorganic Molecular Chemistry", "Business Administration", "Respiratory Medicine", "Embryology", "Molecular Microbiology", "Power Engineering", "Composite Materials", "Molecular Infection Biology", "Computational Chemistry", "Synthetic Chemistry", "Synthetic Biology", "Building Engineering Physics", "Farming Systems Research", "Biomaterials", "Pharmacy", "Veterinary Medicine", "Gastroenterology", "Structural Genomics", "Pharmacogenomics", "Occupational Medicine", "Community Care", "Molecular Dynamics", "Fine Arts", "Ancient Cultures", "Human Geography", "Molecular Chemistry", "Quantitative Genetics", " Learning and Training", "Human Biology", "Rural and Agricultural Sociology", "Social Policy", "Social Psychology", "Industrial Engineering", "Jurisprudence", "Research on Teaching", "Limnology", "Agricultural Economics", "Historical Linguistics", "Data Security", "Prehistory", "Geotechnics", "Cultural Studies", "Public Finance", "Art History", "Proteogenomics", "Digital Image Processing", "Surgery", "Plant Cell Biology", " Optical and Plasma Physics", "Safety Science", "Traditional Medicine", " Molecular", "Process Engineering", "Rheumatology", "Telecommunication Engineering", "Plastics Engineering", "Acoustics", "Plant Cultivation", "Religious Studies", "Policy", "Systemic Neuroscience", "Agricultural 
Law", "Technical Chemistry", "Public Law", "Radiology", "Synthesis Chemistry", "Chemical Biology", "Physical Chemistry", "Horticulture", "Hematology", "Gynecology", "Artificial Intelligence", "Atomic", "Behavioural Biology", "Biological Process Engineering", "Functional Materials Research", "Biological Psychology", "Food Process Engineering", "Biomimetic Chemistry", "Biophysics", "Empirical Social Research", "Electrophysiology", "Electrical Engineering", "Biotherapeutics", "Economic Theory", "Economic Policy", "Building Design", "Developmental Neurobiology", "Dermatology", "Criminal Law", "Component Engineering", "Comparative Neurobiology", "Cognitive Neuroscience", "Clinical Veterinary Medicine", "Clinical Psychology", "Clinical Chemistry", "Classical Philology", "Cellular Neuroscience", "History of Science", "Human-Machine Systems Engineering", "Photogrammetry", "Hydraulic Engineering", "Philosophy", "Personalized Medicine", "Parasitology", "Organic Molecular Chemistry", "Ophthalmology", "Obstetrics", "Neurology", "Musculoskeletal Medicine", "Animal Breeding", "Molecular Physical Chemistry", "Molecular Neuroscience", "Microstructural Mechanical Properties of Materials", "Microbial Physiology", "Microbial Genetics", "Metal-Cutting Manufacturing Engineering", "Medicines Research and Development", "Animal Physiology", "Medical Physics", "Media Studies", "Mechanics", "Mechanical Process Engineering", "Mechanical Engineering", "Materials Structuring and Functionalisation", "Applied Linguistics", "Logistics Engineering", "Literary Studies", "Applied Mathematics", "Landscape Planning", "Applied Microbiology", "Mechanical Behaviour of Construction Materials" ], "y": [ 900, 252, 227, 166, 134, 95, 80, 78, 75, 73, 67, 66, 65, 61, 60, 58, 51, 48, 48, 46, 46, 45, 45, 44, 42, 42, 41, 40, 39, 38, 36, 35, 33, 32, 31, 31, 30, 29, 27, 26, 26, 25, 25, 25, 25, 20, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 
13, 13, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] }, { "name": "re3data", "type": "bar", "visible": "legendonly", "x": [ "Life Sciences", "Natural Sciences", "Humanities", "Social Sciences", "Medicine", "Biology", "Geosciences (including Geography)", "Oceanography", "Atmospheric Science", "Basic Biological", "Medical Research", "Engineering Sciences", "Social", "Behavioural Sciences", "Geodesy", "Geophysics", " Horticulture", " Forestry", "Agriculture", "Veterinary Medicine", "Physics", "Zoology", "Immunology", "Computer Science", "Chemistry", "General Genetics", "Microbiology", " Virology", "Plant Sciences", "Astronomy", "Astrophysics", "Economics", "Bioinformatics", "Theoretical Biology", "Geography", "Water Research", "System Engineering", " Electrical", " Health Services Research", "Public Health", " Social Medicine", "Human Genetics", "Geochemistry", "Crystallography", " Mineralogy", "Developmental Biology", "Empirical Social Research", "Architecture", "Linguistics", " Geoinformatics", " Remote Sensing", " Cartogaphy", " Photogrammetry", "Palaeontology", "Geology", " Quantum Optics", "History", "Optics", " Molecules", "Animal Genetics", " Cell", "Construction Engineering", "Cell Biology", "Neurosciences", "Ecosystem Research", " Biodiversity", "Biochemistry", "Animal Ecology", "Media Studies", "Physics 
of Atoms", "Plasmas", " Music", "Fine Arts", " Theatre", "Plant Ecology", "Ecosystem Analysis", "Plant Genetics", "Ancient Cultures", "Materials Science", "Cultural Anthropology", "Particles", " Nuclei", "Fields", "Economic", "Statistics", "Econometrics", "Education Sciences", "Epidemiology", " Medical Informatics", " Medical Biometry", "Political Science", "Religious Studies", "Jurisprudence", "Social Policy", " Hydrology", " Integrated Water Resources Management", "Hydrogeology", " Urban Water Management", " Limnology", " Water Chemistry", "Structural Biology", " Social", " Jewish Studies", "Cultures", "Non-European Languages", "Molecular Chemistry", "Mathematics", "Engineering", "Psychology", "Evolution", " Biochemistry", "Genetics of Microorganisms", "Metabolism", "Soil Sciences", "Theoretical Chemistry", "Physical", "Ecology of Agricultural Landscapes", "Analytical Chemistry", " Method Development (Chemistry)", "Condensed Matter Physics", "Pharmacology", "Systems Engineering", "Human Geography", "Literary Studies", "Liquids - Spectroscopy", " Interfaces", "Food Chemistry", "Biophysics", "Basic Forest Research", " Kinetics", "Physical Chemistry of Molecules", " Image", "Language Processing", "Surface Research", "Artificial Intelligence", "Chemical Solid State", "Physical Geography", " Atoms", "Biological Chemistry", " Anthropology", " Plasmas", "Ethnology/Folklore", "Art History", "Thermal Engineering/Process Engineering", "Sociology", "Agricultural Economics", "Theology", "Virology", "Cognitive Neuroscience", "Urbanism", " Transportation", "Infrastructure Planning", "Neuroimaging", "Musicology", " Landscape Planning", " Spatial Planning", "Modern", " Transfusion Medicine", " Oncology", "Medical Physics", "Plant Systematics", "Current History", "Hematology", "Biomedical Technology", " Building", "Medical Microbiology", " Building Design", " Sustainable Building Technology", "Construction History", " Molecular Infection Biology", "Morphology", "Traffic", 
"Electrical Engineering", "Systematics", "Transport Systems", " Logistics", "Communication Science", "Anatomy", "Pharmacy", "Use of Forest Resources", "Classical Archaeology", "Business Administration", "Inventory Control", "Public Finance", "Occupational Medicine", "Toxicology", "Social History", "Physiology", "Plant Biochemistry", "Training", "Plant Breeding", "Mechanical", "History of Science", "Pediatric", "Research on Teaching", "Process Engineering", "industrial Engineering", " Learning", " Technical Chemistry", "Adolescent Medicine", "Heat Energy Technology", " Legal History", " Legal Theory", "Plant Cultivation", "Radiology", " Fluid Mechanics", " Thermal Machines", "Legal", "Political Philosophy", "Nuclear Medicine", "Systemic Neuroscience", "Microbial Ecology", "Animal Physiology", "Applied Microbiology", " Behaviour", " Computational Neuroscience", "Jewish Studies", "Materials Engineering", "Organic Molecular Chemistry", "General", "Research on Socialization", "Professions", "Public Law", "Educational Institutions", "Software Technology", "Typology", "Basic Veterinary Medical Science", "Medieval History", "Early Modern History", "Surfaces", " Material Characterisation", "Philosophy", "Forensic Medicine", " Historical Linguistics", " Non-European Languages", "Physical Chemistry of Solids", "Pathology", " Soft Matter", "Modelling", " Metabolism", " Biological Physics", "Theory", "Ancient History", "Ancient Near Eastern Studies", "Molecular Neuroscience", "Agricultural", "Neurogenetics", " Nonlinear Dynamics", "Prehistory", "Statistical Physics", "Egyptology", "Criminology", "Inorganic Molecular Chemistry", "Gastroenterology", "Food Process Engineering", "Obstetrics", "Constructive Mechanical Engineering", "Acoustics", "Human Factors", "Gynaecology", "Oceania Studies", "Electrical Energy Generation", " Distribution", " Ergonomics", "Sensory", " Material Synthesis", "Mechanics", "Plant Cell", "Solid State", " Human-Machine Systems", "Biological", "Biomimetic 
Chemistry", " American", "Surface Chemistry", " Application", "African", "Behavioural Biology", "Therapy", " Methodology", " Medical Psychology", " Semitic Studies", "Plant Nutrition", "Radiobiology", "Clinical Veterinary Medicine", "Operating", "Polymer Research", "General Theoretical Chemistry", " Communication", "Islamic Studies", "Nutritional Sciences", " Breeding", "Radiation Oncology", " Clinical Psychology", "Endocrinology", "Theatre", "Differential Psychology", "Applied Linguistics", " Arabian Studies", " Diagnostics", "Hygiene", "Animal Husbandry", "Information Systems", "Basic Research on Pathogenesis", "Individual Linguistics", " Diabetology", "History of Education", "European", "Developmental", "Asian Studies", "Plant Physiology", "Energy Process Engineering", "Experimental Condensed Matter Physics", " High-Frequency", "General Education", " Industrial", "Protestant Theology", "Network Technology", "Communication", " Theoretical Electrical Engineering", "American Literature", " Hydraulic Engineering", "Clinical Neurosciences III - Ophthalmology", "Geotechnics", "Social Psychology", "Pathobiochemistry", "Geriatric Medicine", "Educational Psychology", "Organisational Psychology", "Clinical Chemistry", "Sociological Theory", "Gerontology", "Rheumatology", " Allergology", "Dentistry", " Construction Operation", "Dermatology", " Geosciences (including Geography)", "Sructural Engineering", " Control Systems", "Sintered Metallic", " Clinical Immunology", "Roman Catholic Theology", "Reproductive Medicine/Biology", " Clinical Infectiology Intensive Care Medicine", " Building Informatics", "Geosciences (including Geography) ", " Atmospheric Science", "Automation", "Veterinary Medicine ", "Mathematical Psychology", "Biological Process Engineering", " Biological", "Preparatory", "Thermodynamics", " Angiology", " Robotics", " Oral Surgery", "Comparative Literature", "Atmospheric Science ", "Biomaterials", "Law of Criminal Procedure", "Physical Chemistry of 
Polymers", "Kinetics of Materials", "Cardiology", " Mechatronics", "Criminal Law", "Cultural Studies", "Pneumology", "Polymer Materials", "Ceramic Materials", "Technical Thermodynamics", "Theoretical Condensed Matter Physics", "Urology", "Electronic Semiconductors", " Circuits", "Traumatology", " Chemistry", "Theoretical Computer Science", "Technical Chemistry", "Thermal Process Engineering", " Building Physics", "Theoretical Physics of Polymers", "Thermal Processes", "Inter-organismic Interactions of Plants", "Economic Theory", " Thermomechanical Treatment of Materials", "Cellular Neuroscience", "Life Sciences ", "Biological Psychiatry", "Measurement Systems", "Medical Research ", "Medieval German Literature", "Chemical", "Metallurgical", "Classical Philology", "Microstructural Mechanical Properties of Materials", "Clinical Neurosciences I - Neurology", "History of Philosophy", "Orthopaedics", " Components", " Systems", "Comparative Neurobiology", "Composite Materials", " Neurosurgery", "Construction Material Sciences", " Life Sciences ", "Cardiothoracic Surgery", "Private Law", " General Genetics", "Experimental", "Social Sciences ", "Developmental Neurobiology", " Agriculture" ], "y": [ 1440, 1325, 1238, 1222, 1014, 882, 760, 581, 535, 514, 513, 496, 451, 432, 360, 326, 317, 317, 316, 315, 308, 239, 234, 227, 224, 220, 212, 212, 210, 205, 205, 204, 175, 175, 159, 143, 142, 142, 131, 131, 131, 117, 108, 108, 108, 104, 103, 101, 101, 99, 99, 99, 99, 98, 98, 96, 96, 96, 96, 94, 94, 86, 85, 81, 79, 79, 79, 79, 78, 75, 75, 74, 74, 74, 67, 67, 65, 65, 64, 63, 63, 63, 63, 62, 62, 62, 61, 57, 57, 57, 56, 52, 50, 50, 48, 48, 48, 48, 48, 48, 45, 44, 44, 44, 44, 42, 41, 40, 38, 37, 36, 36, 36, 34, 33, 33, 31, 27, 27, 26, 25, 25, 24, 24, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 
12, 12, 12, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] }, { "name": "OpenDOAR", "type": "bar", "visible": "legendonly", "x": [ "Multidisciplinary", "Medicine", "Health", "Science General", "Technology General", "Economics", "Business", "Archaeology", "Social Sciences General", "History", "Politics", "Environment", "Ecology", "Law", "Computers", "IT", "Biology", "Biochemistry", "Information Science", "Library", "Humanities General", "Arts", "Education", " Food", "Agriculture", "Statistics", "Mathematics", "Literature", "Veterinary", "Astronomy", "Physics", "Geography", "Regional Studies", "Language", "Religion", "Chemical Technology", "Chemistry", "Philosophy", "Fine", "Performing Arts", "Planning", "Psychology", "Management", "Planetary Sciences", "Earth", "Electrical", "Electronic Engineering", "Architecture", "Civil Engineering", "Mechanical Engineering", "Materials", " History", " Philosophy", " Health", "Social Sciences General ", " Language", " Technology General", " Law", "Performing Arts ", " Science General", "Medicine ", "IT ", "Veterinary " ], "y": [ 466, 67, 66, 63, 53, 52, 52, 49, 48, 47, 44, 44, 44, 43, 43, 42, 40, 40, 36, 36, 35, 35, 32, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 29, 27, 27, 27, 25, 23, 22, 17, 17, 17, 16, 16, 12, 12, 12, 8, 7, 7, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] } ], "layout": { "template": { 
"data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, 
"#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { 
"outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, 
"#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Subject coverage" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data1 = fairsharing_subjects.groupby('subjects')[['url']].count().sort_values('url', ascending=False)\n", "data2 = re3data_subjects.groupby('subjects')[['url']].count().sort_values('url', ascending=False)\n", "data3 = opendoar_subjects.groupby('subjects')[['url']].count().sort_values('url', ascending=False)\n", "\n", "plot = [\n", " go.Bar(\n", " x=data1.index,\n", " y=data1['url'],\n", " name='FAIRsharing'\n", " ),\n", " go.Bar(\n", " x=data2.index,\n", " y=data2['url'],\n", " name='re3data',\n", " visible = 'legendonly'\n", " ),\n", " go.Bar(\n", " x=data3.index,\n", " y=data3['url'],\n", " name='OpenDOAR',\n", " visible = 'legendonly'\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='Subject coverage',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "\n", "fig = go.Figure(plot, layout).show()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "311" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(fairsharing_subjects.subjects.unique())" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "414" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(re3data_subjects.subjects.unique())" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "64" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(opendoar_subjects.subjects.unique())" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([nan, 'Social Sciences General', 'Science General', 'Computers',\n", " 'IT', 'Physics', 'Astronomy', 'Multidisciplinary', 'Arts',\n", " 'Humanities General', 'Philosophy', 'Religion', 'Business',\n", " 
'Economics', 'Law', 'Politics', 'Psychology', 'Health', 'Medicine',\n", " 'History', 'Archaeology', 'Education', 'Technology General',\n", " 'Library', 'Information Science', 'Earth', 'Planetary Sciences',\n", " 'Geography', 'Regional Studies', 'Architecture', 'Ecology',\n", " 'Environment', 'Electrical', 'Electronic Engineering', 'Biology',\n", " 'Biochemistry', 'Mathematics', 'Statistics', 'Civil Engineering',\n", " 'Agriculture', ' Food', 'Veterinary', 'Language', 'Literature',\n", " 'Chemistry', 'Chemical Technology', 'Mechanical Engineering',\n", " 'Materials', 'Fine', 'Performing Arts', 'Management', 'Planning',\n", " ' Language', ' Health', 'Veterinary ', ' Technology General',\n", " 'Medicine ', ' History', 'IT ', ' Law', 'Social Sciences General ',\n", " ' Science General', ' Philosophy', 'Performing Arts '],\n", " dtype=object)" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_subjects.subjects.unique()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Geographic analysis" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "fairsharing_countries = fairsharing_df.explode('countries')\n", "fairsharing_countries['countrycode'] = fairsharing_countries.countries.map(lambda c: country_to_countrycode(c))\n", "fairsharing_countries['continent'] = fairsharing_countries.countrycode.map(lambda cc: countrycode_to_continent(cc))" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['European Union', 'Republic of Ireland', 'Worldwide', nan],\n", " dtype=object)" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_countries[fairsharing_countries.countrycode.isna()].countries.unique()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['European Union', 'Republic of Ireland', 
'Worldwide', 'Antarctica',\n", " nan], dtype=object)" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_countries[fairsharing_countries.continent.isna()].countries.unique()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Manually fixing exceptions" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "fairsharing_countries.loc[fairsharing_countries.countries == 'Republic of Ireland', ['countries', 'countrycode', 'continent']] = ['Ireland', 'IE', 'EU']\n", "fairsharing_countries.loc[fairsharing_countries.countries == 'Antarctica', ['countrycode', 'continent']] = ['AQ', np.nan]\n", "fairsharing_countries.loc[fairsharing_countries.countries == 'European Union', ['countrycode', 'continent']] = ['EU', 'EU']" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
full_nameshort_namefs_urlurlcountriessubjectscountrycodecontinent
915Antabif IPT - AntOBIS IPT - GBIF BelgiumAntabif IPT - AntOBIS IPT - GBIF Belgiumhttps://fairsharing.org/10.25504/FAIRsharing.e...http://ipt.biodiversity.aq/Antarctica[Biodiversity, Life Science]AQNaN
\n", "
" ], "text/plain": [ " full_name \\\n", "915 Antabif IPT - AntOBIS IPT - GBIF Belgium \n", "\n", " short_name \\\n", "915 Antabif IPT - AntOBIS IPT - GBIF Belgium \n", "\n", " fs_url \\\n", "915 https://fairsharing.org/10.25504/FAIRsharing.e... \n", "\n", " url countries subjects \\\n", "915 http://ipt.biodiversity.aq/ Antarctica [Biodiversity, Life Science] \n", "\n", " countrycode continent \n", "915 AQ NaN " ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_countries[fairsharing_countries.countrycode == 'AQ']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For re3data" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "index 5\n", "id 5\n", "url 5\n", "official_name 5\n", "english_name 5\n", "description 5\n", "latitude 5\n", "longitude 5\n", "subjects 5\n", "dtype: int64" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_df[re3data_df.latitude.notna()].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Location is basically absent in re3data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For OpenDOAR" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loading formatted geocoded file...\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
latlonnameadmin1admin2cccontinent
043.26271-2.92528BilbaoBasque CountryBizkaiaESEU
14.88447-1.75536TakoradiWesternGHAF
253.98333-2.78333GalgateEnglandLancashireGBEU
334.05223-118.24368Los AngelesCaliforniaLos Angeles CountyUSNA
4-22.22111-54.80556DouradosMato Grosso do SulDouradosBRSA
........................
600940.8563114.24641NapoliCampaniaProvincia di NapoliITEU
601038.1939415.55256MessinaSicilyMessinaITEU
601154.3213310.13489KielSchleswig-HolsteinDEEU
601243.40785-73.25955GranvilleNew YorkWashington CountyUSNA
601333.96095-83.37794AthensGeorgiaClarke CountyUSNA
\n", "

6014 rows × 7 columns

\n", "
" ], "text/plain": [ " lat lon name admin1 \\\n", "0 43.26271 -2.92528 Bilbao Basque Country \n", "1 4.88447 -1.75536 Takoradi Western \n", "2 53.98333 -2.78333 Galgate England \n", "3 34.05223 -118.24368 Los Angeles California \n", "4 -22.22111 -54.80556 Dourados Mato Grosso do Sul \n", "... ... ... ... ... \n", "6009 40.85631 14.24641 Napoli Campania \n", "6010 38.19394 15.55256 Messina Sicily \n", "6011 54.32133 10.13489 Kiel Schleswig-Holstein \n", "6012 43.40785 -73.25955 Granville New York \n", "6013 33.96095 -83.37794 Athens Georgia \n", "\n", " admin2 cc continent \n", "0 Bizkaia ES EU \n", "1 GH AF \n", "2 Lancashire GB EU \n", "3 Los Angeles County US NA \n", "4 Dourados BR SA \n", "... ... .. ... \n", "6009 Provincia di Napoli IT EU \n", "6010 Messina IT EU \n", "6011 DE EU \n", "6012 Washington County US NA \n", "6013 Clarke County US NA \n", "\n", "[6014 rows x 7 columns]" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reverse_geocoding = pd.DataFrame(rg.search(opendoar_df[['latitude', 'longitude']].apply(tuple, axis=1).tolist()))\n", "reverse_geocoding['lat'] = reverse_geocoding['lat'].astype('float')\n", "reverse_geocoding['lon'] = reverse_geocoding['lon'].astype('float')\n", "reverse_geocoding['continent'] = reverse_geocoding.cc.map(countrycode_to_continent)\n", "reverse_geocoding" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "opendoar_df = opendoar_df.join(reverse_geocoding)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Manual fix of null lat/lon" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "opendoar_df.loc[(opendoar_df.latitude == 0.0) & (opendoar_df.longitude == 0.0), ['latitude', 'longitude', 'cc', 'continent']] = [np.nan, np.nan, np.nan, np.nan]\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Country intersection**" ] }, { "cell_type": "code", "execution_count": 63, 
"metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPgAAADsCAYAAABZlmuGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2dd3Rb15Wvv41GsFMSZYqkuptsyZab3J2J7RQnLmkvTpuUmZeVl/dHysRJJu09GJN4JZOJJ1mZrMzK+KU4kzh2qkviOLEdF0mxIxdJli3LstW7JZGUSIIk2n5/HMCCKDYAxL3AvedbC0sicA+wQeKHs88+e+8jqorFYvEmAbcNsFgslcMK3GLxMFbgFouHsQK3WDyMFbjF4mGswC0WD2MFbrF4GCtwi8XDWIFbLB7GCtxi8TBW4BaLh7ECt1g8jBW4xeJhrMAtFg9jBW6xeBgrcIvFw1iBWywexgrcYvEwVuAWi4exArdYPIwVuMXiYazALRYPE3LbAIu7SFwECGM+CyEgCGRytzSQ0Zim3bPQUg5i+6J7E4lLGGgFmoDG3L+F/49gBD1VLy4LjACDQCL3b+GtT2M6MI1vwTINWIF7AIlLE9AOzAZm5m7NLpiSBA4DPQX/9lgPwD2swGuQnKDnAt1AJ9DgrkUTkgVeBfbmbgc0phl3TfIPVuA1gMQlghFzN0bYLe5aVBZp4ACwB9ipMe1x2R5PYwVepUhcQsB84BRgHib45UV6ga3AFo1pn9vGeA0r8CpC4hLAzNCnAAsw0W0/0YMR+ysa06NuG+MFrMCrAIlLI3AmcAYQddmcamEPsBHYoTHNum1MrWIF7iISlw5gGbAIm3Q0HoMYob+oMR1225hawwrcYXKJJScDZ2G2tSxTIwO8DDxr99unjhW4g0hcFgMXAG1u21LDZIFNwFqN6aDbxlQ7VuAOIHGZD6wAZrlti4fIAC8C6zSmCbeNqVaswCuIxKUTuBDocNsWD5MGXsC47im3jak2rMArgMSlAbgEs9a2OMMg8ITGdKvbhlQTVuDTSC6AdibGHY+4bI5f2Q2s1pgecduQasAKfJqQuLQDV2Aj49VAFliPCcT5utDFCrxMctlnF2K2vcRlcyzH0wf8RWN6yG1D3MIKvAwkLq3A1ZhSTUt1kgWe0piud9sQN7ACLxGJy+nAZdiuOLXCXuARv+2dW4EXSa508wpshLwWGQEe15huc9sQp7ACLwKJy0zgzbjTLcUyfazTmK5x2wgnsAKfIrlstKuZxhLOoKIhRQWTlpUK2IITB9mBCcB5OjnGCnwKSFzOBi5ikih5e4p0d5L03CTZuUm0K4l0pAjOSBMKgYgiAfMkMpaSR4TMQIBsf5DMkRB6IEx2bxjZG0F2RwjujhBWsZH6aaQHeMDLxStW4BOQ2wK7HFgy+rH2FOkVAyQvGITFI4TajIgrOgOPCJkddSSfrye7rpHgxnoiI3bWL5dh4M8a0/1uG1IJrMDHIRdMexPQBdCQIXPBIMkLB9ClQ4Tb0+53W8mA7o2QerGe9PoG5NlGIgNBz7Z2qiRZ4GEvBt+swMdA4hIF3tqZpPWtfSRXDBDqTBEJVHkiSwb0+QaGH2hFnmimLmPd+WJQzDbaK24bMp1YgY/iA5+QeoU3XNrPjIXJ2m2flBAyTzYzcn8bwZfqqXPbnhpBgZUa001uGzJdWIHnEZl5tJUlT1zNyak66t02Zzo5FCL5SAvp388g0hOyiTlT4K8a0+fdNmI6sAIX6QDOH65nzqo3kR1u8G4VWBqyj7Qw9NPZ1PVZoU/GGo3pOreNKBf/ClykCzgP6BquJ+11cReSguzDrQz9dzvRoyEblJuAVRrTjW4bUQ7+E7hIO6YZQydAKkzm8WvIDDX5Q9yFJIXsA60M/bydaMJG38dCgYdqObruH4GLRDFlnaeTi
4Yr6Oo3MtzX7q01d7GMCJk/tDF8Rzv1dl/9BDLA/RrTfW4bUgreF7hIANNl5QJGdVlZdzGJ3Yuq+uA+R+kNkvp2J5m1jbW7e1AhksC9tXiOmrcFLtIGXMkYXVa2nk5i43lW3GOxuonEd+dQZ93240gAd9daWqt3BS6yDOOSnxAtfrWT4TWvo46ATQQZj/4A6Vs7ST/TZGfzAg4B99TS8cfeW2+JNCFyLXApY4h7oJnkM5cTtuKemOYsoZv3EP3EfgaDikdngaJpxzT5qBm8NYOLdGNKOsecdZIRMo+9hcyIT7bDpov9YUZu7kb21NnfW47HNKYvuW3EVPDODC5yNvBWJjid86nXkbTiLp45Ker+fQfBsxLYw/8Ml0lcauKUmtoXuEgQkSuBi5mgGGTr6SR6Z/t7O6wcGpTgv+wicuUR7DFBZun3xlzFYVVT2wI3e9vXA6dOdNlgI6lNy22wqFxCEPjMfhref8iKHGgBXue2EZNRuwIXacCI+6SJLlPQZ64gkw3W8HutMt53mIbP7iUhNvi2WOJyittGTERtfuhFWoC3ATMmu3TzMhJHZ9jZe7r5u34avr6L4bosWbdtcZnLcmfRVSW1J3CRmcANTKGzaX8LyVeW2nV3pVg6RP23d5DyucjrMG20q5LaEriZua+FyTPQsoI+fQWqNre6osxLUve1XYz4fK98gcSlKvvk186H36y5r4WpzciblpMYbLGdTJxgyTD1X9jLkNt2uMylEpeq+7zVhsBF6jB73FM6cCDRQGrb6dY1d5KLB2j4+AFfR9frMQVNVUX1C1wkCFwDzJzqkBfOJ2Vdc+e5to+Gdx32tcjPkLi0uW1EIbUggiuAjqlefKSN5IFuO3u7xYcOUe/jZJgAJuGqaqhugYssBU4rZsiGFWSw7YJdIwDyyf1EfZzWOl/i0u22EXmqV+AiczCtlabMwTm2O0s1EILAF/cQaspQM2WV08zFEpeqmGSqU+AmYv4GirRvwwV25q4WmrOEPruPpNt2uMQsivQ8K0V1Ctx0YSkqO2jnYoYSzXZbrJo4f5D6N/b5dj1+bjXM4tUncNOJpag1TFbQTcttn+9q5H+9St3sFJ4+onccWoDFbhtRXQIXacW0WSqKnSczlIy6fxig5UTqlOCX9/h2Lb7cbQOqR+AiAvwdY7RZmoytZ9jmgNXMySNE33eIQbftcIF2ictcNw2oHoGbM7jnFDvo4ByGE0127V3t3HiY+kXDvgy6nevmi1eHwEUiwIpShr681NdFDjVDCAKf2u/LqrNOicuEPQsqSXUIHM5ngl5q4zHYSKpntq31rhVOHiF6Sb8vi1LOdOuF3Re4OZxgaSlDty0habPWaouPvUrQh6WliyUurgSB3Re4yd0t2o6soLsX2rV3rdGeJnJDr+9m8RDgSr24uwIXOQmYX8rQffMZTkfs3nct8u7DRML+6wJzuhsv6vYMXnKEcfuEfVQt1UxzltANvb4rRulwo5TUPYGb3moLShmaDpLtm2Xd81rmf/TYWdwJ3JzBzyl14IG5jNiGDrVNU5bQdX2+m8UdX4e7IxKRZsp4s3vn+y4K60mu6fNdBmKT00ceuTULnsEExwxNhIIe6rDuuRfoSlF38jAjbtvhMCUtS0vFeYGbnPOSa2UPdTCSCfvum9+zXN/ru0KUknaNSsWNGXw+RdZ6F7Jnoe8+EJ7msn7qfBZsO8nJk1DcEPiScgYf7LTH/3qJqBK8ot93brpjs7izAhepB+aVOvxIG8mRelv37TWu7XPbAsfxqMBNgKHk13y1i/Q02mKpEk4ZJuqzri9TbgNeLm4IvGR626fLDEs1EQC5ps9XteL1EpcWJ17IOYGLhCiy19pojs6wuede5dxB3+2MOFIj7uQM3k0J7ZjypENkh+3627MsHCHiszLSorsXlYKTAi/PPZ9la7+9TBgCZw75KpruuRm8rG+sntl2/9vrnDfoq7/xLIlLxZeczgjcHP9bVqlc3yxbXOJ1zk74a
h0uFHFibqk4JZqytwWOtvnqj+9LFg37bh3eWukXcErgZbnnqTCZkQabweZ1whBY4q91uGcEXlZAYaDFJrj4BZ+twz0j8BnlDE40+qoYwdcsHvHVTokHBG4ONSjrzO7hBl+ty3xNe8pXwVQPCLzM6DnAkBW4b5iV9lW2YrjSpaNOCLzsb6nhel+5bb6mMeu7gxHK8m4noyYEPlLvK7fN1wRATkr5Kqha0fZjTginbBdkxM7gvmJOyleR9IqereeEwMt+A8mITXLxE51JX7no/hZ4VlB7RJG/6Ez5alu0oi66E8IZ9w28F656CK5QkDfAyrvg4dHXpCJkwfsz+JO76fjeGj6W/3kwRfvFc7l3MEnjll7OEdBoiKOfvZSfLGnniJu2VprZKYeWZIep5xd8iAG6EOBKbmczS9nB5YQZAGAFv+NKnq+gFRWdwZ0Q+Jhv4E7oegiueB6+3gLppfCpP8Bz18LBwus04A937eK5HLh4Ll8FSGaQ9/+Gb77lFNZ2NJLoaOJegG+u5qrbnuW6W9/Ez921trKEnfqL/4r3MI8XeBs/YJggg0TYzFJO4SHew4MOWVHRFGwnXPQxmzSsgc6FsG0OJBsguxQ2/3CMwwiz4g+BF/K7FzmjKcLBszvo6Wg6drxPMkMEvP/7CDnxDnuop5fTuJ5VAETJMMuVY40r6q04MYOP+SVyGez5Mbz9BWicBam1sGwh7Bh9nQb8F0F/Yjcrlp3EmvzP/+cvvH3TYS4OBxi65SpuddM2JwhW+EMPwC5mEaaf/+Ij9DOXVnbwPu4CYAtX8k0uoZUdvItf0U6igpZUdJJ1YgYf84/1Ltj/TnjgKvj0CvjkPNgV4MTgioqvAi4MJgnuPMLZb1/CM/n7vnoVd//q3XzhzNn87SfrudJN+5zAkRk8Q5AB5nM+j/E5vkaQJHdzDVfxKJ/ny9zEV6nnCL/h3RW2pKJfZk4IfNw/1w9h9QG4ZRd8qwkSC+DA6GtE/TWD/3ojy2bWs/OUmfSPfuxtp7Pm5cOc54ZdTuLIN3oHvUToZQXbADiTZ+hlPl30E0IJolzOSvpYWGFLKvp15qrA10IzwKMw81k4N8Yxt7RgtK8EvmYvFy6fw1Ov/bznWKntA1tY3hZlvzuWOUfaibhLN0eJ0sumXDOSLZxBC/vYXZB5+TTn0MzeCltS0ffqxBo8wzjbXDfAxxPQGIDM5+AXp3JikCPgIwf9cILIvn7O+MoV/Cx/34/X8c7vPEmHCNoU4fCnL/J2BB0g49RX+tX8gvv4n9xLiHoO8j5u51e8l37mAlDPYd5x7G9RISoqcFGt8JelyPvIzdSlMBIl/eA7bKKLn3iyicQt3eWnONcIGzWmqyr15E646GW14Alk/OWiW2DQX6VFFW1R5cSvcnjyS8YnnCIoGX9F0v3OvrD39/oL8LfAASJJX1UX+Z59EV+VB5etj4moDYGPWIH7iX1hK/Dpwolf5An7ucVSN2xddD9xIOyrM+hq3kUv+3j3aCUTBS1VRVLIHg3ZGXy6cOIXWXZpY33CV0EXX9MX9FW7JoDBSj65Uy56WS52NGG3yvzCoZCv4i0DGtOKfqFVXuAmk+ZoOU9Rn/CVy+ZrDkR85a1VvHGHU8I5VM7ghgErcL+woaJNhKsOzwi8rAKJxn7CAZvs4gvWNvrqkEnPCPyEMtBiEJCmoySnyxhLddIbJHUo7Ku6A88IvAdIlfMErT2+Cr74ko315X1GapCyt5AnwxmBm0BbWbP4jEN2He51nm301W7JsMa0rODzVHBSNHvKGTzzoPdbJ/udZxt9lcFW1oQ3VZwU+AkNFYuhqZ9IIG3ddK/iw/W3xwSu2keZQYWmo75bo/mGF+t9l8HmMYEbyprF22ygzbM82uK2BY6SZdQBH5XCaYFvL2fwrAM20OZFBgKkn2yq7BE+VUZPpVNU8zgtmAOQO/OpBDr2UidZm/DiNR5vYUTFVxH0SndqfQ1nBW62yzaXOjyUJtDaU9n6WYvz3
Nvmq+g5lLlULQY3XN5NlNEqtmunr4oRPM/2CMN76nyVnjpCmanbxeC8wFUHKGNPvGunrz4MnueBNt8tuXZprNK9yo/hVtBqU6kDo0OEmo5YN90LpCD7cKuvgmtQZqC5WNwS+HbK6NXWscd3e6ae5OkmRoYDvtoZyQK7nXxBd365qllgfanDu3ZYN90L3DXLV+IG2KsxdbQq0s1f8EtQ2rnLrX2E64ZsVlsts6GeoS1R6ty2w2FK3kEqFfcErpoBNpQ6vHOXrQ+vZW47yXfFQ0nIHVXsIG67SBspsW3sok3WTa9V1jcwtC3qu7/fKxpTx1Ot3RW4agp4ppShjYOE2w6feNywpbrJgP7Af7M3mCWp47g9gwO8CPSWMnCRK78ySzmsbGZol78SW8DknjtSXDIa9wVuIupPlDK0cyfRUNJumdUKI0LmtpN8F1iDMvI+ysV9gQOo7gZ2FjssoMiCV2zSS63wu5kMHw35zj1P4pJ7DtUicMNfofjZePEmovb88OpnV4SRO2fR4LYdLvCCxtS1Ld3qEbjqUeDpYofVjRCcs7uyB7hZyiMF2Vu6kIy/SkLBTFglbwVPB9UjcMMGSmhlc9rzhFBbZVat3D7bdxVjeV7SmLo6+VSXwE29+CMU6ao3HyXSscdumVUjG6MM3TPTl655FnjObSOqS+CQd9X/Wuywpc8Qtt1eqouEkPl6t++aOeTZojEtuaBquqg+gQOobqLIvN2GBOF5W+0sXk18bw7JvpCvWiHnyVJiAtd0U50CN6ykyFNJz1hHNGh7p1cFq5tIrGzBX2eFHuN5J04tmQrVK3BTjPJnishVD6cILn7RRtTdZksdw7d2+q6RQ55h4Fm3jchTvQKHfHunh2Dqa+tTNlIfGbalpG5xIETyS/MIp/zVyKGQZ5yu+Z6I6v8jqO4FHpvq5cEsgSXrrcDd4EiQ1OcXEEgEfZetlqcPU1tRNVS/wAFUX6aIfPX5W2lo6LcprE4yLGS+OA/t8WdQLc+TGtOq2smpDYEDqG4A1k718vNXIWRt8osTpCB781xSPqwSK2SbxrToeopKUzsCB1B9CtMkYlJa+4icvMlum1WaLOitXYy80ODboBqYXuer3DZiLGpL4ACqq5hihtCS9dTbFsuVIwP63TkMrW727XZYnic0plU5mdSewAFUnwSemuwyAblgJWKrzaafYSETm8vIw62+TEMtZKfG1PFmilOlNgUOoLoWWD3ZZU39RE573u6NTyd9QVKfWUBmfaOv3XIwe96Pu23ERNSuwAFUX8Dsk09YnHLqRhpaeqzIp4M9YUY+sRDxeUAtz+Ma05JafztFbQscQHUrcA+THEt8wUqCAZvGWhYv1DP0qYWEfZpfPprnNKbb3TZiMmpf4ACqh4HfMsG5yw0Jwmc9bQNupfJYM4kvziM64t8MtUL2AWvcNmIqiDp30GHlEQkAFwJnj3fJ8+cxuP10Gp0zqrYZETI/OImRB9t8H0zLkwB+W+2ueR5vCTyPSBfweqBprIefvJLEoTn2AzsZW+sY/lo3wYNh39Z0jyYL/F5j6tj53uXiTYEDiESAS4HTRj+UDpJdeQ2pwRZftvCdlDRkfzuToZ+106D+66M2EU9oTF3tsVYs3hV4HpFFwGVw/Iw9XE/6sbdAqs4GjArZHmH4G10EfNpDbSKe15gW3WnIbbwfMFHdBtwFrINjUfToEKGLHiVjk2AMI0LmR7NJfGIRUSvuE9hKiYdzuI33Z/BCRFqAS4AF+bv2LGBo7aX+TbVMCtkHWhn6eTtRH5d5TsRe4I9uHBw4HfhL4HlEuoHzgE6AzUsZ3Hy2vyLrKcg+2MbQz2cR9eFpI1PlMHBfNTVwKBZ/CjyPSCdG6N0vLWPw5bO8L/I0ZB9pYeins6mzCSsT0g/cUyvbYePhb4HnEekAlm86i9mvLPOmyJNCdnUzwz+ZTcTnTRmmwhHMdtig24aUixV4ISKN99zAmQMtLGvN1P7ebxZ0c5ThP7XCyhbqb
BbalOgB/lCt5Z/FYgU+BuGb5czlg5x/XR+6fJBouMZ2Gw6ESD7cSupPbdTZ2booDgH3u33c0HRiBT4OEpfTgddFs+i5g4xcOICelSDUka7OLaQDIVLrG0ne30Z4S7Q6baxyDmCi5TUbUBsLK/AJkLjMA66GY4KZmSZ9/gDJFYNw5hBhN1z5NGR3R0hubCCzroHAhgYiA3aLqxx2AQ+5ecxvpbACnwSJywzgGqB5rMe7R0guGSY9N4l2J5GOFIHZKULN2elxjYeFTE+I9IEwmc1RWNdI8KWor/uOTzfPAX/TmDeFYAU+BSQuUeBNwJypjqnLkp2TIj03SaYriYYVgoqG1Czog4oEMT+LQkagJwQ9IaQnhBwKETwQJmiTTypGFlipMX3JbUMqiRX4FJG4BIHXAae6bYulbIaBP9dSVVipWIEXicTlLOAiaiyybnmNQxhxT9gByCtYgZeAxKUduApoc9sWS1E8B6ypttNHKokVeIlIXEKYMtTT3bbFMikJ4BGN6R63DXEaK/AykbgsxqzN7d5zdbId0/3UM8krxWAFPg1IXJows/mCya61OEYScxjgJrcNcRMr8GlE4rIQ0yZqzF5wFsfYjNnb9kQ+eTlYgU8zubX5BcAybKTdaXqAVX7Y/poqnhe4iGwHPqqqD03T8/0RuFNVb5/wurjMBC6niOQYS8mkgGcwfdN8EyGfCo4IPCeyDjjuZJHTgDpgC/ADVf3fo8YocKqqviIiNwNfxhzTmsYcIXyTqk7aJ2u6BV4suXz2FUC7G6/vcTLAi8Ba646PjZMu5PWq2lRw2wt8COgF3iMik7UwvktVmzBCeQT4VYXtPQ4xFP370pju0pj+FngQ814t5ZPFfMnfqTH9qxX3+Li2RhQRwQj8KxgX6/qpjFPVNPBzoFtEZueeq11Efi8ifSLSIyIrR4nxHBF5TkSOiMhdIhLNjZuRG3dQRHpz/59bYOOjInKLiKzG7KUuzt330dzjHxGRVSLyrdz4bSLyloLxi0TkcRHp52Zu42Zez79yLXC0nN+dj8kCmzDCXuWFjiuVxs0g0OXAXOBO4JfAh6cySMyBBh/CNMTLz4g3AbuB2ZilwJeAwrXHjZiKsEWYY40+krs/APwYs701HxgCvjfqJT8IfAxTTbZjDJMuAl7CeBbfBH6Y+/ICuANzhtUs4GbggwzRj2nj/CfAd4kXJZLEZKHdpTF93C9pptOBk90+7haR/DG/j2Jygv+oqr0icgfwuIicpKqvjjP+RhG5DiO0PuBdudkcjAfQCSxQ1VeAlaPGfje3JEBE7gPOAVBzaOFv8heJyC0Y97+Qn6g5pjh/zWi7dqjqbbnHbge+D3TkvohWAFerahJYJSL3AuRKE3cAO3LlqMswRSy2+8rx9GJc8c1erNV2Aic/UG/PB7pEpB7YD3wUQFWfEJGdwPuB74wz/peq+vci0o4R5fmYLwqAf8PMkH/OCfC/VPUbBWMLt00SQFfOjgbg25jZfUbu8WYRCaq+1gd71yTv67XnVtVE7vXzsYIe1eO6cu4C5hUO1pj2AislLmswgcdT8XdALo3JPttot7vKx60Z4x1AC/B9EfmP3H1tGDd9PIEDoKqHRORjwNMicoeq7lPVfoybfpOILAP+IiJPqerDk9hxEyaX/CJV3S8i5wBr4bjzuErdZtgHzBSRhgKRzxvvYo3pCLAB2CBxaQVOBk7BHwUtaWAn5gSRnRp7zTOzlIlbAv8w8CPM1leebuApETlLdeID3lT1JRH5E/B54J9yrvsmzJbbEcz2yVT2Q5sx6+4+EZkJxIp+J+PbuENEngZuFpGvYDyO64H7Jh0b0yPAs8CzEpdZGKHP55iX4QXSmLjJFmCHFXVlcFzgYk4VuRo4V/U4F2y/iDyAEf9np/BU/4aZqb+OcWu/hwmy9QLfV9XRa+mx+A4mEHYIc0TNrcDbp/pepsAHgJ9gAoJrMMG1ojq0aEwP58b/TeLSgPki7MYEKGvpCOQs8Coms
LgXOGCTUiqP5zPZqgkRuQvYpKrT4inkAnSdwEmYdfsMqJrjfhOY1NH8l+d+O0s7jxV4BRGRFZgP+TZMT7e7gUtUdW1FXs/kwc8AZuZurZiAXyNU5Cz0LGaJM4jZ2ch7Gz1+Lc+sNqzAK4iIXI/ZNpuFWW9+XVV/7IotRvxNBbcIZok2+hbExDAymHVy4b9JjJjztyGvdiP1ClbgFouHseWMFouHsQK3WDyMFbjF4mGswC0WD2MFbrF4GCtwi8XDWIFbLB7GCtxi8TBW4JZxybWk2iAiCRHZLyL/KSIVL18Vke0iMiQi/bk2XH8VkY+P7oknIpeKyF9y1x0RkftE5Mxx3oeKyHtG3f96EcmKyEDuOV4SkX+o9PtzEitwy5iIyE3AvwKfw+S0X4xpbfVgrltNpbleVZtzr/kN4J+BHxbYdwnwZ+AeTAOPRcB6YLWILB71XB/G1AR8aIzX2Ztr5tkC/BNwm4h457w5VbU3ezvuhvmwDwA3jrq/CTgI/COmg86vMSWw/Zj69eUF13ZhOu8cxBTbfLLgsZsxffh+mhv7AnBBwePbgTeMeu0LMcUty3I/r8SUBY+2/Y/ATwt+XpAb9y5MTv2cgsdeD+weNf5V4N1u/w2m62ZncMtYXApEgd8W3qmqA8D9wBtzd70N0756Jqau/m4RCedc6fswM2q+/v/TIvLmgqe7AdNwsw24lxObXR6Hqq7BFOxckWu1dSljt87+ZYF9YGbtp1X1N5ge6h8Y6/lFJCAiN2DKbl+ZyJZawgrcMhbtwCHVMeu393GsZ9wzqvprVU0B/475UrgY02xytqr+i6omVXUrcBvw3oLnWaWq96vpffffwPIp2LWXY6WwgZwtE9kHRuB35P5/Bye66V0i0ocpe/0d8BmtUDmvG1iBW8biENAuImN1/OnMPQ4FDSlVNYuZYbswbnFXLkDWlxPQlzAtrfOMboQZHef1CunGrKV7MW5350T2ichlmLX5nbnH7gDOyvXey7NXVdswy5LvAldNYkNNYQVuGYsnMMdEvbPwThFpAt4C5JtZzit4LIBpI7UXI/xtqtpWcGtW1beWalCueUY3ZuYfzNn47jEuvbHAvg9jOtysE5H9wN8K7j8OVR3BBPLOEpHpbNvlKlbglhNQ1SNAHPgPEbkmt65eiFnf7sa41ADni1ua+hwAAAEMSURBVMg7czPvpzFfCk9i+s/1i8g/i0i9iARFZFlOpEUhIi25ppp3Aj/TYw05vwB8WEQ+KSLNuVNqvgZcAsTFnF5zI+bQinMKbp8A3j+Wt6Cmf/2twP8t1s5qxQrcMiaq+k2MW/0tzFFLf8PMzFfnZjswW1TvwbjMHwTeqaqp3Lr6OoygtmFc5v+H2W6bKveJSH/uNb+MWeO/tketqquAN2O8jH2YgyTOBS5X1ZcxzTOHMBH1/fkbpptvCNMLfyx+BMzPdeOpeWxHF0tJiDnx9RRV/Xu3bbGMj53BLRYPYwVusXgY66JbLB7GzuAWi4exArdYPIwVuMXiYazALRYPYwVusXgYK3CLxcP8f7j87dD0gbDtAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Venn diagram of the distinct country codes found in each registry (missing codes are dropped first).\n", "venn2(\n", "    [set(codes.dropna()) for codes in (fairsharing_countries['countrycode'], opendoar_df['cc'])],\n", "    set_labels=('FAIRsharing', 'OpenDOAR'),\n", ")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Country coverage**" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "name": "FAIRsharing", "type": "bar", "x": [ "US", "GB", "DE", "FR", "CH", "CN", "NL", "IT", "CA", "BE", "ES", "JP", "SE", "CZ", "NO", "DK", "EU", "AT", "FI", "IE", "AU", "IL", "PT", "HU", "GR", "MT", "LU", "LT", "IS", "SK", "ME", "HR", "IN", "PL", "SG", "KR", "RU", "ZA", "TW", "MX", "BR", "NZ", "SA", "HK", "AR", "TR", "BG", "RO", "PK", "CR", "MA", "CY", "TH", "EE", "UG", "SV", "UY", "PA", "TG", "AE", "NI", "NG", "BJ", "CL", "CM", "CO", "EG", "ET", "FO", "GL", "HN", "ID", "AQ", "KE", "LV", "MG", "ML", "MR", "MW", "MZ", "NE", "ZW" ], "y": [ 686, 248, 192, 162, 114, 99, 96, 91, 86, 83, 83, 80, 76, 71, 69, 67, 66, 64, 63, 62, 62, 61, 60, 59, 58, 53, 52, 52, 52, 52, 51, 51, 32, 11, 10, 10, 9, 9, 8, 8, 8, 8, 6, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] }, { "name": "OpenDOAR", "type": "bar", "x": [ "US", "JP", "GB", "DE", "ES", "TR", "PE", "ID", "BR", "IT", "HR", "FR", "PL", "CA", "IN", "AU", "UA", "CO", "NO", "NL", "AR", "TW", "CH", "PT", "SE", "CN", "MX", "AT", "GR", "ZA", "BE", "HU", "RU", "EC", "KR", "KE", "RS", "BY", "CZ", "IE", "NG", "MY", "FI", "NZ", "IR", "CL", "VE", "TH", "LT", "DK", "SD", "LK", "DZ", "SI", "BD", "EG", "NI", "TZ", "KZ", "HK", "CU", "CR", "UG", "PH", "MD", "BG", "ZW", "SV", "SA", "EE", "CY", "JM", "UY", "RO", "BW", "PA", "PS", "MK", "PK", "AE", "GH", "LY", "ET", "DO", "KG", "LV", "LB", "IS", "AZ", "SJ", "SN", "HN", "SO", "IL", "IQ", "YE", "SG", "GE", "BO", "NA", "BA", "XK", "TN", "CV", 
"ZM", "BN", "AM", "SK", "FJ", "VA", "LS", "MV", "PR", "MZ", "MN", "MA", "LU", "PY", "PW", "GS", "MW", "AL", "MR", "MO", "WF", "VN", "RW", "NC", "GT", "SY", "BJ", "NP", "GP", "CM", "TT", "AF", "LA", "MT" ], "y": [ 886, 544, 328, 283, 160, 143, 138, 136, 135, 135, 135, 133, 115, 104, 101, 95, 89, 89, 83, 74, 69, 63, 61, 55, 55, 54, 46, 42, 40, 40, 39, 39, 35, 34, 33, 33, 32, 31, 27, 27, 27, 26, 26, 21, 20, 19, 18, 18, 17, 16, 15, 15, 14, 13, 13, 13, 12, 12, 12, 11, 11, 10, 9, 8, 8, 8, 8, 8, 8, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, 
"colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], 
"scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" 
], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", 
"linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Country coverage" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Tally non-null entries per country code in each registry, ordered from most to fewest.\n", "data1 = (\n", "    fairsharing_countries\n", "    .groupby('countrycode')[['url']]\n", "    .count()\n", "    .sort_values(by='url', ascending=False)\n", ")\n", "data2 = (\n", "    opendoar_df\n", "    .groupby('cc')[['id']]\n", "    .count()\n", "    .sort_values(by='id', ascending=False)\n", ")\n", "\n", "# One bar trace per registry: country codes on x, the tallied column on y.\n", "plot = [\n", "    go.Bar(name=registry, x=counts.index, y=counts[column])\n", "    for registry, counts, column in (\n", "        ('FAIRsharing', data1, 'url'),\n", "        ('OpenDOAR', data2, 'id'),\n", "    )\n", "]\n", "\n", "layout = go.Layout(\n", "    title='Country coverage',\n", "    xaxis={'tickangle': 45, 'tickfont': {'size': 12}},\n", ")\n", "\n", "go.Figure(data=plot, layout=layout).show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Continental coverage**" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "fill": "toself", "name": "FAIRsharing", "r": [ 27, 320, 2176, 787, 70, 14 ], "theta": [ "AF", "AS", "EU", "NA", "OC", "SA" ], "type": "scatterpolar" }, { "fill": "toself", "name": "OpenDOAR", "r": [ 214, 1264, 2145, 1100, 121, 513 ], "theta": [ "AF", "AS", "EU", "NA", "OC", "SA" ], "type": "scatterpolar" } ], "layout": { "polar": { "radialaxis": { "visible": true } }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": 
[ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 
0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] 
], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": 
"#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Per-continent tallies of non-null url values, one frame per registry.\n", "# NOTE(review): the country chart counts OpenDOAR rows via 'id' while this cell uses 'url' - confirm that is intended.\n", "data1, data2 = (\n", "    registry_df.groupby('continent')[['url']].count()\n", "    for registry_df in (fairsharing_countries, opendoar_df)\n", ")\n", "\n", "# One filled polar trace per registry, with the continent codes as angular categories.\n", "plot = [\n", "    go.Scatterpolar(name=registry, r=counts['url'], theta=counts.index, fill='toself')\n", "    for registry, counts in (('FAIRsharing', data1), ('OpenDOAR', data2))\n", "]\n", "\n", "layout = go.Layout(polar={'radialaxis': {'visible': True}})\n", "\n", "go.Figure(data=plot, layout=layout).show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }