fake-orcid-analysis/notebooks/01-Exploration.ipynb

12806 lines
455 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exploratory analysis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TODO:\n",
"- Understanding why fake profiles are created can give insight into how to catch them (could be trivial with prior knowledge, e.g., SEO hacking => URLs)\n",
"- Enumerate the possible cases (e.g., author publishing with an empty ORCID profile, author publishing but not on OpenAIRE, etc.)\n",
"- Is the temporal dimension of any use?\n",
"- Can we access private info thanks to the OpenAIRE-ORCID agreement?\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" requirejs.config({\n",
" paths: {\n",
" 'plotly': ['https://cdn.plot.ly/plotly-latest.min']\n",
" }\n",
" });\n",
" require(['plotly'], function(Plotly) {\n",
" window._Plotly = Plotly;\n",
" });\n",
" }\n",
" </script>\n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import ast\n",
"import tldextract\n",
"import numpy\n",
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
"import plotly.express as px\n",
"\n",
"init_notebook_mode(connected=True)\n",
"TOP_N = 30\n",
"TOP_RANGE = [-.5, TOP_N - 1 + .5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable solid ORCID iDs for explorative purposes:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"AM = '0000-0002-5193-7851'\n",
"PP = '0000-0002-8588-4196'\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Anomalous ORCID profiles:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"JOURNAL = '0000-0003-1815-5732'\n",
"NOINFO= '0000-0001-5009-2052'\n",
"# todo: find group-shared ORCiD, if possible"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable fake ORCID iDs for explorative purposes:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"SCAFFOLD = '0000-0001-5004-7761'\n",
"WHATSAPP = '0000-0001-6997-9470'\n",
"PENIS = '0000-0002-3399-7287'\n",
"BITCOIN = '0000-0002-7518-6845'\n",
"FITNESS_CHINA = '0000-0002-1234-835X' # URL record + employment\n",
"CANNABIS = '0000-0002-9025-8632' # URL > 70 + works (REMOVED)\n",
"PLUMBER = '0000-0002-1700-8311' # URL > 10 + works "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the dataset"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
"# Column labels applied positionally to the raw TSV.\n",
"COLUMN_NAMES = [\n",
"    'orcid', 'claimed', 'verified_email', 'verified_primary_email',\n",
"    'given_names', 'family_name', 'biography', 'other_names', 'urls',\n",
"    'primary_email', 'other_emails', 'keywords', 'external_ids', 'education',\n",
"    'employment', 'n_works', 'works_source',\n",
"]\n",
"\n",
"# header=0 together with names= replaces the header row found in the file.\n",
"df = pd.read_csv(\n",
"    '../data/raw/initial_info_whole.tsv',\n",
"    sep='\\t',\n",
"    header=0,\n",
"    names=COLUMN_NAMES,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>other_emails</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7552</th>\n",
" <td>0000-0001-7831-7567</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Vahab</td>\n",
" <td>Vahdat</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[\"Scopus Author ID\", \"57193490305\"], [\"Scopus...</td>\n",
" <td>[[\"Industrial Engineering\", \"PhD\", \"Northeaste...</td>\n",
" <td>[[\"Post-doctorate fellow\", \"Harvard Medical Sc...</td>\n",
" <td>25</td>\n",
" <td>[\"Vahab Vahdat\", \"Scopus - Elsevier\", \"Multidi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8416</th>\n",
" <td>0000-0001-8161-1345</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>AYFER</td>\n",
" <td>TEKIN ATACAN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16498</th>\n",
" <td>0000-0002-1133-1505</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Xianrong</td>\n",
" <td>Lai</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[\"Scopus Author ID\", \"15769435500\"]]</td>\n",
" <td>[[\"Department of pharmacy\", \"Bachelor of Tradi...</td>\n",
" <td>[[\"Associate Research, Professor\", \"Chengdu Un...</td>\n",
" <td>115</td>\n",
" <td>[\"Xianrong Lai\", \"Scopus - Elsevier\", \"Crossref\"]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16830</th>\n",
" <td>0000-0002-1257-5536</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Alexandra</td>\n",
" <td>Zimmer</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[\"Research assistent\", \"Fraunhofer-Institut f...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18835</th>\n",
" <td>0000-0002-2026-4156</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Fatma</td>\n",
" <td>Sri Wahyuni</td>\n",
" <td>NaN</td>\n",
" <td>[\"Ayu\"]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[\"ResearcherID\", \"C-5194-2015\"], [\"Scopus Aut...</td>\n",
" <td>[[\"Biosains\", \"PHD\", \"Universiti Putra Malaysi...</td>\n",
" <td>[[\"Lecturer\", \"Universitas Andalas\", \"Padang\",...</td>\n",
" <td>27</td>\n",
" <td>[\"Publons\", \"Crossref Metadata Search\", \"Scopu...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10733293</th>\n",
" <td>0000-0002-9887-7788</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Markéta</td>\n",
" <td>Laštůvková</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[\"\", \"VSB - Technical University of Ostrava\",...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10737258</th>\n",
" <td>0000-0003-1367-8104</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>LORENA</td>\n",
" <td>GUTIÉRREZ GARCÍA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[\"LinkedIn\", \"https://www.linkedin.com/in/lor...</td>\n",
" <td>lorenagg@unex.es</td>\n",
" <td>NaN</td>\n",
" <td>[\"Agroecolog\\u00eda, Bot\\u00e1nica, Did\\u00e1c...</td>\n",
" <td>[[\"ResearcherID\", \"AAE-6316-2021\"]]</td>\n",
" <td>[[\"\", \"M\\u00e1ster en Formaci\\u00f3n del profe...</td>\n",
" <td>[[\"PCI\", \"Universidad de Extremadura - Campus ...</td>\n",
" <td>14</td>\n",
" <td>[\"Multidisciplinary Digital Publishing Institu...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10738308</th>\n",
" <td>0000-0003-1741-3437</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Xing</td>\n",
" <td>Liu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[\"ResearcherID\", \"S-3053-2017\"]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10741460</th>\n",
" <td>0000-0003-2909-8585</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Yusuf</td>\n",
" <td>Özcan</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[\"\\u0130lahiyat Fak\\u00fcltesi\", \"Doktora\", \"...</td>\n",
" <td>[[\"Research Assistant\", \"\\u00c7ukurova Univers...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10745078</th>\n",
" <td>0000-0003-4259-5324</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>P Rama Mohan</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[\"Scopus Author ID\", \"24776757000\"]]</td>\n",
" <td>[[\"EEE Department\", \"Ph.D. (Power Electronics ...</td>\n",
" <td>[[\"Associate Professor\", \"RGM College of Engin...</td>\n",
" <td>21</td>\n",
" <td>[\"Scopus - Elsevier\", \"P Rama Mohan\"]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2418 rows × 17 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email \\\n",
"7552 0000-0001-7831-7567 1 1 \n",
"8416 0000-0001-8161-1345 1 1 \n",
"16498 0000-0002-1133-1505 1 1 \n",
"16830 0000-0002-1257-5536 1 1 \n",
"18835 0000-0002-2026-4156 1 1 \n",
"... ... ... ... \n",
"10733293 0000-0002-9887-7788 1 1 \n",
"10737258 0000-0003-1367-8104 1 1 \n",
"10738308 0000-0003-1741-3437 1 1 \n",
"10741460 0000-0003-2909-8585 1 1 \n",
"10745078 0000-0003-4259-5324 1 1 \n",
"\n",
" verified_primary_email given_names family_name biography \\\n",
"7552 1 Vahab Vahdat NaN \n",
"8416 1 AYFER TEKIN ATACAN NaN \n",
"16498 1 Xianrong Lai NaN \n",
"16830 1 Alexandra Zimmer NaN \n",
"18835 1 Fatma Sri Wahyuni NaN \n",
"... ... ... ... ... \n",
"10733293 1 Markéta Laštůvková NaN \n",
"10737258 1 LORENA GUTIÉRREZ GARCÍA NaN \n",
"10738308 1 Xing Liu NaN \n",
"10741460 1 Yusuf Özcan NaN \n",
"10745078 1 P Rama Mohan NaN NaN \n",
"\n",
" other_names urls \\\n",
"7552 NaN NaN \n",
"8416 NaN NaN \n",
"16498 NaN NaN \n",
"16830 NaN NaN \n",
"18835 [\"Ayu\"] NaN \n",
"... ... ... \n",
"10733293 NaN NaN \n",
"10737258 NaN [[\"LinkedIn\", \"https://www.linkedin.com/in/lor... \n",
"10738308 NaN NaN \n",
"10741460 NaN NaN \n",
"10745078 NaN NaN \n",
"\n",
" primary_email other_emails \\\n",
"7552 NaN NaN \n",
"8416 NaN NaN \n",
"16498 NaN NaN \n",
"16830 NaN NaN \n",
"18835 NaN NaN \n",
"... ... ... \n",
"10733293 NaN NaN \n",
"10737258 lorenagg@unex.es NaN \n",
"10738308 NaN NaN \n",
"10741460 NaN NaN \n",
"10745078 NaN NaN \n",
"\n",
" keywords \\\n",
"7552 NaN \n",
"8416 NaN \n",
"16498 NaN \n",
"16830 NaN \n",
"18835 NaN \n",
"... ... \n",
"10733293 NaN \n",
"10737258 [\"Agroecolog\\u00eda, Bot\\u00e1nica, Did\\u00e1c... \n",
"10738308 NaN \n",
"10741460 NaN \n",
"10745078 NaN \n",
"\n",
" external_ids \\\n",
"7552 [[\"Scopus Author ID\", \"57193490305\"], [\"Scopus... \n",
"8416 NaN \n",
"16498 [[\"Scopus Author ID\", \"15769435500\"]] \n",
"16830 NaN \n",
"18835 [[\"ResearcherID\", \"C-5194-2015\"], [\"Scopus Aut... \n",
"... ... \n",
"10733293 NaN \n",
"10737258 [[\"ResearcherID\", \"AAE-6316-2021\"]] \n",
"10738308 [[\"ResearcherID\", \"S-3053-2017\"]] \n",
"10741460 NaN \n",
"10745078 [[\"Scopus Author ID\", \"24776757000\"]] \n",
"\n",
" education \\\n",
"7552 [[\"Industrial Engineering\", \"PhD\", \"Northeaste... \n",
"8416 NaN \n",
"16498 [[\"Department of pharmacy\", \"Bachelor of Tradi... \n",
"16830 NaN \n",
"18835 [[\"Biosains\", \"PHD\", \"Universiti Putra Malaysi... \n",
"... ... \n",
"10733293 NaN \n",
"10737258 [[\"\", \"M\\u00e1ster en Formaci\\u00f3n del profe... \n",
"10738308 NaN \n",
"10741460 [[\"\\u0130lahiyat Fak\\u00fcltesi\", \"Doktora\", \"... \n",
"10745078 [[\"EEE Department\", \"Ph.D. (Power Electronics ... \n",
"\n",
" employment n_works \\\n",
"7552 [[\"Post-doctorate fellow\", \"Harvard Medical Sc... 25 \n",
"8416 NaN 0 \n",
"16498 [[\"Associate Research, Professor\", \"Chengdu Un... 115 \n",
"16830 [[\"Research assistent\", \"Fraunhofer-Institut f... 0 \n",
"18835 [[\"Lecturer\", \"Universitas Andalas\", \"Padang\",... 27 \n",
"... ... ... \n",
"10733293 [[\"\", \"VSB - Technical University of Ostrava\",... 0 \n",
"10737258 [[\"PCI\", \"Universidad de Extremadura - Campus ... 14 \n",
"10738308 NaN 0 \n",
"10741460 [[\"Research Assistant\", \"\\u00c7ukurova Univers... 0 \n",
"10745078 [[\"Associate Professor\", \"RGM College of Engin... 21 \n",
"\n",
" works_source \n",
"7552 [\"Vahab Vahdat\", \"Scopus - Elsevier\", \"Multidi... \n",
"8416 NaN \n",
"16498 [\"Xianrong Lai\", \"Scopus - Elsevier\", \"Crossref\"] \n",
"16830 NaN \n",
"18835 [\"Publons\", \"Crossref Metadata Search\", \"Scopu... \n",
"... ... \n",
"10733293 NaN \n",
"10737258 [\"Multidisciplinary Digital Publishing Institu... \n",
"10738308 NaN \n",
"10741460 NaN \n",
"10745078 [\"Scopus - Elsevier\", \"P Rama Mohan\"] \n",
"\n",
"[2418 rows x 17 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.duplicated()]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Remove rows that are exact copies of an earlier row (every column equal).\n",
"df = df.drop_duplicates()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Basic column manipulation (interpret columns as lists when necessary)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"# Parse the serialized list stored in each string cell. Non-string values (NaN, or lists\n",
"# left by a previous run of this cell) pass through unchanged, so the cell is re-runnable.\n",
"df['other_names'] = df['other_names'].apply(lambda value: ast.literal_eval(value) if isinstance(value, str) else value)"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"# Parse the serialized list stored in each string cell. Non-string values (NaN, or lists\n",
"# left by a previous run of this cell) pass through unchanged, so the cell is re-runnable.\n",
"df['keywords'] = df['keywords'].apply(lambda value: ast.literal_eval(value) if isinstance(value, str) else value)"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"# Parse the serialized list stored in each string cell. Non-string values (NaN, or lists\n",
"# left by a previous run of this cell) pass through unchanged, so the cell is re-runnable.\n",
"df['urls'] = df['urls'].apply(lambda value: ast.literal_eval(value) if isinstance(value, str) else value)"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
"# Parse the serialized list stored in each string cell. Non-string values (NaN, or lists\n",
"# left by a previous run of this cell) pass through unchanged, so the cell is re-runnable.\n",
"df['other_emails'] = df['other_emails'].apply(lambda value: ast.literal_eval(value) if isinstance(value, str) else value)"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [],
"source": [
"# Parse the serialized list stored in each string cell. Non-string values (NaN, or lists\n",
"# left by a previous run of this cell) pass through unchanged, so the cell is re-runnable.\n",
"df['education'] = df['education'].apply(lambda value: ast.literal_eval(value) if isinstance(value, str) else value)"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
"# Parse the serialized list stored in each string cell. Non-string values (NaN, or lists\n",
"# left by a previous run of this cell) pass through unchanged, so the cell is re-runnable.\n",
"df['employment'] = df['employment'].apply(lambda value: ast.literal_eval(value) if isinstance(value, str) else value)"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"# Parse the serialized list stored in each string cell. Non-string values (NaN, or lists\n",
"# left by a previous run of this cell) pass through unchanged, so the cell is re-runnable.\n",
"df['external_ids'] = df['external_ids'].apply(lambda value: ast.literal_eval(value) if isinstance(value, str) else value)"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"# Parse the serialized list stored in each string cell. Non-string values (NaN, or lists\n",
"# left by a previous run of this cell) pass through unchanged, so the cell is re-runnable.\n",
"df['works_source'] = df['works_source'].apply(lambda value: ast.literal_eval(value) if isinstance(value, str) else value)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>other_emails</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0001-5000-2053</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Jorge</td>\n",
" <td>Jaramillo Sanchez</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0001-5000-6548</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Wiseman</td>\n",
" <td>Bekelesi</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0001-5000-7962</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>ALICE</td>\n",
" <td>INDIMULI</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0001-5000-8586</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>shim</td>\n",
" <td>ji yun</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0001-5001-0256</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Sandro</td>\n",
" <td>Caramaschi</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"0 0000-0001-5000-2053 1 0 0 \n",
"1 0000-0001-5000-6548 1 0 0 \n",
"2 0000-0001-5000-7962 1 1 1 \n",
"3 0000-0001-5000-8586 1 0 0 \n",
"4 0000-0001-5001-0256 1 0 0 \n",
"\n",
" given_names family_name biography other_names urls primary_email \\\n",
"0 Jorge Jaramillo Sanchez NaN NaN NaN NaN \n",
"1 Wiseman Bekelesi NaN NaN NaN NaN \n",
"2 ALICE INDIMULI NaN NaN NaN NaN \n",
"3 shim ji yun NaN NaN NaN NaN \n",
"4 Sandro Caramaschi NaN NaN NaN NaN \n",
"\n",
" other_emails keywords external_ids education employment n_works \\\n",
"0 NaN NaN NaN NaN NaN 0 \n",
"1 NaN NaN NaN NaN NaN 0 \n",
"2 NaN NaN NaN NaN NaN 0 \n",
"3 NaN NaN NaN NaN NaN 0 \n",
"4 NaN NaN NaN NaN NaN 0 \n",
"\n",
" works_source \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>other_emails</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8840413</th>\n",
" <td>0000-0002-5193-7851</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Andrea</td>\n",
" <td>Mannocci</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Personal website, https://andremann.github.i...</td>\n",
" <td>andrea.mannocci@isti.cnr.it</td>\n",
" <td>NaN</td>\n",
" <td>[Data science , science of science, scholarly ...</td>\n",
" <td>[[Scopus Author ID, 55233589900]]</td>\n",
" <td>[[Information engineering, Ph.D., Università d...</td>\n",
" <td>[[Research Associate, Istituto di Scienza e Te...</td>\n",
" <td>37</td>\n",
" <td>[Scopus - Elsevier, Crossref Metadata Search, ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"8840413 0000-0002-5193-7851 1 1 1 \n",
"\n",
" given_names family_name biography other_names \\\n",
"8840413 Andrea Mannocci NaN NaN \n",
"\n",
" urls \\\n",
"8840413 [[Personal website, https://andremann.github.i... \n",
"\n",
" primary_email other_emails \\\n",
"8840413 andrea.mannocci@isti.cnr.it NaN \n",
"\n",
" keywords \\\n",
"8840413 [Data science , science of science, scholarly ... \n",
"\n",
" external_ids \\\n",
"8840413 [[Scopus Author ID, 55233589900]] \n",
"\n",
" education \\\n",
"8840413 [[Information engineering, Ph.D., Università d... \n",
"\n",
" employment n_works \\\n",
"8840413 [[Research Associate, Istituto di Scienza e Te... 37 \n",
"\n",
" works_source \n",
"8840413 [Scopus - Elsevier, Crossref Metadata Search, ... "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['orcid'] == AM]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>other_emails</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9517099</th>\n",
" <td>0000-0001-6997-9470</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>other</td>\n",
" <td>whatsapp</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Otherwhatsapp, https://otherwhatsapp.com/], ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[Whatsapp GB, whatsapp gb 2020, whatsapp gb ba...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"9517099 0000-0001-6997-9470 1 1 1 \n",
"\n",
" given_names family_name biography other_names \\\n",
"9517099 other whatsapp NaN NaN \n",
"\n",
" urls primary_email \\\n",
"9517099 [[Otherwhatsapp, https://otherwhatsapp.com/], ... NaN \n",
"\n",
" other_emails keywords \\\n",
"9517099 NaN [Whatsapp GB, whatsapp gb 2020, whatsapp gb ba... \n",
"\n",
" external_ids education employment n_works works_source \n",
"9517099 NaN NaN NaN 0 NaN "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['orcid'] == WHATSAPP]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"orcid 10744622\n",
"claimed 10744622\n",
"verified_email 10744622\n",
"verified_primary_email 10744622\n",
"given_names 10716789\n",
"family_name 10437094\n",
"biography 333885\n",
"other_names 544550\n",
"urls 688262\n",
"primary_email 121476\n",
"other_emails 47470\n",
"keywords 638634\n",
"external_ids 1285292\n",
"education 2402440\n",
"employment 2626670\n",
"n_works 10744622\n",
"works_source 2671906\n",
"dtype: int64"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.count()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>other_emails</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4595263</th>\n",
" <td>0000-0002-5154-6404</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Olusola</td>\n",
" <td>Bamisile</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Energy Systems Engineering , Doctoral, Cypru...</td>\n",
" <td>[[, University of Electronic Science and Techn...</td>\n",
" <td>3</td>\n",
" <td>[Multidisciplinary Digital Publishing Institut...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4595264</th>\n",
" <td>0000-0002-5154-6404</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Olusola</td>\n",
" <td>Bamisile</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Energy Systems Engineering , Doctoral, Cypru...</td>\n",
" <td>[[, University of Electronic Science and Techn...</td>\n",
" <td>2</td>\n",
" <td>[Crossref]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"4595263 0000-0002-5154-6404 1 1 1 \n",
"4595264 0000-0002-5154-6404 1 1 1 \n",
"\n",
" given_names family_name biography other_names urls primary_email \\\n",
"4595263 Olusola Bamisile NaN NaN NaN NaN \n",
"4595264 Olusola Bamisile NaN NaN NaN NaN \n",
"\n",
" other_emails keywords external_ids \\\n",
"4595263 NaN NaN NaN \n",
"4595264 NaN NaN NaN \n",
"\n",
" education \\\n",
"4595263 [[Energy Systems Engineering , Doctoral, Cypru... \n",
"4595264 [[Energy Systems Engineering , Doctoral, Cypru... \n",
"\n",
" employment n_works \\\n",
"4595263 [[, University of Electronic Science and Techn... 3 \n",
"4595264 [[, University of Electronic Science and Techn... 2 \n",
"\n",
" works_source \n",
"4595263 [Multidisciplinary Digital Publishing Institut... \n",
"4595264 [Crossref] "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['orcid'] == '0000-0002-5154-6404']"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# 0000-0002-5154-6404 appears twice with different works data; keep the first record per ORCID iD.\n",
"# De-duplicating on the key, rather than dropping a hard-coded row label, keeps this cell\n",
"# re-runnable (dropping an already-removed label raises KeyError).\n",
"df = df.drop_duplicates(subset='orcid', keep='first')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 10744621\n",
"unique 10744621\n",
"top 0000-0002-3936-2047\n",
"freq 1\n",
"Name: orcid, dtype: object"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['orcid'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Primary email"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 121476\n",
"unique 121473\n",
"top opercin@erbakan.edu.tr\n",
"freq 2\n",
"Name: primary_email, dtype: object"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['primary_email'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Duplicated primary emails:"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7483666 maykin@owasp.org\n",
"9068234 opercin@erbakan.edu.tr\n",
"10246485 patrick.davey@monash.edu\n",
"Name: primary_email, dtype: object"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Primary emails that repeat an earlier profile's primary email (missing values excluded).\n",
"primary_emails = df['primary_email'].dropna()\n",
"primary_emails[primary_emails.duplicated()]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>other_emails</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3776350</th>\n",
" <td>0000-0002-0836-2271</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Maykin</td>\n",
" <td>Warasart</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>maykin@owasp.org</td>\n",
" <td>[maykin@dga.or.th]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7483666</th>\n",
" <td>0000-0001-9855-1676</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Maykin</td>\n",
" <td>Warasart</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>maykin@owasp.org</td>\n",
" <td>[maykin@dga.or.th, maykin@ieee.org]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"3776350 0000-0002-0836-2271 1 1 1 \n",
"7483666 0000-0001-9855-1676 1 1 1 \n",
"\n",
" given_names family_name biography other_names urls primary_email \\\n",
"3776350 Maykin Warasart NaN NaN NaN maykin@owasp.org \n",
"7483666 Maykin Warasart NaN NaN NaN maykin@owasp.org \n",
"\n",
" other_emails keywords external_ids education \\\n",
"3776350 [maykin@dga.or.th] NaN NaN NaN \n",
"7483666 [maykin@dga.or.th, maykin@ieee.org] NaN NaN NaN \n",
"\n",
" employment n_works works_source \n",
"3776350 NaN 0 NaN \n",
"7483666 NaN 0 NaN "
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List every profile whose primary e-mail is this address; more than one row means it is shared across ORCID iDs.\n",
"df.loc[df['primary_email'].eq('maykin@owasp.org')]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>other_emails</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3995032</th>\n",
" <td>0000-0002-2232-9638</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Osman</td>\n",
" <td>Perçin</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opercin@erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9068234</th>\n",
" <td>0000-0003-0033-0918</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Osman</td>\n",
" <td>PERÇİN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opercin@erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, Necmettin Erbakan University, Konya, , TR,...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"3995032 0000-0002-2232-9638 1 1 1 \n",
"9068234 0000-0003-0033-0918 1 1 1 \n",
"\n",
" given_names family_name biography other_names urls \\\n",
"3995032 Osman Perçin NaN NaN NaN \n",
"9068234 Osman PERÇİN NaN NaN NaN \n",
"\n",
" primary_email other_emails keywords external_ids education \\\n",
"3995032 opercin@erbakan.edu.tr NaN NaN NaN NaN \n",
"9068234 opercin@erbakan.edu.tr NaN NaN NaN NaN \n",
"\n",
" employment n_works \\\n",
"3995032 NaN 0 \n",
"9068234 [[, Necmettin Erbakan University, Konya, , TR,... 0 \n",
"\n",
" works_source \n",
"3995032 NaN \n",
"9068234 NaN "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List every profile whose primary e-mail is this address; more than one row means it is shared across ORCID iDs.\n",
"df.loc[df['primary_email'].eq('opercin@erbakan.edu.tr')]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>other_emails</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5087745</th>\n",
" <td>0000-0002-8774-0030</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Patrick</td>\n",
" <td>Davey</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>patrick.davey@monash.edu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[PhD Student, Monash University, Melbourne, V...</td>\n",
" <td>1</td>\n",
" <td>[Crossref]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10246485</th>\n",
" <td>0000-0002-9158-1757</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Patrick</td>\n",
" <td>Davey</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>patrick.davey@monash.edu</td>\n",
" <td>NaN</td>\n",
" <td>[Radiopharmaceuticals, Inorganic Chemistry, Bi...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[PhD Student, Monash University, Melbourne, ,...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email \\\n",
"5087745 0000-0002-8774-0030 1 1 \n",
"10246485 0000-0002-9158-1757 1 1 \n",
"\n",
" verified_primary_email given_names family_name biography \\\n",
"5087745 1 Patrick Davey NaN \n",
"10246485 1 Patrick Davey NaN \n",
"\n",
" other_names urls primary_email other_emails \\\n",
"5087745 NaN NaN patrick.davey@monash.edu NaN \n",
"10246485 NaN NaN patrick.davey@monash.edu NaN \n",
"\n",
" keywords external_ids \\\n",
"5087745 NaN NaN \n",
"10246485 [Radiopharmaceuticals, Inorganic Chemistry, Bi... NaN \n",
"\n",
" education employment \\\n",
"5087745 NaN [[PhD Student, Monash University, Melbourne, V... \n",
"10246485 NaN [[PhD Student, Monash University, Melbourne, ,... \n",
"\n",
" n_works works_source \n",
"5087745 1 [Crossref] \n",
"10246485 0 NaN "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List every profile whose primary e-mail is this address; more than one row means it is shared across ORCID iDs.\n",
"df.loc[df['primary_email'].eq('patrick.davey@monash.edu')]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Keep only what follows the last '@'. Missing values and malformed addresses\n",
"# (no '@', or nothing after it) become NaN instead of raising an IndexError.\n",
"df['primary_email_domain'] = df['primary_email'].str.extract(r'@([^@]+)$', expand=False)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 121476\n",
"unique 17047\n",
"top gmail.com\n",
"freq 25892\n",
"Name: primary_email_domain, dtype: object"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary of the extracted domains: non-null count, distinct values and the most frequent one.\n",
"df.loc[:, 'primary_email_domain'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" </tr>\n",
" <tr>\n",
" <th>primary_email_domain</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>gmail.com</th>\n",
" <td>25892</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hotmail.com</th>\n",
" <td>3674</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yahoo.com</th>\n",
" <td>2578</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163.com</th>\n",
" <td>2067</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yuhs.ac</th>\n",
" <td>1124</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>iiap.gob.pe</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>iiap.org.pe</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>iibb.csic.es</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>iic.hokudai.ac.jp</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>zzuli.edu.cn</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>17047 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid\n",
"primary_email_domain \n",
"gmail.com 25892\n",
"hotmail.com 3674\n",
"yahoo.com 2578\n",
"163.com 2067\n",
"yuhs.ac 1124\n",
"... ...\n",
"iiap.gob.pe 1\n",
"iiap.org.pe 1\n",
"iibb.csic.es 1\n",
"iic.hokudai.ac.jp 1\n",
"zzuli.edu.cn 1\n",
"\n",
"[17047 rows x 1 columns]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count of ORCID iDs per primary e-mail domain, most frequent domain first.\n",
"primary_emails = (\n",
"    df.groupby('primary_email_domain')[['orcid']]\n",
"    .count()\n",
"    .sort_values('orcid', ascending=False)\n",
")\n",
"primary_emails"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"gmail.com",
"hotmail.com",
"yahoo.com",
"163.com",
"yuhs.ac",
"qq.com",
"outlook.com",
"126.com",
"bu.edu",
"usgs.gov",
"mail.ru",
"usp.br",
"yahoo.com.br",
"ua.pt",
"umich.edu",
"ust.hk",
"foxmail.com",
"uomustansiriyah.edu.iq",
"yandex.ru",
"uq.edu.au",
"ukr.net",
"unesp.br",
"ucl.ac.uk",
"ieee.org",
"stcatz.ox.ac.uk",
"st-annes.ox.ac.uk",
"naver.com",
"yahoo.fr",
"ucm.es",
"live.com"
],
"y": [
25892,
3674,
2578,
2067,
1124,
1035,
914,
755,
626,
584,
564,
455,
454,
291,
290,
278,
249,
242,
237,
234,
220,
214,
204,
203,
185,
184,
182,
172,
166,
159
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 email domains"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"358f2477-6ef6-4bf2-9679-48d5f2695df8\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"358f2477-6ef6-4bf2-9679-48d5f2695df8\")) { Plotly.newPlot( \"358f2477-6ef6-4bf2-9679-48d5f2695df8\", [{\"type\": \"bar\", \"x\": [\"gmail.com\", \"hotmail.com\", \"yahoo.com\", \"163.com\", \"yuhs.ac\", \"qq.com\", \"outlook.com\", \"126.com\", \"bu.edu\", \"usgs.gov\", \"mail.ru\", \"usp.br\", \"yahoo.com.br\", \"ua.pt\", \"umich.edu\", \"ust.hk\", \"foxmail.com\", \"uomustansiriyah.edu.iq\", \"yandex.ru\", \"uq.edu.au\", \"ukr.net\", \"unesp.br\", \"ucl.ac.uk\", \"ieee.org\", \"stcatz.ox.ac.uk\", \"st-annes.ox.ac.uk\", \"naver.com\", \"yahoo.fr\", \"ucm.es\", \"live.com\"], \"y\": [25892, 3674, 2578, 2067, 1124, 1035, 914, 755, 626, 584, 564, 455, 454, 291, 290, 278, 249, 242, 237, 234, 220, 214, 204, 203, 185, 184, 182, 172, 166, 159]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, 
\"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], 
[0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, 
\"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, 
\"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 30 email domains\"}, \"xaxis\": {\"range\": [-0.5, 29.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('358f2477-6ef6-4bf2-9679-48d5f2695df8');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Take the TOP_N most frequent domains once, so the bars, the title and the\n",
"# x-axis window (TOP_RANGE is derived from TOP_N) always agree.\n",
"top_primary_domains = primary_emails.head(TOP_N).sort_values(by=['orcid'], ascending=False)\n",
"\n",
"data = [\n",
"    go.Bar(\n",
"        x=top_primary_domains.index,\n",
"        y=top_primary_domains['orcid']\n",
"    )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
"    title=f'Top {TOP_N} email domains',\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Other emails"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"def extract_email_domains(lst):\n",
"    \"\"\"Return the domain of every well-formed e-mail address in ``lst``.\n",
"\n",
"    The domain is whatever follows the last '@', so an address whose local\n",
"    part itself contains an '@' still yields its real domain. Entries that\n",
"    are not strings, contain no '@' or have nothing after it are skipped\n",
"    instead of raising, so the result may be shorter than ``lst``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    lst : iterable of str\n",
"        E-mail addresses, e.g. one cell of the ``other_emails`` column.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of str\n",
"        Domains, in the same order as the addresses they were taken from.\n",
"    \"\"\"\n",
"    domains = []\n",
"    for email in lst:\n",
"        if not isinstance(email, str):\n",
"            continue\n",
"        # rpartition splits on the LAST '@'; at_sign is '' when there is none.\n",
"        _, at_sign, domain = email.rpartition('@')\n",
"        if at_sign and domain:\n",
"            domains.append(domain)\n",
"    return domains"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# Derive the domains of the secondary addresses; cells that are not lists (e.g. NaN) are passed through unchanged.\n",
"df['other_email_domains'] = df['other_emails'].apply(\n",
"    lambda emails: emails if not isinstance(emails, list) else extract_email_domains(emails)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>other_emails</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>0000-0001-5011-9833</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Mark</td>\n",
" <td>Kilbane</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>mark.kilbane@seh.ox.ac.uk</td>\n",
" <td>[mark.kilbane@bsg.ox.ac.uk]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Blavatnik School of Government; St Edmund Ha...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>seh.ox.ac.uk</td>\n",
" <td>[bsg.ox.ac.uk]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>0000-0001-5017-1295</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Xinfeng</td>\n",
" <td>Tang</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[tang.xinfeng@foxmail.com]</td>\n",
" <td>NaN</td>\n",
" <td>[[Scopus Author ID, 56927186900]]</td>\n",
" <td>[[, , University of Hong Kong, Hong Kong, , HK...</td>\n",
" <td>NaN</td>\n",
" <td>11</td>\n",
" <td>[Scopus - Elsevier, Xinfeng Tang]</td>\n",
" <td>NaN</td>\n",
" <td>[foxmail.com]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299</th>\n",
" <td>0000-0001-5109-3989</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>colin</td>\n",
" <td>tysall</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[colin.tysall@nhs.net]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Associate Mental Health Act Manager, Coventr...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[nhs.net]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>868</th>\n",
" <td>0000-0001-5320-1277</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Gökhan</td>\n",
" <td>KESKİN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2012001598@stu.adu.edu.tr</td>\n",
" <td>[gokhankkeskin@gmail.com]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, Adnan Menderes University, Aydin, , TR, gr...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>stu.adu.edu.tr</td>\n",
" <td>[gmail.com]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1176</th>\n",
" <td>0000-0001-5434-9994</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Elena</td>\n",
" <td>Borucu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>lenapasali@gmail.com</td>\n",
" <td>[epasali@yildiz.edu.tr]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>gmail.com</td>\n",
" <td>[yildiz.edu.tr]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"34 0000-0001-5011-9833 1 1 1 \n",
"47 0000-0001-5017-1295 1 1 1 \n",
"299 0000-0001-5109-3989 1 1 1 \n",
"868 0000-0001-5320-1277 1 1 1 \n",
"1176 0000-0001-5434-9994 1 1 1 \n",
"\n",
" given_names family_name biography other_names urls \\\n",
"34 Mark Kilbane NaN NaN NaN \n",
"47 Xinfeng Tang NaN NaN NaN \n",
"299 colin tysall NaN NaN NaN \n",
"868 Gökhan KESKİN NaN NaN NaN \n",
"1176 Elena Borucu NaN NaN NaN \n",
"\n",
" primary_email other_emails keywords \\\n",
"34 mark.kilbane@seh.ox.ac.uk [mark.kilbane@bsg.ox.ac.uk] NaN \n",
"47 NaN [tang.xinfeng@foxmail.com] NaN \n",
"299 NaN [colin.tysall@nhs.net] NaN \n",
"868 2012001598@stu.adu.edu.tr [gokhankkeskin@gmail.com] NaN \n",
"1176 lenapasali@gmail.com [epasali@yildiz.edu.tr] NaN \n",
"\n",
" external_ids \\\n",
"34 NaN \n",
"47 [[Scopus Author ID, 56927186900]] \n",
"299 NaN \n",
"868 NaN \n",
"1176 NaN \n",
"\n",
" education \\\n",
"34 [[Blavatnik School of Government; St Edmund Ha... \n",
"47 [[, , University of Hong Kong, Hong Kong, , HK... \n",
"299 NaN \n",
"868 NaN \n",
"1176 NaN \n",
"\n",
" employment n_works \\\n",
"34 NaN 0 \n",
"47 NaN 11 \n",
"299 [[Associate Mental Health Act Manager, Coventr... 0 \n",
"868 [[, Adnan Menderes University, Aydin, , TR, gr... 0 \n",
"1176 NaN 0 \n",
"\n",
" works_source primary_email_domain \\\n",
"34 NaN seh.ox.ac.uk \n",
"47 [Scopus - Elsevier, Xinfeng Tang] NaN \n",
"299 NaN NaN \n",
"868 NaN stu.adu.edu.tr \n",
"1176 NaN gmail.com \n",
"\n",
" other_email_domains \n",
"34 [bsg.ox.ac.uk] \n",
"47 [foxmail.com] \n",
"299 [nhs.net] \n",
"868 [gmail.com] \n",
"1176 [yildiz.edu.tr] "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['other_email_domains'].notna()].head()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"df['n_emails'] = df['other_emails'].str.len()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>n_emails</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2039718</th>\n",
" <td>0000-0003-4171-3835</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57198</th>\n",
" <td>0000-0001-6239-2968</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10524509</th>\n",
" <td>0000-0003-2290-2817</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7785216</th>\n",
" <td>0000-0003-2151-4089</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3556386</th>\n",
" <td>0000-0001-9084-3156</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747035</th>\n",
" <td>0000-0003-4998-1551</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747036</th>\n",
" <td>0000-0003-4998-4111</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747037</th>\n",
" <td>0000-0003-4998-6045</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747038</th>\n",
" <td>0000-0003-4998-8868</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747039</th>\n",
" <td>0000-0003-4999-7916</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10744621 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid n_emails\n",
"2039718 0000-0003-4171-3835 12.0\n",
"57198 0000-0001-6239-2968 9.0\n",
"10524509 0000-0003-2290-2817 7.0\n",
"7785216 0000-0003-2151-4089 7.0\n",
"3556386 0000-0001-9084-3156 6.0\n",
"... ... ...\n",
"10747035 0000-0003-4998-1551 NaN\n",
"10747036 0000-0003-4998-4111 NaN\n",
"10747037 0000-0003-4998-6045 NaN\n",
"10747038 0000-0003-4998-8868 NaN\n",
"10747039 0000-0003-4999-7916 NaN\n",
"\n",
"[10744621 rows x 2 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values('n_emails', ascending=False)[['orcid', 'n_emails']]"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"grouped_other_emails = df[['orcid', 'other_email_domains']]\\\n",
" .explode('other_email_domains')\\\n",
" .reset_index(drop=True)\\\n",
" .groupby('other_email_domains')\\\n",
" .count()\\\n",
" .sort_values('orcid', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"gmail.com",
"hotmail.com",
"yahoo.com",
"163.com",
"qq.com",
"outlook.com",
"126.com",
"usp.br",
"ieee.org",
"mail.ru",
"yahoo.com.br",
"unesp.br",
"sbs.ox.ac.uk",
"yuhs.ac",
"naver.com",
"icloud.com",
"ua.pt",
"uq.edu.au",
"foxmail.com",
"cam.ac.uk",
"ukr.net",
"law.ox.ac.uk",
"imperial.ac.uk",
"mit.edu",
"monash.edu",
"ucl.ac.uk",
"education.ox.ac.uk",
"stanford.edu",
"ucm.es",
"conted.ox.ac.uk"
],
"y": [
10856,
1521,
1263,
763,
755,
422,
256,
235,
223,
147,
146,
138,
136,
130,
128,
113,
92,
90,
90,
81,
76,
75,
75,
74,
69,
67,
67,
66,
65,
64
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 other email domains"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"ba9dd088-a83d-47b9-88a8-7e260424de87\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"ba9dd088-a83d-47b9-88a8-7e260424de87\")) { Plotly.newPlot( \"ba9dd088-a83d-47b9-88a8-7e260424de87\", [{\"type\": \"bar\", \"x\": [\"gmail.com\", \"hotmail.com\", \"yahoo.com\", \"163.com\", \"qq.com\", \"outlook.com\", \"126.com\", \"usp.br\", \"ieee.org\", \"mail.ru\", \"yahoo.com.br\", \"unesp.br\", \"sbs.ox.ac.uk\", \"yuhs.ac\", \"naver.com\", \"icloud.com\", \"ua.pt\", \"uq.edu.au\", \"foxmail.com\", \"cam.ac.uk\", \"ukr.net\", \"law.ox.ac.uk\", \"imperial.ac.uk\", \"mit.edu\", \"monash.edu\", \"ucl.ac.uk\", \"education.ox.ac.uk\", \"stanford.edu\", \"ucm.es\", \"conted.ox.ac.uk\"], \"y\": [10856, 1521, 1263, 763, 755, 422, 256, 235, 223, 147, 146, 138, 136, 130, 128, 113, 92, 90, 90, 81, 76, 75, 75, 74, 69, 67, 67, 66, 65, 64]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], 
[0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, 
\"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": 
\"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", 
\"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 30 other email domains\"}, \"xaxis\": {\"range\": [-0.5, 29.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('ba9dd088-a83d-47b9-88a8-7e260424de87');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data = [\n",
" go.Bar(\n",
" x=grouped_other_emails[:30].sort_values(by=['orcid'], ascending=False).index,\n",
" y=grouped_other_emails[:30].sort_values(by=['orcid'], ascending=False)['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top 30 other email domains',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Email speculation"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>other_emails</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>0000-0001-5017-1295</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Xinfeng</td>\n",
" <td>Tang</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[tang.xinfeng@foxmail.com]</td>\n",
" <td>NaN</td>\n",
" <td>[[Scopus Author ID, 56927186900]]</td>\n",
" <td>[[, , University of Hong Kong, Hong Kong, , HK...</td>\n",
" <td>NaN</td>\n",
" <td>11</td>\n",
" <td>[Scopus - Elsevier, Xinfeng Tang]</td>\n",
" <td>NaN</td>\n",
" <td>[foxmail.com]</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299</th>\n",
" <td>0000-0001-5109-3989</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>colin</td>\n",
" <td>tysall</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[colin.tysall@nhs.net]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Associate Mental Health Act Manager, Coventr...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[nhs.net]</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1296</th>\n",
" <td>0000-0001-5476-0126</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Aura Windy</td>\n",
" <td>Hernández Cetina</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[u0902038@unimilitar.edu.co]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, Profesional en Relaciones Internacionales ...</td>\n",
" <td>[[Asistente de Investigación, Pontificia Unive...</td>\n",
" <td>1</td>\n",
" <td>[Aura Windy Hernández Cetina]</td>\n",
" <td>NaN</td>\n",
" <td>[unimilitar.edu.co]</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1429</th>\n",
" <td>0000-0001-5522-427X</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Süleyman</td>\n",
" <td>Özen</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Academic CV, https://akademik.yok.gov.tr/Aka...</td>\n",
" <td>NaN</td>\n",
" <td>[suleyman.ozen@btu.edu.tr]</td>\n",
" <td>[construction materials, superplasticizers, co...</td>\n",
" <td>[[Scopus Author ID, 57188750603]]</td>\n",
" <td>[[Civil Engineering, MSc and PhD, Uludağ Unive...</td>\n",
" <td>[[Dr., Bursa Technical University, Bursa, , TR...</td>\n",
" <td>7</td>\n",
" <td>[Scopus - Elsevier, Crossref]</td>\n",
" <td>NaN</td>\n",
" <td>[btu.edu.tr]</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1628</th>\n",
" <td>0000-0001-5597-3115</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Wade</td>\n",
" <td>Harrison</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[wade_harrison@unc.edu]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, MD, Dartmouth College Geisel School of Med...</td>\n",
" <td>[[Clinical Instructor / Research Fellow, Unive...</td>\n",
" <td>7</td>\n",
" <td>[Wade Harrison]</td>\n",
" <td>NaN</td>\n",
" <td>[unc.edu]</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10743658</th>\n",
" <td>0000-0003-3740-8352</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Rui</td>\n",
" <td>Zhang</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[zhang-r15@mails.tsinghua.edu.cn]</td>\n",
" <td>[Lithium metal batteries, Graphene]</td>\n",
" <td>[[ResearcherID, B-3843-2015]]</td>\n",
" <td>[[Department of Chemical Engineering, Ph.D. st...</td>\n",
" <td>NaN</td>\n",
" <td>15</td>\n",
" <td>[ResearcherID, Crossref]</td>\n",
" <td>NaN</td>\n",
" <td>[mails.tsinghua.edu.cn]</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10744876</th>\n",
" <td>0000-0003-4192-6451</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Sanjib Raj</td>\n",
" <td>Pandey</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Personal, https://www.sanjibpandey.wix.com/p...</td>\n",
" <td>NaN</td>\n",
" <td>[srpandey@gmail.com]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Computing and Information System, PhD, Unive...</td>\n",
" <td>[[Software Developer &amp; Research Associate, Oxl...</td>\n",
" <td>11</td>\n",
" <td>[BASE - Bielefeld Academic Search Engine, Dr. ...</td>\n",
" <td>NaN</td>\n",
" <td>[gmail.com]</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10745274</th>\n",
" <td>0000-0003-4333-9728</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Mario</td>\n",
" <td>De la Fuente Lloreda</td>\n",
" <td>Person in charge to coordinate the scientific ...</td>\n",
" <td>[M.de la Fuente, De la Fuente, M.]</td>\n",
" <td>[[researchgate profile, https://www.researchga...</td>\n",
" <td>NaN</td>\n",
" <td>[mariofuente@gmail.com]</td>\n",
" <td>[vineyard management, grapevine, viticulture, ...</td>\n",
" <td>[[Scopus Author ID, 47960975000]]</td>\n",
" <td>[[Producción Vegetal, Doctor en Viticultura, U...</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>[Scopus - Elsevier]</td>\n",
" <td>NaN</td>\n",
" <td>[gmail.com]</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10745417</th>\n",
" <td>0000-0003-4383-4745</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Jie</td>\n",
" <td>Yang</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[jyang@esat.kuleuven.be]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[faculty of engineering science, Dr., KU Leuv...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[esat.kuleuven.be]</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10746702</th>\n",
" <td>0000-0003-4878-2737</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Aleksey</td>\n",
" <td>Adamtsevich</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Moscow State University of Civil Engineering...</td>\n",
" <td>NaN</td>\n",
" <td>[AdamtsevichAO@mgsu.ru]</td>\n",
" <td>[concrete, calorimetry, cement, construction, ...</td>\n",
" <td>[[Scopus Author ID, 56301531000], [ResearcherI...</td>\n",
" <td>[[, Engineer (Industrial and Civil Engineering...</td>\n",
" <td>[[Senior Researcher, Moscow State University o...</td>\n",
" <td>25</td>\n",
" <td>[Scopus - Elsevier, ResearcherID]</td>\n",
" <td>NaN</td>\n",
" <td>[mgsu.ru]</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>19409 rows × 20 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email \\\n",
"47 0000-0001-5017-1295 1 1 \n",
"299 0000-0001-5109-3989 1 1 \n",
"1296 0000-0001-5476-0126 1 1 \n",
"1429 0000-0001-5522-427X 1 1 \n",
"1628 0000-0001-5597-3115 1 1 \n",
"... ... ... ... \n",
"10743658 0000-0003-3740-8352 1 1 \n",
"10744876 0000-0003-4192-6451 1 1 \n",
"10745274 0000-0003-4333-9728 1 1 \n",
"10745417 0000-0003-4383-4745 1 1 \n",
"10746702 0000-0003-4878-2737 1 1 \n",
"\n",
" verified_primary_email given_names family_name \\\n",
"47 1 Xinfeng Tang \n",
"299 1 colin tysall \n",
"1296 1 Aura Windy Hernández Cetina \n",
"1429 1 Süleyman Özen \n",
"1628 1 Wade Harrison \n",
"... ... ... ... \n",
"10743658 1 Rui Zhang \n",
"10744876 1 Sanjib Raj Pandey \n",
"10745274 1 Mario De la Fuente Lloreda \n",
"10745417 1 Jie Yang \n",
"10746702 1 Aleksey Adamtsevich \n",
"\n",
" biography \\\n",
"47 NaN \n",
"299 NaN \n",
"1296 NaN \n",
"1429 NaN \n",
"1628 NaN \n",
"... ... \n",
"10743658 NaN \n",
"10744876 NaN \n",
"10745274 Person in charge to coordinate the scientific ... \n",
"10745417 NaN \n",
"10746702 NaN \n",
"\n",
" other_names \\\n",
"47 NaN \n",
"299 NaN \n",
"1296 NaN \n",
"1429 NaN \n",
"1628 NaN \n",
"... ... \n",
"10743658 NaN \n",
"10744876 NaN \n",
"10745274 [M.de la Fuente, De la Fuente, M.] \n",
"10745417 NaN \n",
"10746702 NaN \n",
"\n",
" urls primary_email \\\n",
"47 NaN NaN \n",
"299 NaN NaN \n",
"1296 NaN NaN \n",
"1429 [[Academic CV, https://akademik.yok.gov.tr/Aka... NaN \n",
"1628 NaN NaN \n",
"... ... ... \n",
"10743658 NaN NaN \n",
"10744876 [[Personal, https://www.sanjibpandey.wix.com/p... NaN \n",
"10745274 [[researchgate profile, https://www.researchga... NaN \n",
"10745417 NaN NaN \n",
"10746702 [[Moscow State University of Civil Engineering... NaN \n",
"\n",
" other_emails \\\n",
"47 [tang.xinfeng@foxmail.com] \n",
"299 [colin.tysall@nhs.net] \n",
"1296 [u0902038@unimilitar.edu.co] \n",
"1429 [suleyman.ozen@btu.edu.tr] \n",
"1628 [wade_harrison@unc.edu] \n",
"... ... \n",
"10743658 [zhang-r15@mails.tsinghua.edu.cn] \n",
"10744876 [srpandey@gmail.com] \n",
"10745274 [mariofuente@gmail.com] \n",
"10745417 [jyang@esat.kuleuven.be] \n",
"10746702 [AdamtsevichAO@mgsu.ru] \n",
"\n",
" keywords \\\n",
"47 NaN \n",
"299 NaN \n",
"1296 NaN \n",
"1429 [construction materials, superplasticizers, co... \n",
"1628 NaN \n",
"... ... \n",
"10743658 [Lithium metal batteries, Graphene] \n",
"10744876 NaN \n",
"10745274 [vineyard management, grapevine, viticulture, ... \n",
"10745417 NaN \n",
"10746702 [concrete, calorimetry, cement, construction, ... \n",
"\n",
" external_ids \\\n",
"47 [[Scopus Author ID, 56927186900]] \n",
"299 NaN \n",
"1296 NaN \n",
"1429 [[Scopus Author ID, 57188750603]] \n",
"1628 NaN \n",
"... ... \n",
"10743658 [[ResearcherID, B-3843-2015]] \n",
"10744876 NaN \n",
"10745274 [[Scopus Author ID, 47960975000]] \n",
"10745417 NaN \n",
"10746702 [[Scopus Author ID, 56301531000], [ResearcherI... \n",
"\n",
" education \\\n",
"47 [[, , University of Hong Kong, Hong Kong, , HK... \n",
"299 NaN \n",
"1296 [[, Profesional en Relaciones Internacionales ... \n",
"1429 [[Civil Engineering, MSc and PhD, Uludağ Unive... \n",
"1628 [[, MD, Dartmouth College Geisel School of Med... \n",
"... ... \n",
"10743658 [[Department of Chemical Engineering, Ph.D. st... \n",
"10744876 [[Computing and Information System, PhD, Unive... \n",
"10745274 [[Producción Vegetal, Doctor en Viticultura, U... \n",
"10745417 [[faculty of engineering science, Dr., KU Leuv... \n",
"10746702 [[, Engineer (Industrial and Civil Engineering... \n",
"\n",
" employment n_works \\\n",
"47 NaN 11 \n",
"299 [[Associate Mental Health Act Manager, Coventr... 0 \n",
"1296 [[Asistente de Investigación, Pontificia Unive... 1 \n",
"1429 [[Dr., Bursa Technical University, Bursa, , TR... 7 \n",
"1628 [[Clinical Instructor / Research Fellow, Unive... 7 \n",
"... ... ... \n",
"10743658 NaN 15 \n",
"10744876 [[Software Developer & Research Associate, Oxl... 11 \n",
"10745274 NaN 3 \n",
"10745417 NaN 0 \n",
"10746702 [[Senior Researcher, Moscow State University o... 25 \n",
"\n",
" works_source \\\n",
"47 [Scopus - Elsevier, Xinfeng Tang] \n",
"299 NaN \n",
"1296 [Aura Windy Hernández Cetina] \n",
"1429 [Scopus - Elsevier, Crossref] \n",
"1628 [Wade Harrison] \n",
"... ... \n",
"10743658 [ResearcherID, Crossref] \n",
"10744876 [BASE - Bielefeld Academic Search Engine, Dr. ... \n",
"10745274 [Scopus - Elsevier] \n",
"10745417 NaN \n",
"10746702 [Scopus - Elsevier, ResearcherID] \n",
"\n",
" primary_email_domain other_email_domains n_emails \n",
"47 NaN [foxmail.com] 1.0 \n",
"299 NaN [nhs.net] 1.0 \n",
"1296 NaN [unimilitar.edu.co] 1.0 \n",
"1429 NaN [btu.edu.tr] 1.0 \n",
"1628 NaN [unc.edu] 1.0 \n",
"... ... ... ... \n",
"10743658 NaN [mails.tsinghua.edu.cn] 1.0 \n",
"10744876 NaN [gmail.com] 1.0 \n",
"10745274 NaN [gmail.com] 1.0 \n",
"10745417 NaN [esat.kuleuven.be] 1.0 \n",
"10746702 NaN [mgsu.ru] 1.0 \n",
"\n",
"[19409 rows x 20 columns]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['primary_email'].isna() & df['other_emails'].notna()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## URLs"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"def extract_url_domains(lst):\n",
"    \"\"\"Return the registered domain of each URL entry in ``lst``.\n",
"\n",
"    Every entry is indexed positionally: ``entry[0]`` is a string describing\n",
"    the url and ``entry[1]`` is the url itself (only the latter is used).\n",
"    The result has one item per entry, in the same order.\n",
"    \"\"\"\n",
"    return [tldextract.extract(entry[1]).registered_domain for entry in lst]"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# Only list cells are converted; any other value (e.g. NaN) is passed through unchanged\n",
"df['url_domains'] = df['urls'].apply(\n",
"    lambda urls: extract_url_domains(urls) if isinstance(urls, list) else urls\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" <th>url_domains</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0000-0001-5001-4994</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Siren</td>\n",
" <td>Rühs</td>\n",
" <td>I am an oceanographer studying the interannual...</td>\n",
" <td>[Siren Ruehs]</td>\n",
" <td>[[ResearchGate, https://www.researchgate.net/p...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>11</td>\n",
" <td>[Siren Rühs]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[researchgate.net]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0000-0001-5004-7761</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>scaffolding</td>\n",
" <td>hire</td>\n",
" <td>NaN</td>\n",
" <td>[The first feature that you have to check in t...</td>\n",
" <td>[[scaffolding hire Wellington, https://www.tig...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[scaffolding hire Wellington]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[tigerscaffolds.co.nz]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0000-0001-5005-0557</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Sen</td>\n",
" <td>RT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Research on Psychology, psychiatry, Genetics...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[corticalbrain.com]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0000-0001-5009-8091</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Gabriela</td>\n",
" <td>Madruga</td>\n",
" <td>Possui graduação em Medicina Veterinaria pela ...</td>\n",
" <td>[Gabriela Morais Madruga]</td>\n",
" <td>[[Curriculo lattes, http://buscatextual.cnpq.b...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[veterinary ophthalmology]</td>\n",
" <td>NaN</td>\n",
" <td>[[Surgery in small animal, PhD, Universidade E...</td>\n",
" <td>[[PhD , University of Minnesota, Minneapolis, ...</td>\n",
" <td>14</td>\n",
" <td>[Gabriela Madruga]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>0000-0001-5010-9539</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Sangram Keshari</td>\n",
" <td>Sahu</td>\n",
" <td>NaN</td>\n",
" <td>[sk-sahu]</td>\n",
" <td>[[Academic webpage, https://sksahu.net]]</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[Computational Genomics and Bioinformatics]</td>\n",
" <td>[[Loop profile, 1098977]]</td>\n",
" <td>[[Centre for Bioinformatics, M.Sc. Bioinformat...</td>\n",
" <td>[[Bioinformatics Junior Research Fellow, India...</td>\n",
" <td>3</td>\n",
" <td>[Crossref Metadata Search, Sangram Keshari Sahu]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[sksahu.net]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"5 0000-0001-5001-4994 1 1 1 \n",
"14 0000-0001-5004-7761 1 1 1 \n",
"15 0000-0001-5005-0557 1 1 1 \n",
"29 0000-0001-5009-8091 1 1 1 \n",
"30 0000-0001-5010-9539 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"5 Siren Rühs \n",
"14 scaffolding hire \n",
"15 Sen RT \n",
"29 Gabriela Madruga \n",
"30 Sangram Keshari Sahu \n",
"\n",
" biography \\\n",
"5 I am an oceanographer studying the interannual... \n",
"14 NaN \n",
"15 NaN \n",
"29 Possui graduação em Medicina Veterinaria pela ... \n",
"30 NaN \n",
"\n",
" other_names \\\n",
"5 [Siren Ruehs] \n",
"14 [The first feature that you have to check in t... \n",
"15 NaN \n",
"29 [Gabriela Morais Madruga] \n",
"30 [sk-sahu] \n",
"\n",
" urls primary_email ... \\\n",
"5 [[ResearchGate, https://www.researchgate.net/p... NaN ... \n",
"14 [[scaffolding hire Wellington, https://www.tig... NaN ... \n",
"15 [[Research on Psychology, psychiatry, Genetics... NaN ... \n",
"29 [[Curriculo lattes, http://buscatextual.cnpq.b... NaN ... \n",
"30 [[Academic webpage, https://sksahu.net]] NaN ... \n",
"\n",
" keywords external_ids \\\n",
"5 NaN NaN \n",
"14 [scaffolding hire Wellington] NaN \n",
"15 NaN NaN \n",
"29 [veterinary ophthalmology] NaN \n",
"30 [Computational Genomics and Bioinformatics] [[Loop profile, 1098977]] \n",
"\n",
" education \\\n",
"5 NaN \n",
"14 NaN \n",
"15 NaN \n",
"29 [[Surgery in small animal, PhD, Universidade E... \n",
"30 [[Centre for Bioinformatics, M.Sc. Bioinformat... \n",
"\n",
" employment n_works \\\n",
"5 NaN 11 \n",
"14 NaN 0 \n",
"15 NaN 0 \n",
"29 [[PhD , University of Minnesota, Minneapolis, ... 14 \n",
"30 [[Bioinformatics Junior Research Fellow, India... 3 \n",
"\n",
" works_source primary_email_domain \\\n",
"5 [Siren Rühs] NaN \n",
"14 NaN NaN \n",
"15 NaN NaN \n",
"29 [Gabriela Madruga] NaN \n",
"30 [Crossref Metadata Search, Sangram Keshari Sahu] NaN \n",
"\n",
" other_email_domains n_emails url_domains \n",
"5 NaN NaN [researchgate.net] \n",
"14 NaN NaN [tigerscaffolds.co.nz] \n",
"15 NaN NaN [corticalbrain.com] \n",
"29 NaN NaN [cnpq.br] \n",
"30 NaN NaN [sksahu.net] \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['url_domains'].notna()].head()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"# Length of each url_domains list (one domain per URL); NaN rows stay NaN\n",
"df['n_urls'] = df.loc[:, 'url_domains'].str.len()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>70577</th>\n",
" <td>0000-0002-1234-835X</td>\n",
" <td>219.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5164541</th>\n",
" <td>0000-0001-7478-4539</td>\n",
" <td>174.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1215225</th>\n",
" <td>0000-0002-7392-3792</td>\n",
" <td>169.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10240510</th>\n",
" <td>0000-0002-6938-9638</td>\n",
" <td>152.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4004281</th>\n",
" <td>0000-0002-5710-4041</td>\n",
" <td>114.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747035</th>\n",
" <td>0000-0003-4998-1551</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747036</th>\n",
" <td>0000-0003-4998-4111</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747037</th>\n",
" <td>0000-0003-4998-6045</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747038</th>\n",
" <td>0000-0003-4998-8868</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747039</th>\n",
" <td>0000-0003-4999-7916</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10744621 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid n_urls\n",
"70577 0000-0002-1234-835X 219.0\n",
"5164541 0000-0001-7478-4539 174.0\n",
"1215225 0000-0002-7392-3792 169.0\n",
"10240510 0000-0002-6938-9638 152.0\n",
"4004281 0000-0002-5710-4041 114.0\n",
"... ... ...\n",
"10747035 0000-0003-4998-1551 NaN\n",
"10747036 0000-0003-4998-4111 NaN\n",
"10747037 0000-0003-4998-6045 NaN\n",
"10747038 0000-0003-4998-8868 NaN\n",
"10747039 0000-0003-4999-7916 NaN\n",
"\n",
"[10744621 rows x 2 columns]"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values('n_urls', ascending=False)[['orcid', 'n_urls']]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"0000-0002-1234-835X",
"0000-0001-7478-4539",
"0000-0002-7392-3792",
"0000-0002-6938-9638",
"0000-0002-5710-4041",
"0000-0003-2450-090X",
"0000-0002-3920-7389",
"0000-0002-6689-4129",
"0000-0002-4621-5571",
"0000-0001-9131-1266",
"0000-0002-7754-8889",
"0000-0002-5250-1144",
"0000-0002-9025-8632",
"0000-0002-7456-3848",
"0000-0003-0176-1293",
"0000-0003-0321-7339",
"0000-0002-8493-0402",
"0000-0002-9965-2425",
"0000-0001-8873-6677",
"0000-0002-3997-5070",
"0000-0002-1856-6905",
"0000-0002-4062-3603",
"0000-0002-4316-1467",
"0000-0002-0752-7513",
"0000-0001-5880-7091",
"0000-0003-1524-6268",
"0000-0003-0594-2462",
"0000-0003-2593-7134",
"0000-0002-1298-5252",
"0000-0003-1761-3842",
"0000-0003-2383-8386",
"0000-0003-3546-2312",
"0000-0002-2886-9248",
"0000-0002-1929-6054",
"0000-0003-2183-8112",
"0000-0003-2407-3557",
"0000-0002-4305-4215",
"0000-0003-0796-0234",
"0000-0001-7133-6896",
"0000-0002-9276-6921",
"0000-0002-7568-3403",
"0000-0003-1484-6958",
"0000-0002-4004-6666",
"0000-0002-8208-0897",
"0000-0002-9071-5450",
"0000-0002-8116-9611",
"0000-0003-4993-5555",
"0000-0003-0930-6121",
"0000-0003-4948-9268",
"0000-0002-8122-879X",
"0000-0001-9559-1103",
"0000-0002-3277-9659",
"0000-0003-2862-6315",
"0000-0002-2000-8339",
"0000-0001-5300-4601",
"0000-0002-6547-0172",
"0000-0003-4808-6619",
"0000-0002-6254-8683",
"0000-0002-0971-9375",
"0000-0003-3933-0229",
"0000-0002-4659-5391",
"0000-0003-0694-1154",
"0000-0002-2916-2893",
"0000-0003-4501-3756",
"0000-0001-6783-2037",
"0000-0001-6461-2573",
"0000-0001-5549-6822",
"0000-0001-8978-4830",
"0000-0003-4326-9336",
"0000-0001-8096-4333",
"0000-0002-8940-3177",
"0000-0002-6680-1703",
"0000-0002-5946-1595",
"0000-0002-8593-9257",
"0000-0002-7653-4899",
"0000-0002-5196-4905",
"0000-0001-8808-4867",
"0000-0002-5139-2660",
"0000-0001-6921-0426",
"0000-0003-1815-1993",
"0000-0002-7843-8497",
"0000-0003-1675-2840",
"0000-0001-8644-2114",
"0000-0003-0907-9870",
"0000-0001-8986-2528",
"0000-0002-5265-6074",
"0000-0001-7784-0583",
"0000-0001-7550-5802",
"0000-0001-6979-4273",
"0000-0001-9102-8639",
"0000-0002-3334-9386",
"0000-0002-0696-8560",
"0000-0002-7179-6953",
"0000-0002-9771-600X",
"0000-0002-8797-6502",
"0000-0002-5241-1026",
"0000-0001-7193-5039",
"0000-0001-9119-5955",
"0000-0003-2133-2648",
"0000-0001-9026-4795"
],
"y": [
219,
174,
169,
152,
114,
114,
111,
104,
90,
83,
83,
81,
81,
80,
80,
80,
76,
73,
72,
71,
70,
69,
69,
68,
68,
68,
68,
67,
67,
66,
66,
65,
64,
61,
61,
59,
57,
57,
57,
57,
57,
57,
57,
56,
55,
55,
55,
55,
51,
50,
50,
50,
49,
49,
48,
48,
48,
48,
47,
47,
46,
46,
45,
45,
45,
45,
44,
43,
43,
43,
43,
42,
42,
42,
41,
41,
40,
40,
40,
39,
39,
39,
39,
38,
38,
38,
38,
38,
37,
37,
37,
37,
37,
36,
36,
36,
36,
36,
36,
36
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 100 ORCID with URLs"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"63a77c35-4bbf-49b4-884b-d17c3e1ded85\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"63a77c35-4bbf-49b4-884b-d17c3e1ded85\")) { Plotly.newPlot( \"63a77c35-4bbf-49b4-884b-d17c3e1ded85\", [{\"type\": \"bar\", \"x\": [\"0000-0002-1234-835X\", \"0000-0001-7478-4539\", \"0000-0002-7392-3792\", \"0000-0002-6938-9638\", \"0000-0002-5710-4041\", \"0000-0003-2450-090X\", \"0000-0002-3920-7389\", \"0000-0002-6689-4129\", \"0000-0002-4621-5571\", \"0000-0001-9131-1266\", \"0000-0002-7754-8889\", \"0000-0002-5250-1144\", \"0000-0002-9025-8632\", \"0000-0002-7456-3848\", \"0000-0003-0176-1293\", \"0000-0003-0321-7339\", \"0000-0002-8493-0402\", \"0000-0002-9965-2425\", \"0000-0001-8873-6677\", \"0000-0002-3997-5070\", \"0000-0002-1856-6905\", \"0000-0002-4062-3603\", \"0000-0002-4316-1467\", \"0000-0002-0752-7513\", \"0000-0001-5880-7091\", \"0000-0003-1524-6268\", \"0000-0003-0594-2462\", \"0000-0003-2593-7134\", \"0000-0002-1298-5252\", \"0000-0003-1761-3842\", \"0000-0003-2383-8386\", \"0000-0003-3546-2312\", \"0000-0002-2886-9248\", \"0000-0002-1929-6054\", \"0000-0003-2183-8112\", \"0000-0003-2407-3557\", \"0000-0002-4305-4215\", \"0000-0003-0796-0234\", \"0000-0001-7133-6896\", \"0000-0002-9276-6921\", \"0000-0002-7568-3403\", \"0000-0003-1484-6958\", \"0000-0002-4004-6666\", \"0000-0002-8208-0897\", \"0000-0002-9071-5450\", \"0000-0002-8116-9611\", \"0000-0003-4993-5555\", \"0000-0003-0930-6121\", \"0000-0003-4948-9268\", \"0000-0002-8122-879X\", \"0000-0001-9559-1103\", \"0000-0002-3277-9659\", \"0000-0003-2862-6315\", \"0000-0002-2000-8339\", \"0000-0001-5300-4601\", \"0000-0002-6547-0172\", \"0000-0003-4808-6619\", \"0000-0002-6254-8683\", \"0000-0002-0971-9375\", \"0000-0003-3933-0229\", \"0000-0002-4659-5391\", \"0000-0003-0694-1154\", \"0000-0002-2916-2893\", 
\"0000-0003-4501-3756\", \"0000-0001-6783-2037\", \"0000-0001-6461-2573\", \"0000-0001-5549-6822\", \"0000-0001-8978-4830\", \"0000-0003-4326-9336\", \"0000-0001-8096-4333\", \"0000-0002-8940-3177\", \"0000-0002-6680-1703\", \"0000-0002-5946-1595\", \"0000-0002-8593-9257\", \"0000-0002-7653-4899\", \"0000-0002-5196-4905\", \"0000-0001-8808-4867\", \"0000-0002-5139-2660\", \"0000-0001-6921-0426\", \"0000-0003-1815-1993\", \"0000-0002-7843-8497\", \"0000-0003-1675-2840\", \"0000-0001-8644-2114\", \"0000-0003-0907-9870\", \"0000-0001-8986-2528\", \"0000-0002-5265-6074\", \"0000-0001-7784-0583\", \"0000-0001-7550-5802\", \"0000-0001-6979-4273\", \"0000-0001-9102-8639\", \"0000-0002-3334-9386\", \"0000-0002-0696-8560\", \"0000-0002-7179-6953\", \"0000-0002-9771-600X\", \"0000-0002-8797-6502\", \"0000-0002-5241-1026\", \"0000-0001-7193-5039\", \"0000-0001-9119-5955\", \"0000-0003-2133-2648\", \"0000-0001-9026-4795\"], \"y\": [219.0, 174.0, 169.0, 152.0, 114.0, 114.0, 111.0, 104.0, 90.0, 83.0, 83.0, 81.0, 81.0, 80.0, 80.0, 80.0, 76.0, 73.0, 72.0, 71.0, 70.0, 69.0, 69.0, 68.0, 68.0, 68.0, 68.0, 67.0, 67.0, 66.0, 66.0, 65.0, 64.0, 61.0, 61.0, 59.0, 57.0, 57.0, 57.0, 57.0, 57.0, 57.0, 57.0, 56.0, 55.0, 55.0, 55.0, 55.0, 51.0, 50.0, 50.0, 50.0, 49.0, 49.0, 48.0, 48.0, 48.0, 48.0, 47.0, 47.0, 46.0, 46.0, 45.0, 45.0, 45.0, 45.0, 44.0, 43.0, 43.0, 43.0, 43.0, 42.0, 42.0, 42.0, 41.0, 41.0, 40.0, 40.0, 40.0, 39.0, 39.0, 39.0, 39.0, 38.0, 38.0, 38.0, 38.0, 38.0, 37.0, 37.0, 37.0, 37.0, 37.0, 36.0, 36.0, 36.0, 36.0, 36.0, 36.0, 36.0]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", 
\"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], 
[0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], 
\"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", 
\"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 100 ORCID with 
URLs\"}, \"xaxis\": {\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('63a77c35-4bbf-49b4-884b-d17c3e1ded85');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data = [\n",
" go.Bar(\n",
" x=df.sort_values(by=['n_urls'], ascending=False)['orcid'][:100],\n",
" y=df.sort_values(by=['n_urls'], ascending=False)['n_urls'][:100]\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top 100 ORCID with URLs',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"grouped_urls = df[['orcid', 'url_domains']]\\\n",
" .explode('url_domains')\\\n",
" .reset_index(drop=True)\\\n",
" .groupby('url_domains')\\\n",
" .count()\\\n",
" .sort_values('orcid', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"linkedin.com",
"researchgate.net",
"google.com",
"cnpq.br",
"academia.edu",
"twitter.com",
"facebook.com",
"publons.com",
"wordpress.com",
"mendeley.com",
"instagram.com",
"github.io",
"google.com.ua",
"blogspot.com",
"google.es",
"github.com",
"helsinki.fi",
"unirioja.es",
"youtube.com",
"wixsite.com",
"ku.dk",
"scopus.com",
"",
"weebly.com",
"us.es",
"kth.se",
"cityu.edu.hk",
"kcl.ac.uk",
"au.dk",
"ucl.ac.uk"
],
"y": [
75344,
66267,
43468,
23936,
20786,
18017,
14552,
10339,
8883,
7003,
5532,
5371,
5273,
5158,
5070,
5053,
4682,
4549,
4196,
4053,
3730,
3481,
3332,
3083,
3029,
2944,
2719,
2711,
2640,
2581
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 URL domains"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"18fea7de-fcd6-4549-8572-1761353bd322\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"18fea7de-fcd6-4549-8572-1761353bd322\")) { Plotly.newPlot( \"18fea7de-fcd6-4549-8572-1761353bd322\", [{\"type\": \"bar\", \"x\": [\"linkedin.com\", \"researchgate.net\", \"google.com\", \"cnpq.br\", \"academia.edu\", \"twitter.com\", \"facebook.com\", \"publons.com\", \"wordpress.com\", \"mendeley.com\", \"instagram.com\", \"github.io\", \"google.com.ua\", \"blogspot.com\", \"google.es\", \"github.com\", \"helsinki.fi\", \"unirioja.es\", \"youtube.com\", \"wixsite.com\", \"ku.dk\", \"scopus.com\", \"\", \"weebly.com\", \"us.es\", \"kth.se\", \"cityu.edu.hk\", \"kcl.ac.uk\", \"au.dk\", \"ucl.ac.uk\"], \"y\": [75344, 66267, 43468, 23936, 20786, 18017, 14552, 10339, 8883, 7003, 5532, 5371, 5273, 5158, 5070, 5053, 4682, 4549, 4196, 4053, 3730, 3481, 3332, 3083, 3029, 2944, 2719, 2711, 2640, 2581]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], 
[0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, 
\"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, 
\"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": 
\"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 30 URL domains\"}, \"xaxis\": {\"range\": [-0.5, 29.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('18fea7de-fcd6-4549-8572-1761353bd322');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data = [\n",
" go.Bar(\n",
" x=grouped_urls[:30].sort_values(by=['orcid'], ascending=False).index,\n",
" y=grouped_urls[:30].sort_values(by=['orcid'], ascending=False)['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top 30 URL domains',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" <th>url_domains</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>482862</th>\n",
" <td>0000-0003-4948-9268</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Gustavo</td>\n",
" <td>Duperré</td>\n",
" <td>Gustavo Norberto Duperré graduated in Arts and...</td>\n",
" <td>[Gustavo Norberto Duperré, Duperré, G. N.]</td>\n",
" <td>[[Gis in Cultural Heritage - ICOMOS România, h...</td>\n",
" <td>gustavo.duperre@usal.edu.ar</td>\n",
" <td>...</td>\n",
" <td>[[Scopus Author ID, 57195936346], [ResearcherI...</td>\n",
" <td>[[Programme in History, History of Art and Ter...</td>\n",
" <td>[[Titular Professor, Dirección General de Cult...</td>\n",
" <td>13</td>\n",
" <td>[Gustavo Duperré, Scopus - Elsevier, Publons, ...</td>\n",
" <td>usal.edu.ar</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[icomos.ro, unirioja.es, unirioja.es, unc.edu....</td>\n",
" <td>51.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>554859</th>\n",
" <td>0000-0002-1929-6054</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Franklin Américo</td>\n",
" <td>Canaza Choque</td>\n",
" <td>Docente-Investigador Social. Maestrando en Der...</td>\n",
" <td>[Franklin Américo Canaza-Choque , Franklin A. ...</td>\n",
" <td>[[Consejo Nacional de Ciencia, Tecnología e In...</td>\n",
" <td>Leo_123fa@hotmail.com</td>\n",
" <td>...</td>\n",
" <td>[[ResearcherID, P-8613-2018], [Loop profile, 8...</td>\n",
" <td>[[Facultad de Ciencias de la Educación , Maest...</td>\n",
" <td>[[Investigador Social, Universidad Católica de...</td>\n",
" <td>38</td>\n",
" <td>[ResearcherID, BASE - Bielefeld Academic Searc...</td>\n",
" <td>hotmail.com</td>\n",
" <td>[gmail.com, gmail.com, hotmail.com, baldwin.ed...</td>\n",
" <td>5.0</td>\n",
" <td>[concytec.gob.pe, redalyc.org, redalyc.org, un...</td>\n",
" <td>61.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1381092</th>\n",
" <td>0000-0002-9025-8632</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>buycannabis</td>\n",
" <td>dispensary</td>\n",
" <td>We procure and deliver premium cannabis strain...</td>\n",
" <td>[We procure and deliver premium cannabis strai...</td>\n",
" <td>[[find your cannabis &amp; marijuana dispensary , ...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" <td>[goowonderland dispensary]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[goowonderland.com, goowonderland.com, goowond...</td>\n",
" <td>81.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2679353</th>\n",
" <td>0000-0003-2407-3557</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Abdul</td>\n",
" <td>Aziz</td>\n",
" <td>Abdul Aziz was born on May 25, 1973, in Brebes...</td>\n",
" <td>[Abdul Aziz, Aziz, Abdul, Aziz, A., Aziz, Abd,...</td>\n",
" <td>[[Google Scholar, https://scholar.google.com/c...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[Ilmu Ekonomi, Dr, Universitas Borobudur, Jak...</td>\n",
" <td>[[Assisten Professor/Dr, Institut Agama Islam ...</td>\n",
" <td>72</td>\n",
" <td>[BASE - Bielefeld Academic Search Engine, Abdu...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, syekhnurjati.ac.id, orcid.org, bl...</td>\n",
" <td>59.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3354430</th>\n",
" <td>0000-0002-3920-7389</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>А.</td>\n",
" <td>Гусев</td>\n",
" <td>Surname, Name Gusev Alexander LeonidovichDate...</td>\n",
" <td>[Alexander L. Gusev , Alexander Leonidovich Gu...</td>\n",
" <td>[[A.L. Gusev Alternative Energy and Ecology, ...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[[ResearcherID, F-8048-2014], [Scopus Author I...</td>\n",
" <td>[[Chemical technology and cryogenic-vacuum tec...</td>\n",
" <td>[[General Director, Scientific Technical Centr...</td>\n",
" <td>472</td>\n",
" <td>[Publons, DataCite, Scopus - Elsevier, A.L. Gu...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[youtube.com, isjaee.com, researchgate.net, re...</td>\n",
" <td>111.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4004281</th>\n",
" <td>0000-0002-5710-4041</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Ryszard</td>\n",
" <td>Romaniuk</td>\n",
" <td>Professor of Electronics and Communications En...</td>\n",
" <td>[R.Romaniuk, R.S.Romaniuk, Ryszard Romaniuk, R...</td>\n",
" <td>[[Scholar Google, http://scholar.google.pl/cit...</td>\n",
" <td>rrom@ise.pw.edu.pl</td>\n",
" <td>...</td>\n",
" <td>[[ISNI, 0000000071432485], [ResearcherID, B-91...</td>\n",
" <td>[[Faculty of Electronics and Information Techn...</td>\n",
" <td>[[Professor, Institute Director, Politechnika ...</td>\n",
" <td>5008</td>\n",
" <td>[INSPIRE-HEP, ResearcherID, ISNI2ORCID search ...</td>\n",
" <td>ise.pw.edu.pl</td>\n",
" <td>[ise.pw.edu.pl, elka.pw.edu.pl, cern.ch]</td>\n",
" <td>3.0</td>\n",
" <td>[google.pl, publons.com, scopus.com, mendeley....</td>\n",
" <td>114.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4022480</th>\n",
" <td>0000-0003-2450-090X</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Eduard</td>\n",
" <td>Babulak</td>\n",
" <td>Professor Eduard Babulak is accomplished inter...</td>\n",
" <td>[Professor Eduard Babulak]</td>\n",
" <td>[[Honorary Chair, Chief Mentor &amp; Senior Adviso...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[[Scopus Author ID, 6506867432], [ResearcherID...</td>\n",
" <td>[[Information Technology, Doctor Habilitated (...</td>\n",
" <td>[[Consultant, Horizon 2020 Framework Programme...</td>\n",
" <td>274</td>\n",
" <td>[The Lens, BASE - Bielefeld Academic Search En...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[worldassessmentcouncil.org, spseke.sk, bcs.or...</td>\n",
" <td>114.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6335357</th>\n",
" <td>0000-0003-2593-7134</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Aan</td>\n",
" <td>Jaelani</td>\n",
" <td>All my papers can be downloaded from portal:Re...</td>\n",
" <td>[Jaelani, A., Jaelani, Aan]</td>\n",
" <td>[[Microsoft Academic Research, https://academi...</td>\n",
" <td>aan_jaelani@syekhnurjati.ac.id</td>\n",
" <td>...</td>\n",
" <td>[[Scopus Author ID, 57195963463], [Loop profil...</td>\n",
" <td>[[Post Graduate, S3/Dr, Universitas Islam Nege...</td>\n",
" <td>[[Dr, Institut Agama Islam Negeri Syekh Nurjat...</td>\n",
" <td>79</td>\n",
" <td>[Publons, Aan Jaelani, Scopus - Elsevier, Dime...</td>\n",
" <td>syekhnurjati.ac.id</td>\n",
" <td>[gmail.com]</td>\n",
" <td>1.0</td>\n",
" <td>[microsoft.com, twitter.com, academia.edu, aca...</td>\n",
" <td>67.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6489838</th>\n",
" <td>0000-0002-9965-2425</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Jaroslaw</td>\n",
" <td>Spychala</td>\n",
" <td>Jaroslaw Spychala has received a doctoral degr...</td>\n",
" <td>[Jaroslaw Jozef Spychala]</td>\n",
" <td>[[RESUME, http://www.biowebspin.com/wp-content...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[[Scopus Author ID, 7006745874]]</td>\n",
" <td>[[Department of Chemistry, Postdoctoral Associ...</td>\n",
" <td>[[Assistant Professor, Adam Mickiewicz Univers...</td>\n",
" <td>29</td>\n",
" <td>[Scopus - Elsevier]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[biowebspin.com, biowebspin.com, google.com, l...</td>\n",
" <td>73.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7570584</th>\n",
" <td>0000-0003-2183-8112</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Pelayo Munhoz</td>\n",
" <td>Olea</td>\n",
" <td>Pós-Doutorado em Gestão Ambiental pela Univers...</td>\n",
" <td>[ Munhoz, Pelayo Olea, Olea, Pelayo, Olea, P...</td>\n",
" <td>[[Currículo Lattes, http://lattes.cnpq.br/6209...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[[Scopus Author ID, 55175503300], [ResearcherI...</td>\n",
" <td>[[, Postdoctoral in Environmental Sustainabili...</td>\n",
" <td>[[Professor, Universidade Federal do Rio Grand...</td>\n",
" <td>1105</td>\n",
" <td>[The Lens, Pelayo Munhoz Olea, Dimensions, BAS...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c...</td>\n",
" <td>61.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10240510</th>\n",
" <td>0000-0002-6938-9638</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Adolfo</td>\n",
" <td>Catral Sanabria</td>\n",
" <td>My education is in computer science, mathemati...</td>\n",
" <td>NaN</td>\n",
" <td>[[ResearchGate Adolfo Catral , https://www.res...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[[Loop profile, 747193]]</td>\n",
" <td>[[Education, Capacitación para la enseñanza en...</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" <td>[BASE - Bielefeld Academic Search Engine, Data...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[researchgate.net, youtube.com, linkedin.com, ...</td>\n",
" <td>152.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10448304</th>\n",
" <td>0000-0002-4062-3603</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>JUAN DE DIOS</td>\n",
" <td>BELTRÁN MANCILLA</td>\n",
" <td>JUAN DE DIOS BELTRÁN MANCILLA (*) Filósofo aut...</td>\n",
" <td>[Juan de Dios Beltrán Mancilla, FILÓSOFO AUTOD...</td>\n",
" <td>[[01.- Juan de Dios Beltrán Mancilla. Teoría O...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[, DIPLOMADO EN PRACTICAS DIRECTIVAS PARA OR...</td>\n",
" <td>[[INSPECTOR GENERAL JORNADA VESPERTINA // De 2...</td>\n",
" <td>11</td>\n",
" <td>[JUAN DE DIOS BELTR´´ÁN MANCILLA]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[yumpu.com, ijopm.org, google.com, blogspot.co...</td>\n",
" <td>69.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10663894</th>\n",
" <td>0000-0002-3997-5070</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Dr. Parameshachari</td>\n",
" <td>B D</td>\n",
" <td>Dr. Parameshachari B DACM Distinguished Speake...</td>\n",
" <td>[Dr. PARAMESHACHARI B D]</td>\n",
" <td>[[GSSSIETW,MYSURU, http://geethashishu.in/], [...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[[ResearcherID, F-7045-2018], [Scopus Author I...</td>\n",
" <td>[[Electronics and Communication Engineering, P...</td>\n",
" <td>[[ACM Distinguished Speaker (Volunteer), Assoc...</td>\n",
" <td>93</td>\n",
" <td>[Publons, Multidisciplinary Digital Publishing...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[geethashishu.in, geethashishu.in, acm.org, go...</td>\n",
" <td>71.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>13 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email \\\n",
"482862 0000-0003-4948-9268 1 1 \n",
"554859 0000-0002-1929-6054 1 1 \n",
"1381092 0000-0002-9025-8632 1 1 \n",
"2679353 0000-0003-2407-3557 1 1 \n",
"3354430 0000-0002-3920-7389 1 1 \n",
"4004281 0000-0002-5710-4041 1 1 \n",
"4022480 0000-0003-2450-090X 1 1 \n",
"6335357 0000-0003-2593-7134 1 1 \n",
"6489838 0000-0002-9965-2425 1 1 \n",
"7570584 0000-0003-2183-8112 1 1 \n",
"10240510 0000-0002-6938-9638 1 1 \n",
"10448304 0000-0002-4062-3603 1 1 \n",
"10663894 0000-0002-3997-5070 1 1 \n",
"\n",
" verified_primary_email given_names family_name \\\n",
"482862 1 Gustavo Duperré \n",
"554859 1 Franklin Américo Canaza Choque \n",
"1381092 1 buycannabis dispensary \n",
"2679353 1 Abdul Aziz \n",
"3354430 1 А. Гусев \n",
"4004281 1 Ryszard Romaniuk \n",
"4022480 1 Eduard Babulak \n",
"6335357 1 Aan Jaelani \n",
"6489838 1 Jaroslaw Spychala \n",
"7570584 1 Pelayo Munhoz Olea \n",
"10240510 1 Adolfo Catral Sanabria \n",
"10448304 1 JUAN DE DIOS BELTRÁN MANCILLA \n",
"10663894 1 Dr. Parameshachari B D \n",
"\n",
" biography \\\n",
"482862 Gustavo Norberto Duperré graduated in Arts and... \n",
"554859 Docente-Investigador Social. Maestrando en Der... \n",
"1381092 We procure and deliver premium cannabis strain... \n",
"2679353 Abdul Aziz was born on May 25, 1973, in Brebes... \n",
"3354430 Surname, Name Gusev Alexander LeonidovichDate... \n",
"4004281 Professor of Electronics and Communications En... \n",
"4022480 Professor Eduard Babulak is accomplished inter... \n",
"6335357 All my papers can be downloaded from portal:Re... \n",
"6489838 Jaroslaw Spychala has received a doctoral degr... \n",
"7570584 Pós-Doutorado em Gestão Ambiental pela Univers... \n",
"10240510 My education is in computer science, mathemati... \n",
"10448304 JUAN DE DIOS BELTRÁN MANCILLA (*) Filósofo aut... \n",
"10663894 Dr. Parameshachari B DACM Distinguished Speake... \n",
"\n",
" other_names \\\n",
"482862 [Gustavo Norberto Duperré, Duperré, G. N.] \n",
"554859 [Franklin Américo Canaza-Choque , Franklin A. ... \n",
"1381092 [We procure and deliver premium cannabis strai... \n",
"2679353 [Abdul Aziz, Aziz, Abdul, Aziz, A., Aziz, Abd,... \n",
"3354430 [Alexander L. Gusev , Alexander Leonidovich Gu... \n",
"4004281 [R.Romaniuk, R.S.Romaniuk, Ryszard Romaniuk, R... \n",
"4022480 [Professor Eduard Babulak] \n",
"6335357 [Jaelani, A., Jaelani, Aan] \n",
"6489838 [Jaroslaw Jozef Spychala] \n",
"7570584 [ Munhoz, Pelayo Olea, Olea, Pelayo, Olea, P... \n",
"10240510 NaN \n",
"10448304 [Juan de Dios Beltrán Mancilla, FILÓSOFO AUTOD... \n",
"10663894 [Dr. PARAMESHACHARI B D] \n",
"\n",
" urls \\\n",
"482862 [[Gis in Cultural Heritage - ICOMOS România, h... \n",
"554859 [[Consejo Nacional de Ciencia, Tecnología e In... \n",
"1381092 [[find your cannabis & marijuana dispensary , ... \n",
"2679353 [[Google Scholar, https://scholar.google.com/c... \n",
"3354430 [[A.L. Gusev Alternative Energy and Ecology, ... \n",
"4004281 [[Scholar Google, http://scholar.google.pl/cit... \n",
"4022480 [[Honorary Chair, Chief Mentor & Senior Adviso... \n",
"6335357 [[Microsoft Academic Research, https://academi... \n",
"6489838 [[RESUME, http://www.biowebspin.com/wp-content... \n",
"7570584 [[Currículo Lattes, http://lattes.cnpq.br/6209... \n",
"10240510 [[ResearchGate Adolfo Catral , https://www.res... \n",
"10448304 [[01.- Juan de Dios Beltrán Mancilla. Teoría O... \n",
"10663894 [[GSSSIETW,MYSURU, http://geethashishu.in/], [... \n",
"\n",
" primary_email ... \\\n",
"482862 gustavo.duperre@usal.edu.ar ... \n",
"554859 Leo_123fa@hotmail.com ... \n",
"1381092 NaN ... \n",
"2679353 NaN ... \n",
"3354430 NaN ... \n",
"4004281 rrom@ise.pw.edu.pl ... \n",
"4022480 NaN ... \n",
"6335357 aan_jaelani@syekhnurjati.ac.id ... \n",
"6489838 NaN ... \n",
"7570584 NaN ... \n",
"10240510 NaN ... \n",
"10448304 NaN ... \n",
"10663894 NaN ... \n",
"\n",
" external_ids \\\n",
"482862 [[Scopus Author ID, 57195936346], [ResearcherI... \n",
"554859 [[ResearcherID, P-8613-2018], [Loop profile, 8... \n",
"1381092 NaN \n",
"2679353 NaN \n",
"3354430 [[ResearcherID, F-8048-2014], [Scopus Author I... \n",
"4004281 [[ISNI, 0000000071432485], [ResearcherID, B-91... \n",
"4022480 [[Scopus Author ID, 6506867432], [ResearcherID... \n",
"6335357 [[Scopus Author ID, 57195963463], [Loop profil... \n",
"6489838 [[Scopus Author ID, 7006745874]] \n",
"7570584 [[Scopus Author ID, 55175503300], [ResearcherI... \n",
"10240510 [[Loop profile, 747193]] \n",
"10448304 NaN \n",
"10663894 [[ResearcherID, F-7045-2018], [Scopus Author I... \n",
"\n",
" education \\\n",
"482862 [[Programme in History, History of Art and Ter... \n",
"554859 [[Facultad de Ciencias de la Educación , Maest... \n",
"1381092 NaN \n",
"2679353 [[Ilmu Ekonomi, Dr, Universitas Borobudur, Jak... \n",
"3354430 [[Chemical technology and cryogenic-vacuum tec... \n",
"4004281 [[Faculty of Electronics and Information Techn... \n",
"4022480 [[Information Technology, Doctor Habilitated (... \n",
"6335357 [[Post Graduate, S3/Dr, Universitas Islam Nege... \n",
"6489838 [[Department of Chemistry, Postdoctoral Associ... \n",
"7570584 [[, Postdoctoral in Environmental Sustainabili... \n",
"10240510 [[Education, Capacitación para la enseñanza en... \n",
"10448304 [[, DIPLOMADO EN PRACTICAS DIRECTIVAS PARA OR... \n",
"10663894 [[Electronics and Communication Engineering, P... \n",
"\n",
" employment n_works \\\n",
"482862 [[Titular Professor, Dirección General de Cult... 13 \n",
"554859 [[Investigador Social, Universidad Católica de... 38 \n",
"1381092 NaN 10 \n",
"2679353 [[Assisten Professor/Dr, Institut Agama Islam ... 72 \n",
"3354430 [[General Director, Scientific Technical Centr... 472 \n",
"4004281 [[Professor, Institute Director, Politechnika ... 5008 \n",
"4022480 [[Consultant, Horizon 2020 Framework Programme... 274 \n",
"6335357 [[Dr, Institut Agama Islam Negeri Syekh Nurjat... 79 \n",
"6489838 [[Assistant Professor, Adam Mickiewicz Univers... 29 \n",
"7570584 [[Professor, Universidade Federal do Rio Grand... 1105 \n",
"10240510 NaN 2023 \n",
"10448304 [[INSPECTOR GENERAL JORNADA VESPERTINA // De 2... 11 \n",
"10663894 [[ACM Distinguished Speaker (Volunteer), Assoc... 93 \n",
"\n",
" works_source \\\n",
"482862 [Gustavo Duperré, Scopus - Elsevier, Publons, ... \n",
"554859 [ResearcherID, BASE - Bielefeld Academic Searc... \n",
"1381092 [goowonderland dispensary] \n",
"2679353 [BASE - Bielefeld Academic Search Engine, Abdu... \n",
"3354430 [Publons, DataCite, Scopus - Elsevier, A.L. Gu... \n",
"4004281 [INSPIRE-HEP, ResearcherID, ISNI2ORCID search ... \n",
"4022480 [The Lens, BASE - Bielefeld Academic Search En... \n",
"6335357 [Publons, Aan Jaelani, Scopus - Elsevier, Dime... \n",
"6489838 [Scopus - Elsevier] \n",
"7570584 [The Lens, Pelayo Munhoz Olea, Dimensions, BAS... \n",
"10240510 [BASE - Bielefeld Academic Search Engine, Data... \n",
"10448304 [JUAN DE DIOS BELTR´´ÁN MANCILLA] \n",
"10663894 [Publons, Multidisciplinary Digital Publishing... \n",
"\n",
" primary_email_domain \\\n",
"482862 usal.edu.ar \n",
"554859 hotmail.com \n",
"1381092 NaN \n",
"2679353 NaN \n",
"3354430 NaN \n",
"4004281 ise.pw.edu.pl \n",
"4022480 NaN \n",
"6335357 syekhnurjati.ac.id \n",
"6489838 NaN \n",
"7570584 NaN \n",
"10240510 NaN \n",
"10448304 NaN \n",
"10663894 NaN \n",
"\n",
" other_email_domains n_emails \\\n",
"482862 NaN NaN \n",
"554859 [gmail.com, gmail.com, hotmail.com, baldwin.ed... 5.0 \n",
"1381092 NaN NaN \n",
"2679353 NaN NaN \n",
"3354430 NaN NaN \n",
"4004281 [ise.pw.edu.pl, elka.pw.edu.pl, cern.ch] 3.0 \n",
"4022480 NaN NaN \n",
"6335357 [gmail.com] 1.0 \n",
"6489838 NaN NaN \n",
"7570584 NaN NaN \n",
"10240510 NaN NaN \n",
"10448304 NaN NaN \n",
"10663894 NaN NaN \n",
"\n",
" url_domains n_urls \n",
"482862 [icomos.ro, unirioja.es, unirioja.es, unc.edu.... 51.0 \n",
"554859 [concytec.gob.pe, redalyc.org, redalyc.org, un... 61.0 \n",
"1381092 [goowonderland.com, goowonderland.com, goowond... 81.0 \n",
"2679353 [google.com, syekhnurjati.ac.id, orcid.org, bl... 59.0 \n",
"3354430 [youtube.com, isjaee.com, researchgate.net, re... 111.0 \n",
"4004281 [google.pl, publons.com, scopus.com, mendeley.... 114.0 \n",
"4022480 [worldassessmentcouncil.org, spseke.sk, bcs.or... 114.0 \n",
"6335357 [microsoft.com, twitter.com, academia.edu, aca... 67.0 \n",
"6489838 [biowebspin.com, biowebspin.com, google.com, l... 73.0 \n",
"7570584 [cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c... 61.0 \n",
"10240510 [researchgate.net, youtube.com, linkedin.com, ... 152.0 \n",
"10448304 [yumpu.com, ijopm.org, google.com, blogspot.co... 69.0 \n",
"10663894 [geethashishu.in, geethashishu.in, acm.org, go... 71.0 \n",
"\n",
"[13 rows x 22 columns]"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Profiles with more than 50 entries in url_domains that also have at least one work.\n",
"df.loc[df['url_domains'].str.len().gt(50) & df['n_works'].gt(0)]"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" <th>url_domains</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45566</th>\n",
" <td>0000-0003-1948-3180</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Mark</td>\n",
" <td>Katz</td>\n",
" <td>Mark N. Katz is a professor of government and ...</td>\n",
" <td>NaN</td>\n",
" <td>[[Adjusting to Change: American Foreign Policy...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[[Scopus Author ID, 25649901800]]</td>\n",
" <td>[[Political Science, Ph.D., Massachusetts Inst...</td>\n",
" <td>[[Professor of Government and Politics, George...</td>\n",
" <td>58</td>\n",
" <td>[Scopus - Elsevier]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[wordpress.com, marknkatz.com, gmu.edu, atlant...</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72674</th>\n",
" <td>0000-0002-2000-8339</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Phòng khám tư nhân Hà Nội</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Sức khỏe, https://onhealth.vn/], [Khám phụ k...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>[Phòng khám tư nhân Hà Nội]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[onhealth.vn, onhealth.vn, onhealth.vn, onheal...</td>\n",
" <td>49.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>172820</th>\n",
" <td>0000-0001-9293-2224</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Juan Carlos</td>\n",
" <td>Garcia Hoyos</td>\n",
" <td>My name is Juan Carlos García Hoyos. I was bor...</td>\n",
" <td>[Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /...</td>\n",
" <td>[[Air Force Office of Scientific Research (WRI...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[Faculty of Philosophy, History - Ph.D., Char...</td>\n",
" <td>[[responsible for the Project Service Level Ag...</td>\n",
" <td>20</td>\n",
" <td>[Juan Carlos Garcia Hoyos]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[af.mil, gst.com, govtribe.com, sbir.gov, open...</td>\n",
" <td>28.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>209505</th>\n",
" <td>0000-0003-3045-0056</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Ananda</td>\n",
" <td>Majumdar</td>\n",
" <td>I am Ananda Majumdar, Child Care Educator at B...</td>\n",
" <td>NaN</td>\n",
" <td>[[Migration Scholar and Ananda , https://grfdt...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[Education , B.Ed. After Degree , University ...</td>\n",
" <td>[[General Coordinator- University of Alberta C...</td>\n",
" <td>43</td>\n",
" <td>[Ananda Majumdar]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[grfdt.com, linkedin.com, academia.edu, resear...</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>259877</th>\n",
" <td>0000-0003-1815-5732</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>JAS</td>\n",
" <td>(Jurnal Akuntansi Syariah)</td>\n",
" <td>JAS (Jurnal Akuntansi Syariah) published in pr...</td>\n",
" <td>NaN</td>\n",
" <td>[[Website, https://ejournal.stiesyariahbengkal...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>67</td>\n",
" <td>[JAS (Jurnal Akuntansi Syariah)]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[stiesyariahbengkalis.ac.id, lipi.go.id, cross...</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10494820</th>\n",
" <td>0000-0002-1324-7171</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Vanesa Natalia</td>\n",
" <td>Rodriguez</td>\n",
" <td>Nombre y Apellido: Vanesa Natalia Rodriguez. ...</td>\n",
" <td>[Vanesa Rodriguez, Vanesa N. Rodriguez]</td>\n",
" <td>[[De rufianes y franchutas Representaciones y ...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[, Maestría en Ciencias Sociales con Mención ...</td>\n",
" <td>[[Profesora, Universidad Nacional de La Matanz...</td>\n",
" <td>7</td>\n",
" <td>[Vanesa Natalia Rodriguez]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[unlam.edu.ar, unirioja.es, amazon.fr, abebook...</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10495806</th>\n",
" <td>0000-0002-1700-8311</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Fix-IT</td>\n",
" <td>Rite</td>\n",
" <td>NaN</td>\n",
" <td>[Best Heating &amp; Plumbing Company]</td>\n",
" <td>[[Website, https://fix-itrite.com], [Muckrack,...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>[Fix-It Rite]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[fix-itrite.com, muckrack.com, tumblr.com, dri...</td>\n",
" <td>11.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10633545</th>\n",
" <td>0000-0003-2676-4431</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Benny</td>\n",
" <td>Soewandi</td>\n",
" <td>NaN</td>\n",
" <td>[Benny Soewandi]</td>\n",
" <td>[[Conservation Efforts as a Result of Theoreti...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Membership, Paguyuban Pelestarian Budaya Ban...</td>\n",
" <td>2</td>\n",
" <td>[Benny Soewandi]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[wordpress.com, wordpress.com, linkedin.com, f...</td>\n",
" <td>11.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10648241</th>\n",
" <td>0000-0001-8157-0600</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Bijan</td>\n",
" <td>Yavar</td>\n",
" <td>Senior Research Assistant and Phd Student in O...</td>\n",
" <td>[B. Yavar, Yavar Bijan]</td>\n",
" <td>[[Web of Science (Pub) Researcher ID: A-3544-2...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[[Scopus Author ID, 56556873600]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6</td>\n",
" <td>[Scopus - Elsevier]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[publons.com, articulate.com, zenodo.org, orci...</td>\n",
" <td>15.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10679699</th>\n",
" <td>0000-0002-9874-1450</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>FENGZHI</td>\n",
" <td>WU</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[A Systematic Study on the Dynamic Softening ...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>[FENGZHI WU]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[springer.com, sciencedirect.com, sciencedirec...</td>\n",
" <td>23.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>139 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email \\\n",
"45566 0000-0003-1948-3180 1 1 \n",
"72674 0000-0002-2000-8339 1 1 \n",
"172820 0000-0001-9293-2224 1 1 \n",
"209505 0000-0003-3045-0056 1 1 \n",
"259877 0000-0003-1815-5732 1 1 \n",
"... ... ... ... \n",
"10494820 0000-0002-1324-7171 1 1 \n",
"10495806 0000-0002-1700-8311 1 1 \n",
"10633545 0000-0003-2676-4431 1 1 \n",
"10648241 0000-0001-8157-0600 1 1 \n",
"10679699 0000-0002-9874-1450 1 1 \n",
"\n",
" verified_primary_email given_names \\\n",
"45566 1 Mark \n",
"72674 1 Phòng khám tư nhân Hà Nội \n",
"172820 1 Juan Carlos \n",
"209505 1 Ananda \n",
"259877 1 JAS \n",
"... ... ... \n",
"10494820 1 Vanesa Natalia \n",
"10495806 1 Fix-IT \n",
"10633545 1 Benny \n",
"10648241 1 Bijan \n",
"10679699 1 FENGZHI \n",
"\n",
" family_name \\\n",
"45566 Katz \n",
"72674 NaN \n",
"172820 Garcia Hoyos \n",
"209505 Majumdar \n",
"259877 (Jurnal Akuntansi Syariah) \n",
"... ... \n",
"10494820 Rodriguez \n",
"10495806 Rite \n",
"10633545 Soewandi \n",
"10648241 Yavar \n",
"10679699 WU \n",
"\n",
" biography \\\n",
"45566 Mark N. Katz is a professor of government and ... \n",
"72674 NaN \n",
"172820 My name is Juan Carlos García Hoyos. I was bor... \n",
"209505 I am Ananda Majumdar, Child Care Educator at B... \n",
"259877 JAS (Jurnal Akuntansi Syariah) published in pr... \n",
"... ... \n",
"10494820 Nombre y Apellido: Vanesa Natalia Rodriguez. ... \n",
"10495806 NaN \n",
"10633545 NaN \n",
"10648241 Senior Research Assistant and Phd Student in O... \n",
"10679699 NaN \n",
"\n",
" other_names \\\n",
"45566 NaN \n",
"72674 NaN \n",
"172820 [Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /... \n",
"209505 NaN \n",
"259877 NaN \n",
"... ... \n",
"10494820 [Vanesa Rodriguez, Vanesa N. Rodriguez] \n",
"10495806 [Best Heating & Plumbing Company] \n",
"10633545 [Benny Soewandi] \n",
"10648241 [B. Yavar, Yavar Bijan] \n",
"10679699 NaN \n",
"\n",
" urls primary_email \\\n",
"45566 [[Adjusting to Change: American Foreign Policy... NaN \n",
"72674 [[Sức khỏe, https://onhealth.vn/], [Khám phụ k... NaN \n",
"172820 [[Air Force Office of Scientific Research (WRI... NaN \n",
"209505 [[Migration Scholar and Ananda , https://grfdt... NaN \n",
"259877 [[Website, https://ejournal.stiesyariahbengkal... NaN \n",
"... ... ... \n",
"10494820 [[De rufianes y franchutas Representaciones y ... NaN \n",
"10495806 [[Website, https://fix-itrite.com], [Muckrack,... NaN \n",
"10633545 [[Conservation Efforts as a Result of Theoreti... NaN \n",
"10648241 [[Web of Science (Pub) Researcher ID: A-3544-2... NaN \n",
"10679699 [[A Systematic Study on the Dynamic Softening ... NaN \n",
"\n",
" ... external_ids \\\n",
"45566 ... [[Scopus Author ID, 25649901800]] \n",
"72674 ... NaN \n",
"172820 ... NaN \n",
"209505 ... NaN \n",
"259877 ... NaN \n",
"... ... ... \n",
"10494820 ... NaN \n",
"10495806 ... NaN \n",
"10633545 ... NaN \n",
"10648241 ... [[Scopus Author ID, 56556873600]] \n",
"10679699 ... NaN \n",
"\n",
" education \\\n",
"45566 [[Political Science, Ph.D., Massachusetts Inst... \n",
"72674 NaN \n",
"172820 [[Faculty of Philosophy, History - Ph.D., Char... \n",
"209505 [[Education , B.Ed. After Degree , University ... \n",
"259877 NaN \n",
"... ... \n",
"10494820 [[, Maestría en Ciencias Sociales con Mención ... \n",
"10495806 NaN \n",
"10633545 NaN \n",
"10648241 NaN \n",
"10679699 NaN \n",
"\n",
" employment n_works \\\n",
"45566 [[Professor of Government and Politics, George... 58 \n",
"72674 NaN 4 \n",
"172820 [[responsible for the Project Service Level Ag... 20 \n",
"209505 [[General Coordinator- University of Alberta C... 43 \n",
"259877 NaN 67 \n",
"... ... ... \n",
"10494820 [[Profesora, Universidad Nacional de La Matanz... 7 \n",
"10495806 NaN 1 \n",
"10633545 [[Membership, Paguyuban Pelestarian Budaya Ban... 2 \n",
"10648241 NaN 6 \n",
"10679699 NaN 3 \n",
"\n",
" works_source primary_email_domain \\\n",
"45566 [Scopus - Elsevier] NaN \n",
"72674 [Phòng khám tư nhân Hà Nội] NaN \n",
"172820 [Juan Carlos Garcia Hoyos] NaN \n",
"209505 [Ananda Majumdar] NaN \n",
"259877 [JAS (Jurnal Akuntansi Syariah)] NaN \n",
"... ... ... \n",
"10494820 [Vanesa Natalia Rodriguez] NaN \n",
"10495806 [Fix-It Rite] NaN \n",
"10633545 [Benny Soewandi] NaN \n",
"10648241 [Scopus - Elsevier] NaN \n",
"10679699 [FENGZHI WU] NaN \n",
"\n",
" other_email_domains n_emails \\\n",
"45566 NaN NaN \n",
"72674 NaN NaN \n",
"172820 NaN NaN \n",
"209505 NaN NaN \n",
"259877 NaN NaN \n",
"... ... ... \n",
"10494820 NaN NaN \n",
"10495806 NaN NaN \n",
"10633545 NaN NaN \n",
"10648241 NaN NaN \n",
"10679699 NaN NaN \n",
"\n",
" url_domains n_urls \n",
"45566 [wordpress.com, marknkatz.com, gmu.edu, atlant... 16.0 \n",
"72674 [onhealth.vn, onhealth.vn, onhealth.vn, onheal... 49.0 \n",
"172820 [af.mil, gst.com, govtribe.com, sbir.gov, open... 28.0 \n",
"209505 [grfdt.com, linkedin.com, academia.edu, resear... 24.0 \n",
"259877 [stiesyariahbengkalis.ac.id, lipi.go.id, cross... 17.0 \n",
"... ... ... \n",
"10494820 [unlam.edu.ar, unirioja.es, amazon.fr, abebook... 19.0 \n",
"10495806 [fix-itrite.com, muckrack.com, tumblr.com, dri... 11.0 \n",
"10633545 [wordpress.com, wordpress.com, linkedin.com, f... 11.0 \n",
"10648241 [publons.com, articulate.com, zenodo.org, orci... 15.0 \n",
"10679699 [springer.com, sciencedirect.com, sciencedirec... 23.0 \n",
"\n",
"[139 rows x 22 columns]"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Profiles with more than 10 entries in url_domains, at least one work,\n",
"# and exactly one entry in works_source.\n",
"df.loc[\n",
"    df['url_domains'].str.len().gt(10)\n",
"    & df['n_works'].gt(0)\n",
"    & df['works_source'].str.len().eq(1)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" <th>url_domains</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0003-1948-3180</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Mark</td>\n",
" <td>Katz</td>\n",
" <td>Mark N. Katz is a professor of government and ...</td>\n",
" <td>NaN</td>\n",
" <td>[[Adjusting to Change: American Foreign Policy...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[[Scopus Author ID, 25649901800]]</td>\n",
" <td>[[Political Science, Ph.D., Massachusetts Inst...</td>\n",
" <td>[[Professor of Government and Politics, George...</td>\n",
" <td>58</td>\n",
" <td>Scopus - Elsevier</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[wordpress.com, marknkatz.com, gmu.edu, atlant...</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0002-2000-8339</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Phòng khám tư nhân Hà Nội</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Sức khỏe, https://onhealth.vn/], [Khám phụ k...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>Phòng khám tư nhân Hà Nội</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[onhealth.vn, onhealth.vn, onhealth.vn, onheal...</td>\n",
" <td>49.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0001-9293-2224</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Juan Carlos</td>\n",
" <td>Garcia Hoyos</td>\n",
" <td>My name is Juan Carlos García Hoyos. I was bor...</td>\n",
" <td>[Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /...</td>\n",
" <td>[[Air Force Office of Scientific Research (WRI...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[Faculty of Philosophy, History - Ph.D., Char...</td>\n",
" <td>[[responsible for the Project Service Level Ag...</td>\n",
" <td>20</td>\n",
" <td>Juan Carlos Garcia Hoyos</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[af.mil, gst.com, govtribe.com, sbir.gov, open...</td>\n",
" <td>28.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0003-3045-0056</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Ananda</td>\n",
" <td>Majumdar</td>\n",
" <td>I am Ananda Majumdar, Child Care Educator at B...</td>\n",
" <td>NaN</td>\n",
" <td>[[Migration Scholar and Ananda , https://grfdt...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[Education , B.Ed. After Degree , University ...</td>\n",
" <td>[[General Coordinator- University of Alberta C...</td>\n",
" <td>43</td>\n",
" <td>Ananda Majumdar</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[grfdt.com, linkedin.com, academia.edu, resear...</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0003-1815-5732</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>JAS</td>\n",
" <td>(Jurnal Akuntansi Syariah)</td>\n",
" <td>JAS (Jurnal Akuntansi Syariah) published in pr...</td>\n",
" <td>NaN</td>\n",
" <td>[[Website, https://ejournal.stiesyariahbengkal...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>67</td>\n",
" <td>JAS (Jurnal Akuntansi Syariah)</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[stiesyariahbengkalis.ac.id, lipi.go.id, cross...</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>0000-0002-1324-7171</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Vanesa Natalia</td>\n",
" <td>Rodriguez</td>\n",
" <td>Nombre y Apellido: Vanesa Natalia Rodriguez. ...</td>\n",
" <td>[Vanesa Rodriguez, Vanesa N. Rodriguez]</td>\n",
" <td>[[De rufianes y franchutas Representaciones y ...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[, Maestría en Ciencias Sociales con Mención ...</td>\n",
" <td>[[Profesora, Universidad Nacional de La Matanz...</td>\n",
" <td>7</td>\n",
" <td>Vanesa Natalia Rodriguez</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[unlam.edu.ar, unirioja.es, amazon.fr, abebook...</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>0000-0002-1700-8311</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Fix-IT</td>\n",
" <td>Rite</td>\n",
" <td>NaN</td>\n",
" <td>[Best Heating &amp; Plumbing Company]</td>\n",
" <td>[[Website, https://fix-itrite.com], [Muckrack,...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>Fix-It Rite</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[fix-itrite.com, muckrack.com, tumblr.com, dri...</td>\n",
" <td>11.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>0000-0003-2676-4431</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Benny</td>\n",
" <td>Soewandi</td>\n",
" <td>NaN</td>\n",
" <td>[Benny Soewandi]</td>\n",
" <td>[[Conservation Efforts as a Result of Theoreti...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Membership, Paguyuban Pelestarian Budaya Ban...</td>\n",
" <td>2</td>\n",
" <td>Benny Soewandi</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[wordpress.com, wordpress.com, linkedin.com, f...</td>\n",
" <td>11.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>0000-0001-8157-0600</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Bijan</td>\n",
" <td>Yavar</td>\n",
" <td>Senior Research Assistant and Phd Student in O...</td>\n",
" <td>[B. Yavar, Yavar Bijan]</td>\n",
" <td>[[Web of Science (Pub) Researcher ID: A-3544-2...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[[Scopus Author ID, 56556873600]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6</td>\n",
" <td>Scopus - Elsevier</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[publons.com, articulate.com, zenodo.org, orci...</td>\n",
" <td>15.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>0000-0002-9874-1450</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>FENGZHI</td>\n",
" <td>WU</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[A Systematic Study on the Dynamic Softening ...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>FENGZHI WU</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[springer.com, sciencedirect.com, sciencedirec...</td>\n",
" <td>23.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>139 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"0 0000-0003-1948-3180 1 1 1 \n",
"1 0000-0002-2000-8339 1 1 1 \n",
"2 0000-0001-9293-2224 1 1 1 \n",
"3 0000-0003-3045-0056 1 1 1 \n",
"4 0000-0003-1815-5732 1 1 1 \n",
".. ... ... ... ... \n",
"134 0000-0002-1324-7171 1 1 1 \n",
"135 0000-0002-1700-8311 1 1 1 \n",
"136 0000-0003-2676-4431 1 1 1 \n",
"137 0000-0001-8157-0600 1 1 1 \n",
"138 0000-0002-9874-1450 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"0 Mark Katz \n",
"1 Phòng khám tư nhân Hà Nội NaN \n",
"2 Juan Carlos Garcia Hoyos \n",
"3 Ananda Majumdar \n",
"4 JAS (Jurnal Akuntansi Syariah) \n",
".. ... ... \n",
"134 Vanesa Natalia Rodriguez \n",
"135 Fix-IT Rite \n",
"136 Benny Soewandi \n",
"137 Bijan Yavar \n",
"138 FENGZHI WU \n",
"\n",
" biography \\\n",
"0 Mark N. Katz is a professor of government and ... \n",
"1 NaN \n",
"2 My name is Juan Carlos García Hoyos. I was bor... \n",
"3 I am Ananda Majumdar, Child Care Educator at B... \n",
"4 JAS (Jurnal Akuntansi Syariah) published in pr... \n",
".. ... \n",
"134 Nombre y Apellido: Vanesa Natalia Rodriguez. ... \n",
"135 NaN \n",
"136 NaN \n",
"137 Senior Research Assistant and Phd Student in O... \n",
"138 NaN \n",
"\n",
" other_names \\\n",
"0 NaN \n",
"1 NaN \n",
"2 [Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /... \n",
"3 NaN \n",
"4 NaN \n",
".. ... \n",
"134 [Vanesa Rodriguez, Vanesa N. Rodriguez] \n",
"135 [Best Heating & Plumbing Company] \n",
"136 [Benny Soewandi] \n",
"137 [B. Yavar, Yavar Bijan] \n",
"138 NaN \n",
"\n",
" urls primary_email ... \\\n",
"0 [[Adjusting to Change: American Foreign Policy... NaN ... \n",
"1 [[Sức khỏe, https://onhealth.vn/], [Khám phụ k... NaN ... \n",
"2 [[Air Force Office of Scientific Research (WRI... NaN ... \n",
"3 [[Migration Scholar and Ananda , https://grfdt... NaN ... \n",
"4 [[Website, https://ejournal.stiesyariahbengkal... NaN ... \n",
".. ... ... ... \n",
"134 [[De rufianes y franchutas Representaciones y ... NaN ... \n",
"135 [[Website, https://fix-itrite.com], [Muckrack,... NaN ... \n",
"136 [[Conservation Efforts as a Result of Theoreti... NaN ... \n",
"137 [[Web of Science (Pub) Researcher ID: A-3544-2... NaN ... \n",
"138 [[A Systematic Study on the Dynamic Softening ... NaN ... \n",
"\n",
" external_ids \\\n",
"0 [[Scopus Author ID, 25649901800]] \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
".. ... \n",
"134 NaN \n",
"135 NaN \n",
"136 NaN \n",
"137 [[Scopus Author ID, 56556873600]] \n",
"138 NaN \n",
"\n",
" education \\\n",
"0 [[Political Science, Ph.D., Massachusetts Inst... \n",
"1 NaN \n",
"2 [[Faculty of Philosophy, History - Ph.D., Char... \n",
"3 [[Education , B.Ed. After Degree , University ... \n",
"4 NaN \n",
".. ... \n",
"134 [[, Maestría en Ciencias Sociales con Mención ... \n",
"135 NaN \n",
"136 NaN \n",
"137 NaN \n",
"138 NaN \n",
"\n",
" employment n_works \\\n",
"0 [[Professor of Government and Politics, George... 58 \n",
"1 NaN 4 \n",
"2 [[responsible for the Project Service Level Ag... 20 \n",
"3 [[General Coordinator- University of Alberta C... 43 \n",
"4 NaN 67 \n",
".. ... ... \n",
"134 [[Profesora, Universidad Nacional de La Matanz... 7 \n",
"135 NaN 1 \n",
"136 [[Membership, Paguyuban Pelestarian Budaya Ban... 2 \n",
"137 NaN 6 \n",
"138 NaN 3 \n",
"\n",
" works_source primary_email_domain other_email_domains \\\n",
"0 Scopus - Elsevier NaN NaN \n",
"1 Phòng khám tư nhân Hà Nội NaN NaN \n",
"2 Juan Carlos Garcia Hoyos NaN NaN \n",
"3 Ananda Majumdar NaN NaN \n",
"4 JAS (Jurnal Akuntansi Syariah) NaN NaN \n",
".. ... ... ... \n",
"134 Vanesa Natalia Rodriguez NaN NaN \n",
"135 Fix-It Rite NaN NaN \n",
"136 Benny Soewandi NaN NaN \n",
"137 Scopus - Elsevier NaN NaN \n",
"138 FENGZHI WU NaN NaN \n",
"\n",
" n_emails url_domains n_urls \n",
"0 NaN [wordpress.com, marknkatz.com, gmu.edu, atlant... 16.0 \n",
"1 NaN [onhealth.vn, onhealth.vn, onhealth.vn, onheal... 49.0 \n",
"2 NaN [af.mil, gst.com, govtribe.com, sbir.gov, open... 28.0 \n",
"3 NaN [grfdt.com, linkedin.com, academia.edu, resear... 24.0 \n",
"4 NaN [stiesyariahbengkalis.ac.id, lipi.go.id, cross... 17.0 \n",
".. ... ... ... \n",
"134 NaN [unlam.edu.ar, unirioja.es, amazon.fr, abebook... 19.0 \n",
"135 NaN [fix-itrite.com, muckrack.com, tumblr.com, dri... 11.0 \n",
"136 NaN [wordpress.com, wordpress.com, linkedin.com, f... 11.0 \n",
"137 NaN [publons.com, articulate.com, zenodo.org, orci... 15.0 \n",
"138 NaN [springer.com, sciencedirect.com, sciencedirec... 23.0 \n",
"\n",
"[139 rows x 22 columns]"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Profiles that list more than 10 URL domains, have at least one work, and whose\n",
"# `works_source` holds exactly one element; `explode` then unwraps that single\n",
"# element so each row carries the source as a plain value.\n",
"exploded_sources = (\n",
"    df.loc[\n",
"        df['url_domains'].str.len().gt(10)\n",
"        & df['n_works'].gt(0)\n",
"        & df['works_source'].str.len().eq(1)\n",
"    ]\n",
"    .explode('works_source')\n",
"    .reset_index(drop=True)\n",
")\n",
"exploded_sources"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" <th>url_domains</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0002-2000-8339</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Phòng khám tư nhân Hà Nội</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Sức khỏe, https://onhealth.vn/], [Khám phụ k...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>Phòng khám tư nhân Hà Nội</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[onhealth.vn, onhealth.vn, onhealth.vn, onheal...</td>\n",
" <td>49.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0001-9293-2224</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Juan Carlos</td>\n",
" <td>Garcia Hoyos</td>\n",
" <td>My name is Juan Carlos García Hoyos. I was bor...</td>\n",
" <td>[Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /...</td>\n",
" <td>[[Air Force Office of Scientific Research (WRI...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[Faculty of Philosophy, History - Ph.D., Char...</td>\n",
" <td>[[responsible for the Project Service Level Ag...</td>\n",
" <td>20</td>\n",
" <td>Juan Carlos Garcia Hoyos</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[af.mil, gst.com, govtribe.com, sbir.gov, open...</td>\n",
" <td>28.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0003-3045-0056</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Ananda</td>\n",
" <td>Majumdar</td>\n",
" <td>I am Ananda Majumdar, Child Care Educator at B...</td>\n",
" <td>NaN</td>\n",
" <td>[[Migration Scholar and Ananda , https://grfdt...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[Education , B.Ed. After Degree , University ...</td>\n",
" <td>[[General Coordinator- University of Alberta C...</td>\n",
" <td>43</td>\n",
" <td>Ananda Majumdar</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[grfdt.com, linkedin.com, academia.edu, resear...</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0003-1815-5732</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>JAS</td>\n",
" <td>(Jurnal Akuntansi Syariah)</td>\n",
" <td>JAS (Jurnal Akuntansi Syariah) published in pr...</td>\n",
" <td>NaN</td>\n",
" <td>[[Website, https://ejournal.stiesyariahbengkal...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>67</td>\n",
" <td>JAS (Jurnal Akuntansi Syariah)</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[stiesyariahbengkalis.ac.id, lipi.go.id, cross...</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0000-0002-4379-6454</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Caroline Wanjiru</td>\n",
" <td>Kariuki</td>\n",
" <td>Caroline holds a PhD in Economics from Curtin ...</td>\n",
" <td>NaN</td>\n",
" <td>[[Scopus Profile, https://www.scopus.com/dashb...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[Economics, Doctor of Philosophy , Curtin Uni...</td>\n",
" <td>[[Director, Educational Development, Strathmor...</td>\n",
" <td>4</td>\n",
" <td>Caroline Wanjiru Kariuki</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[scopus.com, mendeley.com, publons.com, resear...</td>\n",
" <td>13.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>132</th>\n",
" <td>0000-0001-6352-7086</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Susan</td>\n",
" <td>Hawthorne</td>\n",
" <td>Susan is a poet, novelist, publisher and Sansk...</td>\n",
" <td>[S. Hawthorne, Susan C. C. Hawthorne]</td>\n",
" <td>[[Spinifex Press, http://www.spinifexpress.com...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>[[ResearcherID, K-6039-2018]]</td>\n",
" <td>[[School of Asian Studies, Honours Sanskrit, A...</td>\n",
" <td>[[Adjunct Professor, James Cook University, To...</td>\n",
" <td>352</td>\n",
" <td>Susan Hawthorne</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[spinifexpress.com.au, linkedin.com, twitter.c...</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>133</th>\n",
" <td>0000-0002-4062-3603</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>JUAN DE DIOS</td>\n",
" <td>BELTRÁN MANCILLA</td>\n",
" <td>JUAN DE DIOS BELTRÁN MANCILLA (*) Filósofo aut...</td>\n",
" <td>[Juan de Dios Beltrán Mancilla, FILÓSOFO AUTOD...</td>\n",
" <td>[[01.- Juan de Dios Beltrán Mancilla. Teoría O...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[, DIPLOMADO EN PRACTICAS DIRECTIVAS PARA OR...</td>\n",
" <td>[[INSPECTOR GENERAL JORNADA VESPERTINA // De 2...</td>\n",
" <td>11</td>\n",
" <td>JUAN DE DIOS BELTR´´ÁN MANCILLA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[yumpu.com, ijopm.org, google.com, blogspot.co...</td>\n",
" <td>69.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>0000-0002-1324-7171</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Vanesa Natalia</td>\n",
" <td>Rodriguez</td>\n",
" <td>Nombre y Apellido: Vanesa Natalia Rodriguez. ...</td>\n",
" <td>[Vanesa Rodriguez, Vanesa N. Rodriguez]</td>\n",
" <td>[[De rufianes y franchutas Representaciones y ...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[, Maestría en Ciencias Sociales con Mención ...</td>\n",
" <td>[[Profesora, Universidad Nacional de La Matanz...</td>\n",
" <td>7</td>\n",
" <td>Vanesa Natalia Rodriguez</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[unlam.edu.ar, unirioja.es, amazon.fr, abebook...</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>0000-0003-2676-4431</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Benny</td>\n",
" <td>Soewandi</td>\n",
" <td>NaN</td>\n",
" <td>[Benny Soewandi]</td>\n",
" <td>[[Conservation Efforts as a Result of Theoreti...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Membership, Paguyuban Pelestarian Budaya Ban...</td>\n",
" <td>2</td>\n",
" <td>Benny Soewandi</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[wordpress.com, wordpress.com, linkedin.com, f...</td>\n",
" <td>11.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>0000-0002-9874-1450</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>FENGZHI</td>\n",
" <td>WU</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[A Systematic Study on the Dynamic Softening ...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>FENGZHI WU</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[springer.com, sciencedirect.com, sciencedirec...</td>\n",
" <td>23.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>108 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"1 0000-0002-2000-8339 1 1 1 \n",
"2 0000-0001-9293-2224 1 1 1 \n",
"3 0000-0003-3045-0056 1 1 1 \n",
"4 0000-0003-1815-5732 1 1 1 \n",
"5 0000-0002-4379-6454 1 1 1 \n",
".. ... ... ... ... \n",
"132 0000-0001-6352-7086 1 1 1 \n",
"133 0000-0002-4062-3603 1 1 1 \n",
"134 0000-0002-1324-7171 1 1 1 \n",
"136 0000-0003-2676-4431 1 1 1 \n",
"138 0000-0002-9874-1450 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"1 Phòng khám tư nhân Hà Nội NaN \n",
"2 Juan Carlos Garcia Hoyos \n",
"3 Ananda Majumdar \n",
"4 JAS (Jurnal Akuntansi Syariah) \n",
"5 Caroline Wanjiru Kariuki \n",
".. ... ... \n",
"132 Susan Hawthorne \n",
"133 JUAN DE DIOS BELTRÁN MANCILLA \n",
"134 Vanesa Natalia Rodriguez \n",
"136 Benny Soewandi \n",
"138 FENGZHI WU \n",
"\n",
" biography \\\n",
"1 NaN \n",
"2 My name is Juan Carlos García Hoyos. I was bor... \n",
"3 I am Ananda Majumdar, Child Care Educator at B... \n",
"4 JAS (Jurnal Akuntansi Syariah) published in pr... \n",
"5 Caroline holds a PhD in Economics from Curtin ... \n",
".. ... \n",
"132 Susan is a poet, novelist, publisher and Sansk... \n",
"133 JUAN DE DIOS BELTRÁN MANCILLA (*) Filósofo aut... \n",
"134 Nombre y Apellido: Vanesa Natalia Rodriguez. ... \n",
"136 NaN \n",
"138 NaN \n",
"\n",
" other_names \\\n",
"1 NaN \n",
"2 [Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /... \n",
"3 NaN \n",
"4 NaN \n",
"5 NaN \n",
".. ... \n",
"132 [S. Hawthorne, Susan C. C. Hawthorne] \n",
"133 [Juan de Dios Beltrán Mancilla, FILÓSOFO AUTOD... \n",
"134 [Vanesa Rodriguez, Vanesa N. Rodriguez] \n",
"136 [Benny Soewandi] \n",
"138 NaN \n",
"\n",
" urls primary_email ... \\\n",
"1 [[Sức khỏe, https://onhealth.vn/], [Khám phụ k... NaN ... \n",
"2 [[Air Force Office of Scientific Research (WRI... NaN ... \n",
"3 [[Migration Scholar and Ananda , https://grfdt... NaN ... \n",
"4 [[Website, https://ejournal.stiesyariahbengkal... NaN ... \n",
"5 [[Scopus Profile, https://www.scopus.com/dashb... NaN ... \n",
".. ... ... ... \n",
"132 [[Spinifex Press, http://www.spinifexpress.com... NaN ... \n",
"133 [[01.- Juan de Dios Beltrán Mancilla. Teoría O... NaN ... \n",
"134 [[De rufianes y franchutas Representaciones y ... NaN ... \n",
"136 [[Conservation Efforts as a Result of Theoreti... NaN ... \n",
"138 [[A Systematic Study on the Dynamic Softening ... NaN ... \n",
"\n",
" external_ids \\\n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"5 NaN \n",
".. ... \n",
"132 [[ResearcherID, K-6039-2018]] \n",
"133 NaN \n",
"134 NaN \n",
"136 NaN \n",
"138 NaN \n",
"\n",
" education \\\n",
"1 NaN \n",
"2 [[Faculty of Philosophy, History - Ph.D., Char... \n",
"3 [[Education , B.Ed. After Degree , University ... \n",
"4 NaN \n",
"5 [[Economics, Doctor of Philosophy , Curtin Uni... \n",
".. ... \n",
"132 [[School of Asian Studies, Honours Sanskrit, A... \n",
"133 [[, DIPLOMADO EN PRACTICAS DIRECTIVAS PARA OR... \n",
"134 [[, Maestría en Ciencias Sociales con Mención ... \n",
"136 NaN \n",
"138 NaN \n",
"\n",
" employment n_works \\\n",
"1 NaN 4 \n",
"2 [[responsible for the Project Service Level Ag... 20 \n",
"3 [[General Coordinator- University of Alberta C... 43 \n",
"4 NaN 67 \n",
"5 [[Director, Educational Development, Strathmor... 4 \n",
".. ... ... \n",
"132 [[Adjunct Professor, James Cook University, To... 352 \n",
"133 [[INSPECTOR GENERAL JORNADA VESPERTINA // De 2... 11 \n",
"134 [[Profesora, Universidad Nacional de La Matanz... 7 \n",
"136 [[Membership, Paguyuban Pelestarian Budaya Ban... 2 \n",
"138 NaN 3 \n",
"\n",
" works_source primary_email_domain \\\n",
"1 Phòng khám tư nhân Hà Nội NaN \n",
"2 Juan Carlos Garcia Hoyos NaN \n",
"3 Ananda Majumdar NaN \n",
"4 JAS (Jurnal Akuntansi Syariah) NaN \n",
"5 Caroline Wanjiru Kariuki NaN \n",
".. ... ... \n",
"132 Susan Hawthorne NaN \n",
"133 JUAN DE DIOS BELTR´´ÁN MANCILLA NaN \n",
"134 Vanesa Natalia Rodriguez NaN \n",
"136 Benny Soewandi NaN \n",
"138 FENGZHI WU NaN \n",
"\n",
" other_email_domains n_emails \\\n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"5 NaN NaN \n",
".. ... ... \n",
"132 NaN NaN \n",
"133 NaN NaN \n",
"134 NaN NaN \n",
"136 NaN NaN \n",
"138 NaN NaN \n",
"\n",
" url_domains n_urls \n",
"1 [onhealth.vn, onhealth.vn, onhealth.vn, onheal... 49.0 \n",
"2 [af.mil, gst.com, govtribe.com, sbir.gov, open... 28.0 \n",
"3 [grfdt.com, linkedin.com, academia.edu, resear... 24.0 \n",
"4 [stiesyariahbengkalis.ac.id, lipi.go.id, cross... 17.0 \n",
"5 [scopus.com, mendeley.com, publons.com, resear... 13.0 \n",
".. ... ... \n",
"132 [spinifexpress.com.au, linkedin.com, twitter.c... 12.0 \n",
"133 [yumpu.com, ijopm.org, google.com, blogspot.co... 69.0 \n",
"134 [unlam.edu.ar, unirioja.es, amazon.fr, abebook... 19.0 \n",
"136 [wordpress.com, wordpress.com, linkedin.com, f... 11.0 \n",
"138 [springer.com, sciencedirect.com, sciencedirec... 23.0 \n",
"\n",
"[108 rows x 22 columns]"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows whose (single) works source text contains the profile's given names.\n",
"# NOTE(review): presumably these are self-asserted works -- confirm.\n",
"exploded_sources[\n",
"    exploded_sources.apply(\n",
"        lambda row: row['given_names'] in row['works_source'],\n",
"        axis=1,\n",
"    )\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Works source"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TODO: paste Miriam's works-source analysis here."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## External IDs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"External IDs should come from reliable sources: ORCID registrants cannot add them freely."
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"# Number of external identifiers per profile. `.str.len()` already yields NaN for\n",
"# profiles without any, so no pre-filtering (and no copy of the whole frame) is needed.\n",
"df['n_ids'] = df['external_ids'].str.len()"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 1.285292e+06\n",
"mean 1.357162e+00\n",
"std 6.607097e-01\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 8.000000e+01\n",
"Name: n_ids, dtype: float64"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of the per-profile external-id counts.\n",
"df['n_ids'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" <th>url_domains</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9228793</th>\n",
" <td>0000-0002-9554-6633</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>John A</td>\n",
" <td>Williams</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[Aston University profile page, https://resea...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>[[, Aston University, Birmingham, , GB, 1722, ...</td>\n",
" <td>91</td>\n",
" <td>[Aston Research Explorer]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[aston.ac.uk]</td>\n",
" <td>1.0</td>\n",
" <td>80.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1 rows × 23 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"9228793 0000-0002-9554-6633 1 1 1 \n",
"\n",
" given_names family_name biography other_names \\\n",
"9228793 John A Williams NaN NaN \n",
"\n",
" urls primary_email ... \\\n",
"9228793 [[Aston University profile page, https://resea... NaN ... \n",
"\n",
" education employment n_works \\\n",
"9228793 NaN [[, Aston University, Birmingham, , GB, 1722, ... 91 \n",
"\n",
" works_source primary_email_domain other_email_domains \\\n",
"9228793 [Aston Research Explorer] NaN NaN \n",
"\n",
" n_emails url_domains n_urls n_ids \n",
"9228793 NaN [aston.ac.uk] 1.0 80.0 \n",
"\n",
"[1 rows x 23 columns]"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Profile(s) carrying the largest number of external ids.\n",
"df.loc[df['n_ids'].eq(df['n_ids'].max())]"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"# One row per (orcid, external id) entry; rows without external ids stay as a\n",
"# single NaN row after the explode.\n",
"ids = (\n",
"    df.loc[:, ['orcid', 'external_ids']]\n",
"    .explode('external_ids')\n",
"    .reset_index(drop=True)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>external_ids</th>\n",
" <th>provider</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0000-0001-5004-4608</td>\n",
" <td>[Scopus Author ID, 40661094300]</td>\n",
" <td>Scopus Author ID</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>0000-0001-5008-2479</td>\n",
" <td>[Scopus Author ID, 12789856200]</td>\n",
" <td>Scopus Author ID</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>0000-0001-5008-2479</td>\n",
" <td>[Ciência ID, 2F1C-479B-B071]</td>\n",
" <td>Ciência ID</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>0000-0001-5010-9539</td>\n",
" <td>[Loop profile, 1098977]</td>\n",
" <td>Loop profile</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>0000-0001-5013-6529</td>\n",
" <td>[Scopus Author ID, 8986698300]</td>\n",
" <td>Scopus Author ID</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11203640</th>\n",
" <td>0000-0003-4985-9169</td>\n",
" <td>[Scopus Author ID, 23972479900]</td>\n",
" <td>Scopus Author ID</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11203642</th>\n",
" <td>0000-0003-4986-2106</td>\n",
" <td>[Scopus Author ID, 57189299099]</td>\n",
" <td>Scopus Author ID</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11203643</th>\n",
" <td>0000-0003-4986-2106</td>\n",
" <td>[Loop profile, 947925]</td>\n",
" <td>Loop profile</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11203653</th>\n",
" <td>0000-0003-4990-3115</td>\n",
" <td>[Loop profile, 991591]</td>\n",
" <td>Loop profile</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11203662</th>\n",
" <td>0000-0003-4994-6043</td>\n",
" <td>[Scopus Author ID, 55220889800]</td>\n",
" <td>Scopus Author ID</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1744349 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid external_ids \\\n",
"13 0000-0001-5004-4608 [Scopus Author ID, 40661094300] \n",
"24 0000-0001-5008-2479 [Scopus Author ID, 12789856200] \n",
"25 0000-0001-5008-2479 [Ciência ID, 2F1C-479B-B071] \n",
"31 0000-0001-5010-9539 [Loop profile, 1098977] \n",
"42 0000-0001-5013-6529 [Scopus Author ID, 8986698300] \n",
"... ... ... \n",
"11203640 0000-0003-4985-9169 [Scopus Author ID, 23972479900] \n",
"11203642 0000-0003-4986-2106 [Scopus Author ID, 57189299099] \n",
"11203643 0000-0003-4986-2106 [Loop profile, 947925] \n",
"11203653 0000-0003-4990-3115 [Loop profile, 991591] \n",
"11203662 0000-0003-4994-6043 [Scopus Author ID, 55220889800] \n",
"\n",
" provider \n",
"13 Scopus Author ID \n",
"24 Scopus Author ID \n",
"25 Ciência ID \n",
"31 Loop profile \n",
"42 Scopus Author ID \n",
"... ... \n",
"11203640 Scopus Author ID \n",
"11203642 Scopus Author ID \n",
"11203643 Loop profile \n",
"11203653 Loop profile \n",
"11203662 Scopus Author ID \n",
"\n",
"[1744349 rows x 3 columns]"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# `provider` is only added to `ids` by a later cell, so derive it here instead of\n",
"# reading `ids.provider`; this keeps the cell working on Restart & Run All.\n",
"# Each exploded entry is a [provider, identifier] pair, so element 0 is the provider.\n",
"ids.assign(provider=ids['external_ids'].str[0]).dropna(subset=['provider'])"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"# First element of each [provider, identifier] pair; rows without an external id\n",
"# are skipped here and end up as NaN once the result is aligned back onto `ids`.\n",
"ids['provider'] = ids['external_ids'].dropna().apply(lambda pair: pair[0])"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>external_ids</th>\n",
" <th>provider</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0000-0001-5004-4608</td>\n",
" <td>[Scopus Author ID, 40661094300]</td>\n",
" <td>Scopus Author ID</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>0000-0001-5008-2479</td>\n",
" <td>[Scopus Author ID, 12789856200]</td>\n",
" <td>Scopus Author ID</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>0000-0001-5008-2479</td>\n",
" <td>[Ciência ID, 2F1C-479B-B071]</td>\n",
" <td>Ciência ID</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>0000-0001-5010-9539</td>\n",
" <td>[Loop profile, 1098977]</td>\n",
" <td>Loop profile</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>0000-0001-5013-6529</td>\n",
" <td>[Scopus Author ID, 8986698300]</td>\n",
" <td>Scopus Author ID</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid external_ids provider\n",
"13 0000-0001-5004-4608 [Scopus Author ID, 40661094300] Scopus Author ID\n",
"24 0000-0001-5008-2479 [Scopus Author ID, 12789856200] Scopus Author ID\n",
"25 0000-0001-5008-2479 [Ciência ID, 2F1C-479B-B071] Ciência ID\n",
"31 0000-0001-5010-9539 [Loop profile, 1098977] Loop profile\n",
"42 0000-0001-5013-6529 [Scopus Author ID, 8986698300] Scopus Author ID"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ids[ids.provider.notna()].head()"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"Scopus Author ID",
"ResearcherID",
"Loop profile",
"Ciência ID",
"Researcher Name Resolver ID",
"中国科学家在线",
"ISNI",
"GND",
"Pitt ID",
"Technical University of Denmark CWIS",
"Sciprofile",
"Researcher ID",
"ID Dialnet",
"Digital author ID",
"Scopus Author ID: ",
"AuthenticusID",
"HKU ResearcherPage",
"UOW Scholars",
"CTI Vitae",
"Scopus Author ID:",
"HKUST Profile",
"Scopus author ID",
"Chalmers ID",
"Scopus ID",
"iAuthor",
"Google Scholar",
"AuthID",
"DAI",
"US EPA VIVO",
"Digital Author ID (DAI)",
"Scopus ID",
"Authenticus",
"Smithsonian Profiles",
"GitHub",
"eScientist",
"VIVO Cornell",
"ResearcherID:",
"Digital Author ID",
"Digital author ID (DAI)",
"ID Dialnet:",
"Dialnet ID",
"KAKEN",
"UNE Researcher ID",
"ResearcherID: ",
"ORCID",
"Custom",
"ORCID iD",
"ScienceOpen",
"ResearcherId",
"Profile system identifier"
],
"y": [
1015175,
543545,
114316,
33870,
7810,
4794,
3054,
2825,
2672,
2486,
2441,
1417,
1167,
1079,
1076,
847,
740,
644,
581,
549,
521,
501,
430,
232,
212,
200,
175,
153,
146,
135,
127,
82,
61,
51,
49,
46,
39,
35,
34,
7,
6,
5,
4,
3,
2,
1,
1,
1,
1,
1
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "IDs provided"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"3b2f617d-b46b-4da1-adbf-6f7980f38a46\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"3b2f617d-b46b-4da1-adbf-6f7980f38a46\")) { Plotly.newPlot( \"3b2f617d-b46b-4da1-adbf-6f7980f38a46\", [{\"type\": \"bar\", \"x\": [\"Scopus Author ID\", \"ResearcherID\", \"Loop profile\", \"Ci\\u00eancia ID\", \"Researcher Name Resolver ID\", \"\\u4e2d\\u56fd\\u79d1\\u5b66\\u5bb6\\u5728\\u7ebf\", \"ISNI\", \"GND\", \"Pitt ID\", \"Technical University of Denmark CWIS\", \"Sciprofile\", \"Researcher ID\", \"ID Dialnet\", \"Digital author ID\", \"Scopus Author ID: \", \"AuthenticusID\", \"HKU ResearcherPage\", \"UOW Scholars\", \"CTI Vitae\", \"Scopus Author ID:\", \"HKUST Profile\", \"Scopus author ID\", \"Chalmers ID\", \"Scopus ID\", \"iAuthor\", \"Google Scholar\", \"AuthID\", \"DAI\", \"US EPA VIVO\", \"Digital Author ID (DAI)\", \"Scopus ID\", \"Authenticus\", \"Smithsonian Profiles\", \"GitHub\", \"eScientist\", \"VIVO Cornell\", \"ResearcherID:\", \"Digital Author ID\", \"Digital author ID (DAI)\", \"ID Dialnet:\", \"Dialnet ID\", \"KAKEN\", \"UNE Researcher ID\", \"ResearcherID: \", \"ORCID\", \"Custom\", \"ORCID iD\", \"ScienceOpen\", \"ResearcherId\", \"Profile system identifier\"], \"y\": [1015175, 543545, 114316, 33870, 7810, 4794, 3054, 2825, 2672, 2486, 2441, 1417, 1167, 1079, 1076, 847, 740, 644, 581, 549, 521, 501, 430, 232, 212, 200, 175, 153, 146, 135, 127, 82, 61, 51, 49, 46, 39, 35, 34, 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": 
\"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": 
\"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": 
{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, 
\"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, 
\"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"IDs provided\"}, \"xaxis\": {\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('3b2f617d-b46b-4da1-adbf-6f7980f38a46');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data = [\n",
" go.Bar(\n",
" x=ids.groupby('provider').count().sort_values('orcid', ascending=False).index,\n",
" y=ids.groupby('provider').count().sort_values('orcid', ascending=False)['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='IDs provided',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([nan, 'Scopus Author ID', 'Ciência ID', 'Loop profile',\n",
" 'ResearcherID', 'Researcher Name Resolver ID', 'UOW Scholars',\n",
" '中国科学家在线', 'Pitt ID', 'AuthenticusID', 'Sciprofile', 'GND', 'ISNI',\n",
" 'HKU ResearcherPage', 'CTI Vitae', 'Researcher ID', 'ID Dialnet',\n",
" 'Digital author ID', 'HKUST Profile',\n",
" 'Technical University of Denmark CWIS', 'Scopus Author ID: ',\n",
" 'Digital Author ID (DAI)', 'Scopus Author ID:', 'Google Scholar',\n",
" 'AuthID', 'Digital Author ID', 'iAuthor', 'US EPA VIVO', 'GitHub',\n",
" 'Scopus author ID', 'Chalmers ID', 'Scopus ID', 'Authenticus',\n",
" 'VIVO Cornell', 'Scopus ID', 'ScienceOpen',\n",
" 'Smithsonian Profiles', 'ResearcherID:', 'DAI', 'eScientist',\n",
" 'KAKEN', 'Digital author ID (DAI)', 'ORCID', 'ID Dialnet:',\n",
" 'Dialnet ID', 'UNE Researcher ID', 'ResearcherID: ',\n",
" 'Profile system identifier', 'Custom', 'ResearcherId', 'ORCID iD'],\n",
" dtype=object)"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.unique(ids['provider'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Keywords"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"# Length of each row's keywords entry; rows with a missing entry stay NaN.\n",
"df['n_keywords'] = df['keywords'].str.len()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>n_keywords</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1681310</th>\n",
" <td>0000-0002-0673-0341</td>\n",
" <td>154.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7717699</th>\n",
" <td>0000-0002-7060-4112</td>\n",
" <td>141.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4597674</th>\n",
" <td>0000-0002-6075-3501</td>\n",
" <td>140.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2066580</th>\n",
" <td>0000-0002-4071-0301</td>\n",
" <td>118.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3531030</th>\n",
" <td>0000-0002-9638-8091</td>\n",
" <td>115.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747035</th>\n",
" <td>0000-0003-4998-1551</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747036</th>\n",
" <td>0000-0003-4998-4111</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747037</th>\n",
" <td>0000-0003-4998-6045</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747038</th>\n",
" <td>0000-0003-4998-8868</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10747039</th>\n",
" <td>0000-0003-4999-7916</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10744621 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid n_keywords\n",
"1681310 0000-0002-0673-0341 154.0\n",
"7717699 0000-0002-7060-4112 141.0\n",
"4597674 0000-0002-6075-3501 140.0\n",
"2066580 0000-0002-4071-0301 118.0\n",
"3531030 0000-0002-9638-8091 115.0\n",
"... ... ...\n",
"10747035 0000-0003-4998-1551 NaN\n",
"10747036 0000-0003-4998-4111 NaN\n",
"10747037 0000-0003-4998-6045 NaN\n",
"10747038 0000-0003-4998-8868 NaN\n",
"10747039 0000-0003-4999-7916 NaN\n",
"\n",
"[10744621 rows x 2 columns]"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values('n_keywords', ascending=False)[['orcid', 'n_keywords']]"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"0000-0002-0673-0341",
"0000-0002-7060-4112",
"0000-0002-6075-3501",
"0000-0002-4071-0301",
"0000-0002-9638-8091",
"0000-0002-4235-4259",
"0000-0001-9462-5666",
"0000-0003-0076-6287",
"0000-0002-1878-9762",
"0000-0001-6537-7683",
"0000-0001-6307-6027",
"0000-0003-2273-9888",
"0000-0003-1799-0971",
"0000-0001-5287-1949",
"0000-0002-0937-7061",
"0000-0001-9715-9357",
"0000-0001-5696-1052",
"0000-0003-2998-5520",
"0000-0001-5869-2204",
"0000-0002-0156-3580",
"0000-0002-9625-6742",
"0000-0002-8401-8018",
"0000-0001-9985-1697",
"0000-0003-4246-8579",
"0000-0002-7710-0355",
"0000-0002-8083-7382",
"0000-0001-7654-5013",
"0000-0001-6939-3859",
"0000-0002-3061-3364",
"0000-0003-2509-2549",
"0000-0002-0463-0048",
"0000-0001-5230-715X",
"0000-0001-5458-7167",
"0000-0001-9336-6850",
"0000-0003-0209-180X",
"0000-0002-8227-5387",
"0000-0002-9381-2264",
"0000-0003-3340-6413",
"0000-0003-3584-6834",
"0000-0002-2935-1934",
"0000-0002-8644-8396",
"0000-0002-8659-6321",
"0000-0002-3123-3021",
"0000-0001-5637-1124",
"0000-0001-5167-7466",
"0000-0002-8449-2211",
"0000-0003-2532-2906",
"0000-0002-3532-043X",
"0000-0002-2683-4527",
"0000-0003-4505-3678",
"0000-0002-6347-9464",
"0000-0003-4608-3844",
"0000-0003-4374-6374",
"0000-0003-4511-7942",
"0000-0002-1103-9651",
"0000-0003-3720-1183",
"0000-0001-9280-6017",
"0000-0003-4673-1063",
"0000-0001-9586-0780",
"0000-0002-5539-1761",
"0000-0003-2550-1859",
"0000-0002-8499-1045",
"0000-0003-2218-1343",
"0000-0002-5306-7781",
"0000-0003-1863-0265",
"0000-0002-8072-1152",
"0000-0003-3342-6123",
"0000-0002-2252-672X",
"0000-0002-3907-3552",
"0000-0001-6861-9561",
"0000-0002-3597-3350",
"0000-0002-5274-7742",
"0000-0002-3186-8860",
"0000-0001-8689-185X",
"0000-0002-6282-0640",
"0000-0003-3343-5660",
"0000-0001-7133-7848",
"0000-0003-4486-2684",
"0000-0002-9014-2090",
"0000-0001-6843-9325",
"0000-0003-0097-4182",
"0000-0003-1245-7705",
"0000-0002-4432-3448",
"0000-0001-7857-4133",
"0000-0002-1294-2156",
"0000-0001-8445-412X",
"0000-0002-1411-3028",
"0000-0003-3387-3193",
"0000-0002-1545-7818",
"0000-0002-9125-6022",
"0000-0002-3898-9542",
"0000-0002-3866-6460",
"0000-0003-4283-2895",
"0000-0003-4153-6779",
"0000-0002-4598-2891",
"0000-0002-0211-7195",
"0000-0002-1770-9660",
"0000-0002-1960-5857",
"0000-0003-2640-6757",
"0000-0002-5432-9595"
],
"y": [
154,
141,
140,
118,
115,
104,
98,
94,
92,
91,
88,
86,
84,
82,
78,
77,
76,
75,
74,
73,
71,
70,
69,
66,
64,
62,
61,
60,
58,
57,
56,
54,
53,
53,
52,
51,
51,
51,
51,
50,
50,
50,
50,
49,
49,
49,
48,
48,
48,
48,
48,
48,
47,
47,
46,
46,
46,
45,
45,
44,
44,
44,
44,
44,
44,
43,
43,
42,
42,
42,
42,
42,
42,
41,
41,
41,
41,
41,
41,
41,
40,
40,
40,
40,
40,
40,
39,
39,
39,
39,
39,
39,
39,
39,
39,
39,
38,
38,
38,
38
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Keywords provided"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"dc5f580a-e876-463c-a741-0276de187bae\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"dc5f580a-e876-463c-a741-0276de187bae\")) { Plotly.newPlot( \"dc5f580a-e876-463c-a741-0276de187bae\", [{\"type\": \"bar\", \"x\": [\"0000-0002-0673-0341\", \"0000-0002-7060-4112\", \"0000-0002-6075-3501\", \"0000-0002-4071-0301\", \"0000-0002-9638-8091\", \"0000-0002-4235-4259\", \"0000-0001-9462-5666\", \"0000-0003-0076-6287\", \"0000-0002-1878-9762\", \"0000-0001-6537-7683\", \"0000-0001-6307-6027\", \"0000-0003-2273-9888\", \"0000-0003-1799-0971\", \"0000-0001-5287-1949\", \"0000-0002-0937-7061\", \"0000-0001-9715-9357\", \"0000-0001-5696-1052\", \"0000-0003-2998-5520\", \"0000-0001-5869-2204\", \"0000-0002-0156-3580\", \"0000-0002-9625-6742\", \"0000-0002-8401-8018\", \"0000-0001-9985-1697\", \"0000-0003-4246-8579\", \"0000-0002-7710-0355\", \"0000-0002-8083-7382\", \"0000-0001-7654-5013\", \"0000-0001-6939-3859\", \"0000-0002-3061-3364\", \"0000-0003-2509-2549\", \"0000-0002-0463-0048\", \"0000-0001-5230-715X\", \"0000-0001-5458-7167\", \"0000-0001-9336-6850\", \"0000-0003-0209-180X\", \"0000-0002-8227-5387\", \"0000-0002-9381-2264\", \"0000-0003-3340-6413\", \"0000-0003-3584-6834\", \"0000-0002-2935-1934\", \"0000-0002-8644-8396\", \"0000-0002-8659-6321\", \"0000-0002-3123-3021\", \"0000-0001-5637-1124\", \"0000-0001-5167-7466\", \"0000-0002-8449-2211\", \"0000-0003-2532-2906\", \"0000-0002-3532-043X\", \"0000-0002-2683-4527\", \"0000-0003-4505-3678\", \"0000-0002-6347-9464\", \"0000-0003-4608-3844\", \"0000-0003-4374-6374\", \"0000-0003-4511-7942\", \"0000-0002-1103-9651\", \"0000-0003-3720-1183\", \"0000-0001-9280-6017\", \"0000-0003-4673-1063\", \"0000-0001-9586-0780\", \"0000-0002-5539-1761\", \"0000-0003-2550-1859\", \"0000-0002-8499-1045\", \"0000-0003-2218-1343\", 
\"0000-0002-5306-7781\", \"0000-0003-1863-0265\", \"0000-0002-8072-1152\", \"0000-0003-3342-6123\", \"0000-0002-2252-672X\", \"0000-0002-3907-3552\", \"0000-0001-6861-9561\", \"0000-0002-3597-3350\", \"0000-0002-5274-7742\", \"0000-0002-3186-8860\", \"0000-0001-8689-185X\", \"0000-0002-6282-0640\", \"0000-0003-3343-5660\", \"0000-0001-7133-7848\", \"0000-0003-4486-2684\", \"0000-0002-9014-2090\", \"0000-0001-6843-9325\", \"0000-0003-0097-4182\", \"0000-0003-1245-7705\", \"0000-0002-4432-3448\", \"0000-0001-7857-4133\", \"0000-0002-1294-2156\", \"0000-0001-8445-412X\", \"0000-0002-1411-3028\", \"0000-0003-3387-3193\", \"0000-0002-1545-7818\", \"0000-0002-9125-6022\", \"0000-0002-3898-9542\", \"0000-0002-3866-6460\", \"0000-0003-4283-2895\", \"0000-0003-4153-6779\", \"0000-0002-4598-2891\", \"0000-0002-0211-7195\", \"0000-0002-1770-9660\", \"0000-0002-1960-5857\", \"0000-0003-2640-6757\", \"0000-0002-5432-9595\"], \"y\": [154.0, 141.0, 140.0, 118.0, 115.0, 104.0, 98.0, 94.0, 92.0, 91.0, 88.0, 86.0, 84.0, 82.0, 78.0, 77.0, 76.0, 75.0, 74.0, 73.0, 71.0, 70.0, 69.0, 66.0, 64.0, 62.0, 61.0, 60.0, 58.0, 57.0, 56.0, 54.0, 53.0, 53.0, 52.0, 51.0, 51.0, 51.0, 51.0, 50.0, 50.0, 50.0, 50.0, 49.0, 49.0, 49.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 47.0, 47.0, 46.0, 46.0, 46.0, 45.0, 45.0, 44.0, 44.0, 44.0, 44.0, 44.0, 44.0, 43.0, 43.0, 42.0, 42.0, 42.0, 42.0, 42.0, 42.0, 41.0, 41.0, 41.0, 41.0, 41.0, 41.0, 41.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 39.0, 39.0, 39.0, 39.0, 39.0, 39.0, 39.0, 39.0, 39.0, 39.0, 38.0, 38.0, 38.0, 38.0]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", 
\"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], 
[0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], 
\"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", 
\"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Keywords 
provided\"}, \"xaxis\": {\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('dc5f580a-e876-463c-a741-0276de187bae');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Rank profiles by keyword count once and keep the 100 largest, so both\n",
"# bar-chart axes are read from the same sorted frame instead of sorting twice.\n",
"top_keyword_profiles = df.sort_values('n_keywords', ascending=False).head(100)\n",
"\n",
"data = [\n",
"    go.Bar(\n",
"        x=top_keyword_profiles['orcid'],\n",
"        y=top_keyword_profiles['n_keywords']\n",
"    )\n",
"]\n",
"\n",
"# Slanted tick labels keep the ORCID iDs on the x axis legible.\n",
"layout = go.Layout(\n",
"    title='Keywords provided',\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Correlation"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"coloraxis": "coloraxis",
"hovertemplate": "x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>",
"name": "0",
"type": "heatmap",
"x": [
"claimed",
"verified_email",
"verified_primary_email",
"n_works",
"n_emails",
"n_urls",
"n_ids",
"n_keywords"
],
"xaxis": "x",
"y": [
"claimed",
"verified_email",
"verified_primary_email",
"n_works",
"n_emails",
"n_urls",
"n_ids",
"n_keywords"
],
"yaxis": "y",
"z": [
[
null,
null,
null,
null,
null,
null,
null,
null
],
[
null,
1,
0.9764600091179001,
0.06481728326324665,
0.011268135706995959,
0.016235518285109687,
0.08833871138587861,
0.017045184883982135
],
[
null,
0.9764600091179001,
1,
0.0659649633755603,
0.009337798958686118,
0.0168692743777146,
0.08953190210488794,
0.017626925761491493
],
[
null,
0.06481728326324665,
0.0659649633755603,
1,
0.05241643710974057,
0.05179828612278866,
0.23927720131732308,
0.030960689715636
],
[
null,
0.011268135706995959,
0.009337798958686118,
0.05241643710974057,
1,
0.11119920854300894,
0.04817263453943147,
0.04157758861961359
],
[
null,
0.016235518285109687,
0.0168692743777146,
0.05179828612278866,
0.11119920854300894,
1,
0.06925708918455128,
0.15926017909633472
],
[
null,
0.08833871138587861,
0.08953190210488794,
0.23927720131732308,
0.04817263453943147,
0.06925708918455128,
1,
0.06320236481237387
],
[
null,
0.017045184883982135,
0.017626925761491493,
0.030960689715636,
0.04157758861961359,
0.15926017909633472,
0.06320236481237387,
1
]
]
}
],
"layout": {
"coloraxis": {
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"margin": {
"t": 60
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"xaxis": {
"anchor": "y",
"constrain": "domain",
"domain": [
0,
1
],
"scaleanchor": "y"
},
"yaxis": {
"anchor": "x",
"autorange": "reversed",
"constrain": "domain",
"domain": [
0,
1
]
}
}
},
"text/html": [
"<div> <div id=\"ce812075-8439-44eb-98c6-e90030696262\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"ce812075-8439-44eb-98c6-e90030696262\")) { Plotly.newPlot( \"ce812075-8439-44eb-98c6-e90030696262\", [{\"coloraxis\": \"coloraxis\", \"hovertemplate\": \"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\", \"name\": \"0\", \"type\": \"heatmap\", \"x\": [\"claimed\", \"verified_email\", \"verified_primary_email\", \"n_works\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\"], \"xaxis\": \"x\", \"y\": [\"claimed\", \"verified_email\", \"verified_primary_email\", \"n_works\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\"], \"yaxis\": \"y\", \"z\": [[null, null, null, null, null, null, null, null], [null, 1.0, 0.9764600091179001, 0.06481728326324665, 0.011268135706995959, 0.016235518285109687, 0.08833871138587861, 0.017045184883982135], [null, 0.9764600091179001, 1.0, 0.0659649633755603, 0.009337798958686118, 0.0168692743777146, 0.08953190210488794, 0.017626925761491493], [null, 0.06481728326324665, 0.0659649633755603, 1.0, 0.05241643710974057, 0.05179828612278866, 0.23927720131732308, 0.030960689715636], [null, 0.011268135706995959, 0.009337798958686118, 0.05241643710974057, 1.0, 0.11119920854300894, 0.04817263453943147, 0.04157758861961359], [null, 0.016235518285109687, 0.0168692743777146, 0.05179828612278866, 0.11119920854300894, 1.0, 0.06925708918455128, 0.15926017909633472], [null, 0.08833871138587861, 0.08953190210488794, 0.23927720131732308, 0.04817263453943147, 0.06925708918455128, 1.0, 0.06320236481237387], [null, 0.017045184883982135, 0.017626925761491493, 0.030960689715636, 0.04157758861961359, 0.15926017909633472, 0.06320236481237387, 1.0]]}], {\"coloraxis\": {\"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], 
[0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"margin\": {\"t\": 60}, \"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], 
\"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], 
\"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], 
[0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": 
{\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"xaxis\": {\"anchor\": \"y\", \"constrain\": \"domain\", \"domain\": [0.0, 1.0], \"scaleanchor\": \"y\"}, \"yaxis\": {\"anchor\": \"x\", \"autorange\": \"reversed\", \"constrain\": \"domain\", \"domain\": [0.0, 1.0]}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('ce812075-8439-44eb-98c6-e90030696262');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = px.imshow(df[df.n_ids > 0].corr())\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}