fake-orcid-analysis/notebooks/01-Exploration.ipynb

18766 lines
678 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exploratory analysis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TODO:\n",
"- Understanding why fake profiles are created can give insight into how to catch them (it could be trivial with prior knowledge, e.g., SEO hacking => URLs)\n",
"- Compile a catalogue of cases (e.g., author publishing with an empty ORCID profile, author publishing but not on OpenAIRE, etc.)\n",
"- Is the temporal dimension of any use?\n",
"- Can we access private info thanks to the OpenAIRE-ORCID agreement?\n"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" requirejs.config({\n",
" paths: {\n",
" 'plotly': ['https://cdn.plot.ly/plotly-latest.min']\n",
" }\n",
" });\n",
" require(['plotly'], function(Plotly) {\n",
" window._Plotly = Plotly;\n",
" });\n",
" }\n",
" </script>\n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import glob\n",
"\n",
"import pandas as pd\n",
"import ast\n",
"import tldextract\n",
"import numpy as np\n",
"\n",
"import antispam\n",
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
"import plotly.express as px\n",
"\n",
"# Notebook-wide display setup: plotly in connected mode, no cap on displayed DataFrame columns.\n",
"init_notebook_mode(connected=True)\n",
"pd.set_option('display.max_columns', None)\n",
"\n",
"# Module-level settings rewritten by set_top_n(); presumably read by later plotting cells (not verified here).\n",
"TOP_N = 0\n",
"TOP_RANGE = [0, 0]\n",
"\n",
"\n",
"def set_top_n(n):\n",
"    \"\"\"Point the TOP_N / TOP_RANGE globals at a top-n view.\n",
"\n",
"    TOP_N is set to n and TOP_RANGE to [-0.5, n - 0.5], i.e. the positions\n",
"    0..n-1 padded by half a unit on either side.\n",
"    \"\"\"\n",
"    global TOP_N, TOP_RANGE\n",
"    TOP_N, TOP_RANGE = n, [-0.5, n - 1 + 0.5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable solid ORCID iDs for explorative purposes:"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"# Reference ORCID iDs of profiles regarded as solid, kept as known-good examples for exploration.\n",
"AM = '0000-0002-5193-7851'\n",
"PP = '0000-0002-8588-4196'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable anomalies:"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"# Anomalous profiles, kept separate from the outright fake ones listed in the next code cell.\n",
"JOURNAL = '0000-0003-1815-5732'\n",
"NOINFO = '0000-0001-5009-2052'\n",
"VALID_NO_OA = '0000-0002-5154-6404'  # genuine profile, but not in OpenAIRE\n",
"WORK_MISUSE = '0000-0001-7870-1120'\n",
"# TODO: find a group-shared ORCID iD, if possible"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable fake ORCID iDs:"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"# ORCID iDs of profiles flagged as fake; trailing notes record what each profile carries.\n",
"SCAFFOLD = '0000-0001-5004-7761'\n",
"WHATSAPP = '0000-0001-6997-9470'\n",
"PENIS = '0000-0002-3399-7287'\n",
"BITCOIN = '0000-0002-7518-6845'\n",
"FITNESS_CHINA = '0000-0002-1234-835X'  # URL record + employment\n",
"CANNABIS = '0000-0002-9025-8632'  # URL > 70 + works (REMOVED)\n",
"PLUMBER = '0000-0002-1700-8311'  # URL > 10 + works"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the dataset"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0001-6097-3953</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-03-02t09:29:16.528z</td>\n",
" <td>2018-03-02t09:43:07.551z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0001-6112-5550</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[v.i. yurtaev; v. yurtaev]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[professor, peoples friendship university of ...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-04-03t07:50:23.358z</td>\n",
" <td>2020-03-18t09:42:44.753z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0001-6152-2695</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2019-12-11t15:31:56.388z</td>\n",
" <td>2020-01-28t15:34:17.309z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0001-6220-5683</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[research scientist, new york university abu ...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-08-18t12:36:45.307z</td>\n",
" <td>2020-09-23t13:37:54.180z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0001-7071-8294</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcher (academic), universidad de zarago...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2014-03-10t13:22:01.966z</td>\n",
" <td>2016-06-14t22:17:54.470z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email given_names \\\n",
"0 0000-0001-6097-3953 False False <NA> \n",
"1 0000-0001-6112-5550 True True <NA> \n",
"2 0000-0001-6152-2695 True True <NA> \n",
"3 0000-0001-6220-5683 True True <NA> \n",
"4 0000-0001-7071-8294 True True <NA> \n",
"\n",
" family_name biography other_names primary_email keywords \\\n",
"0 <NA> <NA> NaN <NA> NaN \n",
"1 <NA> <NA> [v.i. yurtaev; v. yurtaev] <NA> NaN \n",
"2 <NA> <NA> NaN <NA> NaN \n",
"3 <NA> <NA> NaN <NA> NaN \n",
"4 <NA> <NA> NaN <NA> NaN \n",
"\n",
" external_ids education employment \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN [[professor, peoples friendship university of ... \n",
"2 NaN NaN NaN \n",
"3 NaN NaN [[research scientist, new york university abu ... \n",
"4 NaN NaN [[researcher (academic), universidad de zarago... \n",
"\n",
" n_works works_source activation_date last_update_date \\\n",
"0 0 NaN 2018-03-02t09:29:16.528z 2018-03-02t09:43:07.551z \n",
"1 0 NaN 2018-04-03t07:50:23.358z 2020-03-18t09:42:44.753z \n",
"2 0 NaN 2019-12-11t15:31:56.388z 2020-01-28t15:34:17.309z \n",
"3 0 NaN 2015-08-18t12:36:45.307z 2020-09-23t13:37:54.180z \n",
"4 0 NaN 2014-03-10t13:22:01.966z 2016-06-14t22:17:54.470z \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"0 0 0 0 0 0 NaN \n",
"1 0 0 0 0 0 NaN \n",
"2 0 0 0 0 0 NaN \n",
"3 0 0 0 0 0 NaN \n",
"4 0 0 0 0 0 NaN \n",
"\n",
" other_email_domains url_domains n_emails n_urls n_ids n_keywords \\\n",
"0 NaN NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" n_education n_employment \n",
"0 NaN NaN \n",
"1 NaN 1.0 \n",
"2 NaN NaN \n",
"3 NaN 1.0 \n",
"4 NaN 2.0 "
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"DATASET_GLOB = '../data/processed/dataset.pkl.*'\n",
"\n",
"parts = glob.glob(DATASET_GLOB)\n",
"if not parts:\n",
"    # Without this check pd.concat fails with an opaque 'No objects to concatenate'.\n",
"    raise FileNotFoundError(f'no dataset chunks match {DATASET_GLOB!r}')\n",
"\n",
"# NOTE: unpickling can execute arbitrary code; only load chunks produced by a trusted pipeline.\n",
"# Chunks are concatenated in lexicographic filename order.\n",
"df = pd.concat(pd.read_pickle(part) for part in sorted(parts))\n",
"df.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable profiles inspection"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3073261</th>\n",
" <td>0000-0002-5193-7851</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>andrea</td>\n",
" <td>mannocci</td>\n",
" <td>data scientist &amp; researcher; scholarly knowled...</td>\n",
" <td>NaN</td>\n",
" <td>andrea.mannocci@isti.cnr.it</td>\n",
" <td>[research infrastructures, science of science,...</td>\n",
" <td>[[scopus author id, 55233589900]]</td>\n",
" <td>[[information engineering, ph.d., università d...</td>\n",
" <td>[[research associate, istituto di scienza e te...</td>\n",
" <td>37</td>\n",
" <td>[scopus - elsevier, crossref metadata search, ...</td>\n",
" <td>2017-09-12t14:28:33.467z</td>\n",
" <td>2021-03-17t15:40:07.776z</td>\n",
" <td>34</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" <td>1</td>\n",
" <td>isti.cnr.it</td>\n",
" <td>NaN</td>\n",
" <td>[github.io, twitter.com, linkedin.com]</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"3073261 0000-0002-5193-7851 1 1 \n",
"\n",
" given_names family_name \\\n",
"3073261 andrea mannocci \n",
"\n",
" biography other_names \\\n",
"3073261 data scientist & researcher; scholarly knowled... NaN \n",
"\n",
" primary_email \\\n",
"3073261 andrea.mannocci@isti.cnr.it \n",
"\n",
" keywords \\\n",
"3073261 [research infrastructures, science of science,... \n",
"\n",
" external_ids \\\n",
"3073261 [[scopus author id, 55233589900]] \n",
"\n",
" education \\\n",
"3073261 [[information engineering, ph.d., università d... \n",
"\n",
" employment n_works \\\n",
"3073261 [[research associate, istituto di scienza e te... 37 \n",
"\n",
" works_source \\\n",
"3073261 [scopus - elsevier, crossref metadata search, ... \n",
"\n",
" activation_date last_update_date n_doi n_arxiv \\\n",
"3073261 2017-09-12t14:28:33.467z 2021-03-17t15:40:07.776z 34 0 \n",
"\n",
" n_pmc n_other_pids label primary_email_domain other_email_domains \\\n",
"3073261 0 60 1 isti.cnr.it NaN \n",
"\n",
" url_domains n_emails n_urls n_ids \\\n",
"3073261 [github.io, twitter.com, linkedin.com] NaN 3.0 1.0 \n",
"\n",
" n_keywords n_education n_employment \n",
"3073261 5.0 4.0 5.0 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Full record of one of the solid reference profiles.\n",
"df.loc[df['orcid'].eq(AM)]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9887272</th>\n",
" <td>0000-0001-6997-9470</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>other</td>\n",
" <td>whatsapp</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[whatsapp gb apk, whatsapp gb baixar, whatsapp...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-07t10:37:12.237z</td>\n",
" <td>2020-10-08t02:32:03.935z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[otherwhatsapp.com, im-creator.com, facebook.c...</td>\n",
" <td>NaN</td>\n",
" <td>27.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"9887272 0000-0001-6997-9470 1 1 \n",
"\n",
" given_names family_name biography other_names primary_email \\\n",
"9887272 other whatsapp NaN NaN NaN \n",
"\n",
" keywords external_ids \\\n",
"9887272 [whatsapp gb apk, whatsapp gb baixar, whatsapp... NaN \n",
"\n",
" education employment n_works works_source activation_date \\\n",
"9887272 NaN NaN 0 NaN 2020-10-07t10:37:12.237z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n",
"9887272 2020-10-08t02:32:03.935z 0 0 0 0 0 \n",
"\n",
" primary_email_domain other_email_domains \\\n",
"9887272 NaN NaN \n",
"\n",
" url_domains n_emails n_urls \\\n",
"9887272 [otherwhatsapp.com, im-creator.com, facebook.c... NaN 27.0 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"9887272 NaN 4.0 NaN NaN "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Full record of one of the known fake profiles.\n",
"df.loc[df['orcid'].eq(WHATSAPP)]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"orcid 10989649\n",
"verified_email 10989649\n",
"verified_primary_email 10989649\n",
"given_names 10959039\n",
"family_name 10671715\n",
"biography 354015\n",
"other_names 554684\n",
"primary_email 124722\n",
"keywords 649637\n",
"external_ids 1308598\n",
"education 2441645\n",
"employment 2680488\n",
"n_works 10989649\n",
"works_source 2740939\n",
"activation_date 10989649\n",
"last_update_date 10989649\n",
"n_doi 10989649\n",
"n_arxiv 10989649\n",
"n_pmc 10989649\n",
"n_other_pids 10989649\n",
"label 10989649\n",
"primary_email_domain 124722\n",
"other_email_domains 48615\n",
"url_domains 715067\n",
"n_emails 48615\n",
"n_urls 715067\n",
"n_ids 1308598\n",
"n_keywords 649637\n",
"n_education 2441645\n",
"n_employment 2680488\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Non-null entries per column: shows how sparsely most profile fields are filled in.\n",
"df.count(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 10989649\n",
"unique 10989649\n",
"top 0000-0001-7886-4851\n",
"freq 1\n",
"Name: orcid, dtype: object"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The ORCID iD is the record key: fail fast if the concatenated chunks contain repeats\n",
"# instead of relying on eyeballing count == unique in the summary below.\n",
"assert df['orcid'].is_unique, 'duplicate ORCID iDs found in the dataset'\n",
"df['orcid'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Primary email"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 124722\n",
"unique 124718\n",
"top maykin@owasp.org\n",
"freq 2\n",
"Name: primary_email, dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary of the primary email column; unique < count means some addresses repeat.\n",
"df.loc[:, 'primary_email'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Duplicated primary emails (only the second and later occurrences are listed):"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1681787 opercin@erbakan.edu.tr\n",
"5590332 patrick.davey@monash.edu\n",
"9316843 maykin@owasp.org\n",
"10375852 andycheng2026@163.com\n",
"Name: primary_email, dtype: object"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# duplicated() flags the second and later occurrences only. The mask is computed on the\n",
"# NaN-free series itself (callable indexer), so mask and data always share the same index.\n",
"df['primary_email'].dropna().loc[lambda emails: emails.duplicated()]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7543981</th>\n",
" <td>0000-0002-0836-2271</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>maykin</td>\n",
" <td>warasart</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>maykin@owasp.org</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-09-15t04:43:55.709z</td>\n",
" <td>2020-09-15t05:17:28.509z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>owasp.org</td>\n",
" <td>[dga.or.th]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9316843</th>\n",
" <td>0000-0001-9855-1676</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>maykin</td>\n",
" <td>warasart</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>maykin@owasp.org</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-23t17:51:51.925z</td>\n",
" <td>2021-01-01t15:00:52.053z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>owasp.org</td>\n",
" <td>[dga.or.th, ieee.org]</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"7543981 0000-0002-0836-2271 1 1 \n",
"9316843 0000-0001-9855-1676 1 1 \n",
"\n",
" given_names family_name biography other_names primary_email \\\n",
"7543981 maykin warasart NaN NaN maykin@owasp.org \n",
"9316843 maykin warasart NaN NaN maykin@owasp.org \n",
"\n",
" keywords external_ids education employment n_works works_source \\\n",
"7543981 NaN NaN NaN NaN 0 NaN \n",
"9316843 NaN NaN NaN NaN 0 NaN \n",
"\n",
" activation_date last_update_date n_doi n_arxiv \\\n",
"7543981 2020-09-15t04:43:55.709z 2020-09-15t05:17:28.509z 0 0 \n",
"9316843 2020-10-23t17:51:51.925z 2021-01-01t15:00:52.053z 0 0 \n",
"\n",
" n_pmc n_other_pids label primary_email_domain \\\n",
"7543981 0 0 0 owasp.org \n",
"9316843 0 0 0 owasp.org \n",
"\n",
" other_email_domains url_domains n_emails n_urls n_ids \\\n",
"7543981 [dga.or.th] NaN 1.0 NaN NaN \n",
"9316843 [dga.or.th, ieee.org] NaN 2.0 NaN NaN \n",
"\n",
" n_keywords n_education n_employment \n",
"7543981 NaN NaN NaN \n",
"9316843 NaN NaN NaN "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# All profiles sharing this primary email.\n",
"df.loc[df['primary_email'].eq('maykin@owasp.org')]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>347852</th>\n",
" <td>0000-0002-2232-9638</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>osman</td>\n",
" <td>perçin</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opercin@erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-01-12t13:47:55.549z</td>\n",
" <td>2020-01-27t07:38:24.269z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1681787</th>\n",
" <td>0000-0003-0033-0918</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>osman</td>\n",
" <td>perçin</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opercin@erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, necmettin erbakan university, konya, , tr,...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-10-13t05:47:12.014z</td>\n",
" <td>2020-12-25t13:52:03.976z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"347852 0000-0002-2232-9638 1 1 \n",
"1681787 0000-0003-0033-0918 1 1 \n",
"\n",
" given_names family_name biography other_names primary_email \\\n",
"347852 osman perçin NaN NaN opercin@erbakan.edu.tr \n",
"1681787 osman perçin NaN NaN opercin@erbakan.edu.tr \n",
"\n",
" keywords external_ids education \\\n",
"347852 NaN NaN NaN \n",
"1681787 NaN NaN NaN \n",
"\n",
" employment n_works \\\n",
"347852 NaN 0 \n",
"1681787 [[, necmettin erbakan university, konya, , tr,... 0 \n",
"\n",
" works_source activation_date last_update_date \\\n",
"347852 NaN 2015-01-12t13:47:55.549z 2020-01-27t07:38:24.269z \n",
"1681787 NaN 2015-10-13t05:47:12.014z 2020-12-25t13:52:03.976z \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"347852 0 0 0 0 0 erbakan.edu.tr \n",
"1681787 0 0 0 0 0 erbakan.edu.tr \n",
"\n",
" other_email_domains url_domains n_emails n_urls n_ids n_keywords \\\n",
"347852 NaN NaN NaN NaN NaN NaN \n",
"1681787 NaN NaN NaN NaN NaN NaN \n",
"\n",
" n_education n_employment \n",
"347852 NaN NaN \n",
"1681787 NaN 1.0 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# All profiles sharing this primary email.\n",
"df.loc[df['primary_email'].eq('opercin@erbakan.edu.tr')]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>954085</th>\n",
" <td>0000-0002-9158-1757</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>patrick</td>\n",
" <td>davey</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>patrick.davey@monash.edu</td>\n",
" <td>[radiochemistry, radiopharmaceuticals, inorgan...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[phd student, monash university, melbourne, ,...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2019-05-09t23:01:02.170z</td>\n",
" <td>2019-08-20t03:00:17.844z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>monash.edu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5590332</th>\n",
" <td>0000-0002-8774-0030</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>patrick</td>\n",
" <td>davey</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>patrick.davey@monash.edu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[phd student, monash university, melbourne, v...</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2018-09-11t10:47:10.997z</td>\n",
" <td>2021-02-09t06:21:44.138z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>monash.edu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"954085 0000-0002-9158-1757 1 1 \n",
"5590332 0000-0002-8774-0030 1 1 \n",
"\n",
" given_names family_name biography other_names \\\n",
"954085 patrick davey NaN NaN \n",
"5590332 patrick davey NaN NaN \n",
"\n",
" primary_email \\\n",
"954085 patrick.davey@monash.edu \n",
"5590332 patrick.davey@monash.edu \n",
"\n",
" keywords external_ids \\\n",
"954085 [radiochemistry, radiopharmaceuticals, inorgan... NaN \n",
"5590332 NaN NaN \n",
"\n",
" education employment n_works \\\n",
"954085 NaN [[phd student, monash university, melbourne, ,... 0 \n",
"5590332 NaN [[phd student, monash university, melbourne, v... 1 \n",
"\n",
" works_source activation_date last_update_date \\\n",
"954085 NaN 2019-05-09t23:01:02.170z 2019-08-20t03:00:17.844z \n",
"5590332 [crossref] 2018-09-11t10:47:10.997z 2021-02-09t06:21:44.138z \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"954085 0 0 0 0 0 monash.edu \n",
"5590332 1 0 0 0 1 monash.edu \n",
"\n",
" other_email_domains url_domains n_emails n_urls n_ids n_keywords \\\n",
"954085 NaN NaN NaN NaN NaN 4.0 \n",
"5590332 NaN NaN NaN NaN NaN NaN \n",
"\n",
" n_education n_employment \n",
"954085 NaN 1.0 \n",
"5590332 NaN 1.0 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['primary_email'] == 'patrick.davey@monash.edu']"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 124722\n",
"unique 17160\n",
"top gmail.com\n",
"freq 26750\n",
"Name: primary_email_domain, dtype: object"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['primary_email_domain'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" </tr>\n",
" <tr>\n",
" <th>primary_email_domain</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>gmail.com</th>\n",
" <td>26750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hotmail.com</th>\n",
" <td>3801</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yahoo.com</th>\n",
" <td>2625</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163.com</th>\n",
" <td>2132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yuhs.ac</th>\n",
" <td>1134</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imf.csic.es</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imf.org</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imfd.tu-freiberg.de</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imft.fr</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>zzuli.edu.cn</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>17160 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid\n",
"primary_email_domain \n",
"gmail.com 26750\n",
"hotmail.com 3801\n",
"yahoo.com 2625\n",
"163.com 2132\n",
"yuhs.ac 1134\n",
"... ...\n",
"imf.csic.es 1\n",
"imf.org 1\n",
"imfd.tu-freiberg.de 1\n",
"imft.fr 1\n",
"zzuli.edu.cn 1\n",
"\n",
"[17160 rows x 1 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top_primary_emails = df[['primary_email_domain', 'orcid']]\\\n",
" .groupby('primary_email_domain')\\\n",
" .count()\\\n",
" .sort_values('orcid', ascending=False)\n",
"top_primary_emails"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"gmail.com",
"hotmail.com",
"yahoo.com",
"163.com",
"yuhs.ac",
"qq.com",
"outlook.com",
"126.com",
"bu.edu",
"usgs.gov",
"mail.ru",
"usp.br",
"yahoo.com.br",
"ua.pt",
"umich.edu",
"ust.hk",
"foxmail.com",
"uomustansiriyah.edu.iq",
"yandex.ru",
"uq.edu.au",
"ukr.net",
"unesp.br",
"ucl.ac.uk",
"ieee.org",
"naver.com",
"stcatz.ox.ac.uk",
"st-annes.ox.ac.uk",
"yahoo.fr",
"ucm.es",
"live.com"
],
"y": [
26750,
3801,
2625,
2132,
1134,
1059,
948,
766,
629,
586,
579,
464,
459,
302,
290,
277,
260,
248,
244,
235,
226,
218,
210,
205,
188,
184,
184,
174,
174,
165
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top-30 email domains"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"df7e136d-ecdd-404a-835c-dc74f32d700a\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"df7e136d-ecdd-404a-835c-dc74f32d700a\")) { Plotly.newPlot( \"df7e136d-ecdd-404a-835c-dc74f32d700a\", [{\"type\": \"bar\", \"x\": [\"gmail.com\", \"hotmail.com\", \"yahoo.com\", \"163.com\", \"yuhs.ac\", \"qq.com\", \"outlook.com\", \"126.com\", \"bu.edu\", \"usgs.gov\", \"mail.ru\", \"usp.br\", \"yahoo.com.br\", \"ua.pt\", \"umich.edu\", \"ust.hk\", \"foxmail.com\", \"uomustansiriyah.edu.iq\", \"yandex.ru\", \"uq.edu.au\", \"ukr.net\", \"unesp.br\", \"ucl.ac.uk\", \"ieee.org\", \"naver.com\", \"stcatz.ox.ac.uk\", \"st-annes.ox.ac.uk\", \"yahoo.fr\", \"ucm.es\", \"live.com\"], \"y\": [26750, 3801, 2625, 2132, 1134, 1059, 948, 766, 629, 586, 579, 464, 459, 302, 290, 277, 260, 248, 244, 235, 226, 218, 210, 205, 188, 184, 184, 174, 174, 165]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, 
\"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], 
[0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, 
\"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, 
\"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top-30 email domains\"}, \"xaxis\": {\"range\": [-0.5, 29.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('df7e136d-ecdd-404a-835c-dc74f32d700a');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(30)\n",
"data = [\n",
" go.Bar(\n",
" x=top_primary_emails[:TOP_N].index,\n",
" y=top_primary_emails[:TOP_N]['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top-%s email domains' % TOP_N,\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Other emails"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>251</th>\n",
" <td>0000-0002-5916-446X</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>antonio gilvan</td>\n",
" <td>teixeira júnior</td>\n",
" <td>NaN</td>\n",
" <td>[teixeira, antônio gilvan, júnior, antonio gil...</td>\n",
" <td>gilvan.junior@aluno.ufca.edu.br</td>\n",
" <td>[ethicis; medicine; infectology; neurology; ne...</td>\n",
" <td>[[scopus author id, 56647743200], [scopus auth...</td>\n",
" <td>[[faculty of health and life sciences, , unive...</td>\n",
" <td>NaN</td>\n",
" <td>14</td>\n",
" <td>[antonio gilvan teixeira júnior, scopus - else...</td>\n",
" <td>2016-05-18t11:26:36.642z</td>\n",
" <td>2016-09-20t18:25:05.728z</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>aluno.ufca.edu.br</td>\n",
" <td>[liverpool.ac.uk]</td>\n",
" <td>[researchgate.net, academia.edu, cnpq.br]</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>316</th>\n",
" <td>0000-0002-8742-947X</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>aaron</td>\n",
" <td>tan shing loong</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>aaron.tanshingloong@wadh.ox.ac.uk</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[ruskin school of art; wadham college, , univ...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-10-05t23:10:08.771z</td>\n",
" <td>2016-06-14t19:55:50.313z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>wadh.ox.ac.uk</td>\n",
" <td>[rsa.ox.ac.uk]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>433</th>\n",
" <td>0000-0001-9097-2281</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>abhishek</td>\n",
" <td>solanki</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[senior engineer, robert bosch (india), benga...</td>\n",
" <td>1</td>\n",
" <td>[abhishek solanki]</td>\n",
" <td>2019-04-22t04:43:06.232z</td>\n",
" <td>2020-07-02t14:18:28.305z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[in.bosch.com]</td>\n",
" <td>[github.com, linkedin.com]</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>497</th>\n",
" <td>0000-0002-8614-3007</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>adam</td>\n",
" <td>arra</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2017-11-15t06:33:45.625z</td>\n",
" <td>2017-11-15t06:44:02.998z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[hct.ac.ae]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>869</th>\n",
" <td>0000-0001-9884-5498</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>alberto</td>\n",
" <td>ronzani</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>alberto@aronza.com</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[research scientist, vtt technical research c...</td>\n",
" <td>19</td>\n",
" <td>[crossref metadata search, alberto ronzani, cr...</td>\n",
" <td>2014-04-16t13:21:54.287z</td>\n",
" <td>2020-09-28t15:10:37.439z</td>\n",
" <td>18</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>aronza.com</td>\n",
" <td>[vtt.fi]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"251 0000-0002-5916-446X 1 1 \n",
"316 0000-0002-8742-947X 1 1 \n",
"433 0000-0001-9097-2281 1 1 \n",
"497 0000-0002-8614-3007 1 1 \n",
"869 0000-0001-9884-5498 1 1 \n",
"\n",
" given_names family_name biography \\\n",
"251 antonio gilvan teixeira júnior NaN \n",
"316 aaron tan shing loong NaN \n",
"433 abhishek solanki NaN \n",
"497 adam arra NaN \n",
"869 alberto ronzani NaN \n",
"\n",
" other_names \\\n",
"251 [teixeira, antônio gilvan, júnior, antonio gil... \n",
"316 NaN \n",
"433 NaN \n",
"497 NaN \n",
"869 NaN \n",
"\n",
" primary_email \\\n",
"251 gilvan.junior@aluno.ufca.edu.br \n",
"316 aaron.tanshingloong@wadh.ox.ac.uk \n",
"433 NaN \n",
"497 NaN \n",
"869 alberto@aronza.com \n",
"\n",
" keywords \\\n",
"251 [ethicis; medicine; infectology; neurology; ne... \n",
"316 NaN \n",
"433 NaN \n",
"497 NaN \n",
"869 NaN \n",
"\n",
" external_ids \\\n",
"251 [[scopus author id, 56647743200], [scopus auth... \n",
"316 NaN \n",
"433 NaN \n",
"497 NaN \n",
"869 NaN \n",
"\n",
" education \\\n",
"251 [[faculty of health and life sciences, , unive... \n",
"316 [[ruskin school of art; wadham college, , univ... \n",
"433 NaN \n",
"497 NaN \n",
"869 NaN \n",
"\n",
" employment n_works \\\n",
"251 NaN 14 \n",
"316 NaN 0 \n",
"433 [[senior engineer, robert bosch (india), benga... 1 \n",
"497 NaN 0 \n",
"869 [[research scientist, vtt technical research c... 19 \n",
"\n",
" works_source \\\n",
"251 [antonio gilvan teixeira júnior, scopus - else... \n",
"316 NaN \n",
"433 [abhishek solanki] \n",
"497 NaN \n",
"869 [crossref metadata search, alberto ronzani, cr... \n",
"\n",
" activation_date last_update_date n_doi n_arxiv \\\n",
"251 2016-05-18t11:26:36.642z 2016-09-20t18:25:05.728z 13 0 \n",
"316 2015-10-05t23:10:08.771z 2016-06-14t19:55:50.313z 0 0 \n",
"433 2019-04-22t04:43:06.232z 2020-07-02t14:18:28.305z 0 0 \n",
"497 2017-11-15t06:33:45.625z 2017-11-15t06:44:02.998z 0 0 \n",
"869 2014-04-16t13:21:54.287z 2020-09-28t15:10:37.439z 18 0 \n",
"\n",
" n_pmc n_other_pids label primary_email_domain other_email_domains \\\n",
"251 0 8 0 aluno.ufca.edu.br [liverpool.ac.uk] \n",
"316 0 0 0 wadh.ox.ac.uk [rsa.ox.ac.uk] \n",
"433 0 0 0 NaN [in.bosch.com] \n",
"497 0 0 0 NaN [hct.ac.ae] \n",
"869 0 3 1 aronza.com [vtt.fi] \n",
"\n",
" url_domains n_emails n_urls n_ids \\\n",
"251 [researchgate.net, academia.edu, cnpq.br] 1.0 3.0 4.0 \n",
"316 NaN 1.0 NaN NaN \n",
"433 [github.com, linkedin.com] 1.0 2.0 NaN \n",
"497 NaN 1.0 NaN NaN \n",
"869 NaN 1.0 NaN NaN \n",
"\n",
" n_keywords n_education n_employment \n",
"251 1.0 1.0 NaN \n",
"316 NaN 1.0 NaN \n",
"433 NaN NaN 2.0 \n",
"497 NaN NaN NaN \n",
"869 NaN NaN 1.0 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.other_email_domains.notna()].head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"emails_by_orcid = df[['orcid', 'n_emails']].sort_values('n_emails', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"0000-0003-4171-3835",
"0000-0001-6239-2968",
"0000-0003-2151-4089",
"0000-0003-2290-2817",
"0000-0001-9084-3156",
"0000-0001-6349-1044",
"0000-0002-2085-1908",
"0000-0003-4147-212X",
"0000-0002-9599-6909",
"0000-0001-9311-0687",
"0000-0003-1502-3910",
"0000-0002-9821-8424",
"0000-0003-4327-6827",
"0000-0002-1929-6054",
"0000-0002-8390-8238",
"0000-0002-1615-8633",
"0000-0003-0671-1543",
"0000-0003-4499-7300",
"0000-0002-5341-6531",
"0000-0002-8565-194X",
"0000-0002-0776-9547",
"0000-0001-8420-9204",
"0000-0002-7396-1561",
"0000-0002-3165-132X",
"0000-0002-2567-3741",
"0000-0003-2657-8225",
"0000-0003-4685-5621",
"0000-0001-5548-8259",
"0000-0003-0391-3430",
"0000-0003-2526-0928"
],
"y": [
12,
9,
7,
7,
6,
6,
6,
6,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
4
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 ORCID iDs by email"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"76eb01ce-8c48-4dec-b04c-6eccf0679c0e\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"76eb01ce-8c48-4dec-b04c-6eccf0679c0e\")) { Plotly.newPlot( \"76eb01ce-8c48-4dec-b04c-6eccf0679c0e\", [{\"type\": \"bar\", \"x\": [\"0000-0003-4171-3835\", \"0000-0001-6239-2968\", \"0000-0003-2151-4089\", \"0000-0003-2290-2817\", \"0000-0001-9084-3156\", \"0000-0001-6349-1044\", \"0000-0002-2085-1908\", \"0000-0003-4147-212X\", \"0000-0002-9599-6909\", \"0000-0001-9311-0687\", \"0000-0003-1502-3910\", \"0000-0002-9821-8424\", \"0000-0003-4327-6827\", \"0000-0002-1929-6054\", \"0000-0002-8390-8238\", \"0000-0002-1615-8633\", \"0000-0003-0671-1543\", \"0000-0003-4499-7300\", \"0000-0002-5341-6531\", \"0000-0002-8565-194X\", \"0000-0002-0776-9547\", \"0000-0001-8420-9204\", \"0000-0002-7396-1561\", \"0000-0002-3165-132X\", \"0000-0002-2567-3741\", \"0000-0003-2657-8225\", \"0000-0003-4685-5621\", \"0000-0001-5548-8259\", \"0000-0003-0391-3430\", \"0000-0003-2526-0928\"], \"y\": [12.0, 9.0, 7.0, 7.0, 6.0, 6.0, 6.0, 6.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": 
[{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, 
\"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, 
\"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, 
\"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 30 ORCID iDs by email\"}, \"xaxis\": {\"range\": [-0.5, 29.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('76eb01ce-8c48-4dec-b04c-6eccf0679c0e');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(30)\n",
"data = [\n",
" go.Bar(\n",
" x=emails_by_orcid[:TOP_N]['orcid'],\n",
" y=emails_by_orcid[:TOP_N]['n_emails']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top %s ORCID iDs by email' % TOP_N, \n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"top_other_emails = df[['orcid', 'other_email_domains']]\\\n",
" .explode('other_email_domains')\\\n",
" .reset_index(drop=True)\\\n",
" .groupby('other_email_domains')\\\n",
" .count()\\\n",
" .sort_values('orcid', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"gmail.com",
"hotmail.com",
"yahoo.com",
"qq.com",
"163.com",
"outlook.com",
"126.com",
"usp.br",
"ieee.org",
"yahoo.com.br",
"mail.ru",
"unesp.br",
"sbs.ox.ac.uk",
"yuhs.ac",
"naver.com",
"icloud.com",
"foxmail.com",
"uq.edu.au",
"ua.pt",
"cam.ac.uk",
"imperial.ac.uk",
"ukr.net",
"law.ox.ac.uk",
"mit.edu",
"monash.edu",
"stanford.edu",
"ucl.ac.uk",
"education.ox.ac.uk",
"ucm.es",
"conted.ox.ac.uk"
],
"y": [
11198,
1550,
1303,
785,
780,
433,
262,
236,
226,
151,
148,
141,
136,
134,
132,
119,
98,
96,
90,
84,
79,
75,
75,
74,
70,
70,
69,
67,
66,
65
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 other email domains"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"7a2d7e94-ba1f-4134-923a-4bac50a8acae\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"7a2d7e94-ba1f-4134-923a-4bac50a8acae\")) { Plotly.newPlot( \"7a2d7e94-ba1f-4134-923a-4bac50a8acae\", [{\"type\": \"bar\", \"x\": [\"gmail.com\", \"hotmail.com\", \"yahoo.com\", \"qq.com\", \"163.com\", \"outlook.com\", \"126.com\", \"usp.br\", \"ieee.org\", \"yahoo.com.br\", \"mail.ru\", \"unesp.br\", \"sbs.ox.ac.uk\", \"yuhs.ac\", \"naver.com\", \"icloud.com\", \"foxmail.com\", \"uq.edu.au\", \"ua.pt\", \"cam.ac.uk\", \"imperial.ac.uk\", \"ukr.net\", \"law.ox.ac.uk\", \"mit.edu\", \"monash.edu\", \"stanford.edu\", \"ucl.ac.uk\", \"education.ox.ac.uk\", \"ucm.es\", \"conted.ox.ac.uk\"], \"y\": [11198, 1550, 1303, 785, 780, 433, 262, 236, 226, 151, 148, 141, 136, 134, 132, 119, 98, 96, 90, 84, 79, 75, 75, 74, 70, 70, 69, 67, 66, 65]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], 
[0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, 
\"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": 
\"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", 
\"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 30 other email domains\"}, \"xaxis\": {\"range\": [-0.5, 29.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('7a2d7e94-ba1f-4134-923a-4bac50a8acae');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(30)\n",
"data = [\n",
" go.Bar(\n",
" x=top_other_emails[:TOP_N].index,\n",
" y=top_other_emails[:TOP_N]['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top %s other email domains' % TOP_N, \n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This makes some sense: legitimate users may register a Gmail account as their primary email for login purposes and list their institutional addresses as other email addresses. It also makes life easier when they relocate to a new institution."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Email speculation"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>433</th>\n",
" <td>0000-0001-9097-2281</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>abhishek</td>\n",
" <td>solanki</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[senior engineer, robert bosch (india), benga...</td>\n",
" <td>1</td>\n",
" <td>[abhishek solanki]</td>\n",
" <td>2019-04-22t04:43:06.232z</td>\n",
" <td>2020-07-02t14:18:28.305z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[in.bosch.com]</td>\n",
" <td>[github.com, linkedin.com]</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>497</th>\n",
" <td>0000-0002-8614-3007</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>adam</td>\n",
" <td>arra</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2017-11-15t06:33:45.625z</td>\n",
" <td>2017-11-15t06:44:02.998z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[hct.ac.ae]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>898</th>\n",
" <td>0000-0003-3728-6439</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>alejandra</td>\n",
" <td>echeverry velásquez</td>\n",
" <td>alejandra echeverry is an industrial electrici...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[innovation, renewable, control, science, ener...</td>\n",
" <td>NaN</td>\n",
" <td>[[, electrical engineer, institución universit...</td>\n",
" <td>[[professor, institución universitaria pascual...</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2019-03-31t00:00:42.929z</td>\n",
" <td>2020-09-06t02:18:54.290z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>[pascualbravo.edu.co]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1719</th>\n",
" <td>0000-0001-8330-7443</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>andrea</td>\n",
" <td>tesoniero</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, d-9056-2015]]</td>\n",
" <td>[[department of geophysics, master of science ...</td>\n",
" <td>[[postdoctoral associate, yale university, new...</td>\n",
" <td>4</td>\n",
" <td>[andrea tesoniero]</td>\n",
" <td>2015-03-09t11:59:06.093z</td>\n",
" <td>2020-08-20t15:03:23.447z</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[yale.edu]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6829</th>\n",
" <td>0000-0001-9670-515X</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>esma esin</td>\n",
" <td>yildirim</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[pharmacognosy, natural chemistry, chemical en...</td>\n",
" <td>NaN</td>\n",
" <td>[[business management, master of science, ista...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-07-26t10:38:03.721z</td>\n",
" <td>2020-07-26t10:52:26.539z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[gmail.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10985816</th>\n",
" <td>0000-0003-1204-6009</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nathan</td>\n",
" <td>walk</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[department of physics, doctor of philosophy,...</td>\n",
" <td>[[, university of oxford, oxford, oxfordshire,...</td>\n",
" <td>10</td>\n",
" <td>[crossref metadata search]</td>\n",
" <td>2016-07-28t14:24:16.844z</td>\n",
" <td>2020-10-13t11:47:50.621z</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>[cs.ox.ac.uk]</td>\n",
" <td>[fu-berlin.de]</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10986027</th>\n",
" <td>0000-0002-3472-7668</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>raf</td>\n",
" <td>vandevelde</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[chemical engineering technology, master, kat...</td>\n",
" <td>[[phd researcher, katholieke universiteit leuv...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-14t13:56:44.779z</td>\n",
" <td>2020-10-16t14:21:40.673z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[kuleuven.be]</td>\n",
" <td>[linkedin.com]</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10987501</th>\n",
" <td>0000-0002-9602-0529</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>carlos augusto</td>\n",
" <td>finelli</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2013-09-16t16:52:06.120z</td>\n",
" <td>2020-12-01t22:47:08.074z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>[cecot.com.br]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10987829</th>\n",
" <td>0000-0003-4402-5982</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>filipe</td>\n",
" <td>de almeida araújo</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[materials science, msc. materials science, m...</td>\n",
" <td>[[co-owner, aeft acessory, manaus, amazonas, b...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-03-02t20:11:01.699z</td>\n",
" <td>2020-12-04t13:53:39.404z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[ime.eb.br]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10988444</th>\n",
" <td>0000-0002-1734-7241</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>manareldeen</td>\n",
" <td>ahmed</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[graphene, deep learning, atomistic simulation...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[post-doctor, zhejiang university, hangzhou, ...</td>\n",
" <td>6</td>\n",
" <td>[manareldeen ahmed]</td>\n",
" <td>2017-02-17t13:18:36.540z</td>\n",
" <td>2020-12-04t02:04:36.668z</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>[hotmail.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>19814 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"433 0000-0001-9097-2281 1 1 \n",
"497 0000-0002-8614-3007 1 1 \n",
"898 0000-0003-3728-6439 1 1 \n",
"1719 0000-0001-8330-7443 1 1 \n",
"6829 0000-0001-9670-515X 1 1 \n",
"... ... ... ... \n",
"10985816 0000-0003-1204-6009 1 1 \n",
"10986027 0000-0002-3472-7668 1 1 \n",
"10987501 0000-0002-9602-0529 1 1 \n",
"10987829 0000-0003-4402-5982 1 1 \n",
"10988444 0000-0002-1734-7241 1 1 \n",
"\n",
" given_names family_name \\\n",
"433 abhishek solanki \n",
"497 adam arra \n",
"898 alejandra echeverry velásquez \n",
"1719 andrea tesoniero \n",
"6829 esma esin yildirim \n",
"... ... ... \n",
"10985816 nathan walk \n",
"10986027 raf vandevelde \n",
"10987501 carlos augusto finelli \n",
"10987829 filipe de almeida araújo \n",
"10988444 manareldeen ahmed \n",
"\n",
" biography other_names \\\n",
"433 NaN NaN \n",
"497 NaN NaN \n",
"898 alejandra echeverry is an industrial electrici... NaN \n",
"1719 NaN NaN \n",
"6829 NaN NaN \n",
"... ... ... \n",
"10985816 NaN NaN \n",
"10986027 NaN NaN \n",
"10987501 NaN NaN \n",
"10987829 NaN NaN \n",
"10988444 NaN NaN \n",
"\n",
" primary_email keywords \\\n",
"433 NaN NaN \n",
"497 NaN NaN \n",
"898 NaN [innovation, renewable, control, science, ener... \n",
"1719 NaN NaN \n",
"6829 NaN [pharmacognosy, natural chemistry, chemical en... \n",
"... ... ... \n",
"10985816 NaN NaN \n",
"10986027 NaN NaN \n",
"10987501 NaN NaN \n",
"10987829 NaN NaN \n",
"10988444 NaN [graphene, deep learning, atomistic simulation... \n",
"\n",
" external_ids \\\n",
"433 NaN \n",
"497 NaN \n",
"898 NaN \n",
"1719 [[researcherid, d-9056-2015]] \n",
"6829 NaN \n",
"... ... \n",
"10985816 NaN \n",
"10986027 NaN \n",
"10987501 NaN \n",
"10987829 NaN \n",
"10988444 NaN \n",
"\n",
" education \\\n",
"433 NaN \n",
"497 NaN \n",
"898 [[, electrical engineer, institución universit... \n",
"1719 [[department of geophysics, master of science ... \n",
"6829 [[business management, master of science, ista... \n",
"... ... \n",
"10985816 [[department of physics, doctor of philosophy,... \n",
"10986027 [[chemical engineering technology, master, kat... \n",
"10987501 NaN \n",
"10987829 [[materials science, msc. materials science, m... \n",
"10988444 NaN \n",
"\n",
" employment n_works \\\n",
"433 [[senior engineer, robert bosch (india), benga... 1 \n",
"497 NaN 0 \n",
"898 [[professor, institución universitaria pascual... 1 \n",
"1719 [[postdoctoral associate, yale university, new... 4 \n",
"6829 NaN 0 \n",
"... ... ... \n",
"10985816 [[, university of oxford, oxford, oxfordshire,... 10 \n",
"10986027 [[phd researcher, katholieke universiteit leuv... 0 \n",
"10987501 NaN 1 \n",
"10987829 [[co-owner, aeft acessory, manaus, amazonas, b... 0 \n",
"10988444 [[post-doctor, zhejiang university, hangzhou, ... 6 \n",
"\n",
" works_source activation_date \\\n",
"433 [abhishek solanki] 2019-04-22t04:43:06.232z \n",
"497 NaN 2017-11-15t06:33:45.625z \n",
"898 [crossref] 2019-03-31t00:00:42.929z \n",
"1719 [andrea tesoniero] 2015-03-09t11:59:06.093z \n",
"6829 NaN 2020-07-26t10:38:03.721z \n",
"... ... ... \n",
"10985816 [crossref metadata search] 2016-07-28t14:24:16.844z \n",
"10986027 NaN 2020-10-14t13:56:44.779z \n",
"10987501 [crossref] 2013-09-16t16:52:06.120z \n",
"10987829 NaN 2020-03-02t20:11:01.699z \n",
"10988444 [manareldeen ahmed] 2017-02-17t13:18:36.540z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids \\\n",
"433 2020-07-02t14:18:28.305z 0 0 0 0 \n",
"497 2017-11-15t06:44:02.998z 0 0 0 0 \n",
"898 2020-09-06t02:18:54.290z 1 0 0 0 \n",
"1719 2020-08-20t15:03:23.447z 4 0 0 2 \n",
"6829 2020-07-26t10:52:26.539z 0 0 0 0 \n",
"... ... ... ... ... ... \n",
"10985816 2020-10-13t11:47:50.621z 10 0 0 0 \n",
"10986027 2020-10-16t14:21:40.673z 0 0 0 0 \n",
"10987501 2020-12-01t22:47:08.074z 1 0 0 0 \n",
"10987829 2020-12-04t13:53:39.404z 0 0 0 0 \n",
"10988444 2020-12-04t02:04:36.668z 6 0 0 3 \n",
"\n",
" label primary_email_domain other_email_domains \\\n",
"433 0 NaN [in.bosch.com] \n",
"497 0 NaN [hct.ac.ae] \n",
"898 1 NaN [pascualbravo.edu.co] \n",
"1719 0 NaN [yale.edu] \n",
"6829 0 NaN [gmail.com] \n",
"... ... ... ... \n",
"10985816 1 NaN [cs.ox.ac.uk] \n",
"10986027 0 NaN [kuleuven.be] \n",
"10987501 1 NaN [cecot.com.br] \n",
"10987829 0 NaN [ime.eb.br] \n",
"10988444 1 NaN [hotmail.com] \n",
"\n",
" url_domains n_emails n_urls n_ids n_keywords \\\n",
"433 [github.com, linkedin.com] 1.0 2.0 NaN NaN \n",
"497 NaN 1.0 NaN NaN NaN \n",
"898 NaN 1.0 NaN NaN 7.0 \n",
"1719 NaN 1.0 NaN 1.0 NaN \n",
"6829 NaN 1.0 NaN NaN 3.0 \n",
"... ... ... ... ... ... \n",
"10985816 [fu-berlin.de] 1.0 1.0 NaN NaN \n",
"10986027 [linkedin.com] 1.0 1.0 NaN NaN \n",
"10987501 NaN 1.0 NaN NaN NaN \n",
"10987829 NaN 1.0 NaN NaN NaN \n",
"10988444 NaN 1.0 NaN NaN 5.0 \n",
"\n",
" n_education n_employment \n",
"433 NaN 2.0 \n",
"497 NaN NaN \n",
"898 1.0 1.0 \n",
"1719 4.0 2.0 \n",
"6829 3.0 NaN \n",
"... ... ... \n",
"10985816 3.0 2.0 \n",
"10986027 2.0 1.0 \n",
"10987501 NaN NaN \n",
"10987829 2.0 1.0 \n",
"10988444 NaN 1.0 \n",
"\n",
"[19814 rows x 30 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.primary_email.isna() & df.other_email_domains.notna()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## URLs"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0000-0001-7402-0096</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, kth royal institute of technology, stockho...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-01-11t15:13:06.467z</td>\n",
" <td>2016-06-14t23:55:59.896z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[kth.se]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0000-0001-8377-3508</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[fontana, milena da silva]</td>\n",
" <td>NaN</td>\n",
" <td>[educação; informática; matemática.]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, instituto federal de educação, ciência e t...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-05-23t23:39:04.534z</td>\n",
" <td>2019-10-16t02:50:11.007z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0000-0002-2638-4108</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>investigador de la universidad de oviedo. depa...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[constitutional history, history of political ...</td>\n",
" <td>[[scopus author id, 54394231000]]</td>\n",
" <td>[[public law, ph doctor, university of oviedo,...</td>\n",
" <td>[[professor of constitutional law, university ...</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2013-03-25t14:38:06.016z</td>\n",
" <td>2020-07-01t13:10:37.025z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[unioviedo.es]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>0000-0003-1435-6545</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[prostate cancer, migration, culture cell]</td>\n",
" <td>[[researcherid, p-2223-2018]]</td>\n",
" <td>[[morfologia, , universidade estadual paulista...</td>\n",
" <td>[[, universidade estadual paulista (unesp), in...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-08-09t12:12:24.405z</td>\n",
" <td>2020-04-22t01:38:03.184z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br, linkedin.com]</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>158</th>\n",
" <td>0000-0003-1284-9741</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>alex percy antonio</td>\n",
" <td>manriquez paisig</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-09-08t20:04:33.906z</td>\n",
" <td>2020-09-08t20:25:55.432z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[youtube.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"6 0000-0001-7402-0096 1 1 \n",
"11 0000-0001-8377-3508 1 1 \n",
"29 0000-0002-2638-4108 1 1 \n",
"46 0000-0003-1435-6545 1 1 \n",
"158 0000-0003-1284-9741 1 1 \n",
"\n",
" given_names family_name \\\n",
"6 NaN NaN \n",
"11 NaN NaN \n",
"29 NaN NaN \n",
"46 NaN NaN \n",
"158 alex percy antonio manriquez paisig \n",
"\n",
" biography \\\n",
"6 NaN \n",
"11 NaN \n",
"29 investigador de la universidad de oviedo. depa... \n",
"46 NaN \n",
"158 NaN \n",
"\n",
" other_names primary_email \\\n",
"6 NaN NaN \n",
"11 [fontana, milena da silva] NaN \n",
"29 NaN NaN \n",
"46 NaN NaN \n",
"158 NaN NaN \n",
"\n",
" keywords \\\n",
"6 NaN \n",
"11 [educação; informática; matemática.] \n",
"29 [constitutional history, history of political ... \n",
"46 [prostate cancer, migration, culture cell] \n",
"158 NaN \n",
"\n",
" external_ids \\\n",
"6 NaN \n",
"11 NaN \n",
"29 [[scopus author id, 54394231000]] \n",
"46 [[researcherid, p-2223-2018]] \n",
"158 NaN \n",
"\n",
" education \\\n",
"6 NaN \n",
"11 NaN \n",
"29 [[public law, ph doctor, university of oviedo,... \n",
"46 [[morfologia, , universidade estadual paulista... \n",
"158 NaN \n",
"\n",
" employment n_works works_source \\\n",
"6 [[, kth royal institute of technology, stockho... 0 NaN \n",
"11 [[, instituto federal de educação, ciência e t... 0 NaN \n",
"29 [[professor of constitutional law, university ... 1 [crossref] \n",
"46 [[, universidade estadual paulista (unesp), in... 0 NaN \n",
"158 NaN 0 NaN \n",
"\n",
" activation_date last_update_date n_doi n_arxiv \\\n",
"6 2015-01-11t15:13:06.467z 2016-06-14t23:55:59.896z 0 0 \n",
"11 2018-05-23t23:39:04.534z 2019-10-16t02:50:11.007z 0 0 \n",
"29 2013-03-25t14:38:06.016z 2020-07-01t13:10:37.025z 1 0 \n",
"46 2018-08-09t12:12:24.405z 2020-04-22t01:38:03.184z 0 0 \n",
"158 2020-09-08t20:04:33.906z 2020-09-08t20:25:55.432z 0 0 \n",
"\n",
" n_pmc n_other_pids label primary_email_domain other_email_domains \\\n",
"6 0 0 0 NaN NaN \n",
"11 0 0 0 NaN NaN \n",
"29 0 0 0 NaN NaN \n",
"46 0 0 0 NaN NaN \n",
"158 0 0 0 NaN NaN \n",
"\n",
" url_domains n_emails n_urls n_ids n_keywords \\\n",
"6 [kth.se] NaN 1.0 NaN NaN \n",
"11 [cnpq.br] NaN 1.0 NaN 1.0 \n",
"29 [unioviedo.es] NaN 1.0 1.0 3.0 \n",
"46 [cnpq.br, linkedin.com] NaN 2.0 1.0 3.0 \n",
"158 [youtube.com] NaN 1.0 NaN NaN \n",
"\n",
" n_education n_employment \n",
"6 NaN 1.0 \n",
"11 NaN 3.0 \n",
"29 1.0 1.0 \n",
"46 1.0 1.0 \n",
"158 NaN NaN "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.url_domains.notna()].head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3226518</th>\n",
" <td>0000-0002-1234-835X</td>\n",
" <td>219.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4206055</th>\n",
" <td>0000-0001-7478-4539</td>\n",
" <td>174.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4901870</th>\n",
" <td>0000-0002-7392-3792</td>\n",
" <td>169.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8184260</th>\n",
" <td>0000-0002-6938-9638</td>\n",
" <td>152.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2743648</th>\n",
" <td>0000-0002-5710-4041</td>\n",
" <td>114.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989644</th>\n",
" <td>0000-0002-1686-1935</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989645</th>\n",
" <td>0000-0002-3800-6331</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989646</th>\n",
" <td>0000-0002-8783-5814</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989647</th>\n",
" <td>0000-0002-7584-2283</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989648</th>\n",
" <td>0000-0003-0529-3538</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10989649 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid n_urls\n",
"3226518 0000-0002-1234-835X 219.0\n",
"4206055 0000-0001-7478-4539 174.0\n",
"4901870 0000-0002-7392-3792 169.0\n",
"8184260 0000-0002-6938-9638 152.0\n",
"2743648 0000-0002-5710-4041 114.0\n",
"... ... ...\n",
"10989644 0000-0002-1686-1935 NaN\n",
"10989645 0000-0002-3800-6331 NaN\n",
"10989646 0000-0002-8783-5814 NaN\n",
"10989647 0000-0002-7584-2283 NaN\n",
"10989648 0000-0003-0529-3538 NaN\n",
"\n",
"[10989649 rows x 2 columns]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"urls_by_orcid = df[['orcid', 'n_urls']].sort_values('n_urls', ascending=False)\n",
"urls_by_orcid"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"0000-0002-1234-835X",
"0000-0001-7478-4539",
"0000-0002-7392-3792",
"0000-0002-6938-9638",
"0000-0002-5710-4041",
"0000-0003-2450-090X",
"0000-0002-3920-7389",
"0000-0002-6689-4129",
"0000-0001-5384-0001",
"0000-0002-4621-5571",
"0000-0001-9131-1266",
"0000-0002-7754-8889",
"0000-0002-5250-1144",
"0000-0002-9025-8632",
"0000-0002-7456-3848",
"0000-0003-0176-1293",
"0000-0003-0321-7339",
"0000-0002-8493-0402",
"0000-0002-9965-2425",
"0000-0001-8873-6677",
"0000-0002-3997-5070",
"0000-0002-1856-6905",
"0000-0002-4316-1467",
"0000-0002-4062-3603",
"0000-0003-0594-2462",
"0000-0001-5880-7091",
"0000-0003-1524-6268",
"0000-0002-0752-7513",
"0000-0003-2593-7134",
"0000-0002-1298-5252",
"0000-0003-1761-3842",
"0000-0003-2383-8386",
"0000-0003-3546-2312",
"0000-0002-2886-9248",
"0000-0003-2183-8112",
"0000-0002-1929-6054",
"0000-0003-4948-9268",
"0000-0003-2407-3557",
"0000-0002-9276-6921",
"0000-0003-1484-6958",
"0000-0002-7568-3403",
"0000-0002-4305-4215",
"0000-0002-4004-6666",
"0000-0003-0796-0234",
"0000-0001-7133-6896",
"0000-0002-8208-0897",
"0000-0002-9071-5450",
"0000-0003-4993-5555",
"0000-0003-0930-6121",
"0000-0002-8116-9611",
"0000-0002-5139-2660",
"0000-0002-3277-9659",
"0000-0002-8122-879X",
"0000-0001-9559-1103",
"0000-0003-2862-6315",
"0000-0002-2000-8339",
"0000-0001-5300-4601",
"0000-0002-6254-8683",
"0000-0002-6547-0172",
"0000-0003-4808-6619",
"0000-0003-3933-0229",
"0000-0002-0971-9375",
"0000-0003-0694-1154",
"0000-0003-1585-1134",
"0000-0002-4659-5391",
"0000-0002-2916-2893",
"0000-0001-6783-2037",
"0000-0001-6461-2573",
"0000-0003-4501-3756",
"0000-0001-5549-6822",
"0000-0002-8940-3177",
"0000-0003-4326-9336",
"0000-0001-8096-4333",
"0000-0001-8978-4830",
"0000-0002-5946-1595",
"0000-0002-6680-1703",
"0000-0002-8593-9257",
"0000-0002-7653-4899",
"0000-0003-1904-4188",
"0000-0002-5196-4905",
"0000-0001-8808-4867",
"0000-0001-6921-0426",
"0000-0003-1815-1993",
"0000-0002-7843-8497",
"0000-0003-1675-2840",
"0000-0001-8644-2114",
"0000-0003-0907-9870",
"0000-0001-7784-0583",
"0000-0001-7550-5802",
"0000-0001-8986-2528",
"0000-0002-5265-6074",
"0000-0001-9102-8639",
"0000-0002-0696-8560",
"0000-0001-6979-4273",
"0000-0002-7179-6953",
"0000-0002-3334-9386",
"0000-0001-6714-009X",
"0000-0001-7193-5039",
"0000-0002-5241-1026",
"0000-0001-7608-9433"
],
"y": [
219,
174,
169,
152,
114,
114,
111,
104,
104,
90,
83,
83,
81,
81,
80,
80,
80,
76,
73,
72,
71,
70,
69,
69,
68,
68,
68,
68,
67,
67,
66,
66,
65,
64,
61,
61,
61,
59,
57,
57,
57,
57,
57,
57,
57,
56,
55,
55,
55,
55,
51,
50,
50,
50,
49,
49,
48,
48,
48,
48,
47,
47,
46,
46,
46,
45,
45,
45,
45,
44,
43,
43,
43,
43,
42,
42,
42,
41,
41,
41,
40,
40,
39,
39,
39,
39,
38,
38,
38,
38,
38,
37,
37,
37,
37,
37,
36,
36,
36,
36
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 100 ORCID iDs with URLs"
},
"xaxis": {
"range": [
-0.5,
99.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"3c7ec61e-2ef7-40f9-92e8-684b6b3a4a5a\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"3c7ec61e-2ef7-40f9-92e8-684b6b3a4a5a\")) { Plotly.newPlot( \"3c7ec61e-2ef7-40f9-92e8-684b6b3a4a5a\", [{\"type\": \"bar\", \"x\": [\"0000-0002-1234-835X\", \"0000-0001-7478-4539\", \"0000-0002-7392-3792\", \"0000-0002-6938-9638\", \"0000-0002-5710-4041\", \"0000-0003-2450-090X\", \"0000-0002-3920-7389\", \"0000-0002-6689-4129\", \"0000-0001-5384-0001\", \"0000-0002-4621-5571\", \"0000-0001-9131-1266\", \"0000-0002-7754-8889\", \"0000-0002-5250-1144\", \"0000-0002-9025-8632\", \"0000-0002-7456-3848\", \"0000-0003-0176-1293\", \"0000-0003-0321-7339\", \"0000-0002-8493-0402\", \"0000-0002-9965-2425\", \"0000-0001-8873-6677\", \"0000-0002-3997-5070\", \"0000-0002-1856-6905\", \"0000-0002-4316-1467\", \"0000-0002-4062-3603\", \"0000-0003-0594-2462\", \"0000-0001-5880-7091\", \"0000-0003-1524-6268\", \"0000-0002-0752-7513\", \"0000-0003-2593-7134\", \"0000-0002-1298-5252\", \"0000-0003-1761-3842\", \"0000-0003-2383-8386\", \"0000-0003-3546-2312\", \"0000-0002-2886-9248\", \"0000-0003-2183-8112\", \"0000-0002-1929-6054\", \"0000-0003-4948-9268\", \"0000-0003-2407-3557\", \"0000-0002-9276-6921\", \"0000-0003-1484-6958\", \"0000-0002-7568-3403\", \"0000-0002-4305-4215\", \"0000-0002-4004-6666\", \"0000-0003-0796-0234\", \"0000-0001-7133-6896\", \"0000-0002-8208-0897\", \"0000-0002-9071-5450\", \"0000-0003-4993-5555\", \"0000-0003-0930-6121\", \"0000-0002-8116-9611\", \"0000-0002-5139-2660\", \"0000-0002-3277-9659\", \"0000-0002-8122-879X\", \"0000-0001-9559-1103\", \"0000-0003-2862-6315\", \"0000-0002-2000-8339\", \"0000-0001-5300-4601\", \"0000-0002-6254-8683\", \"0000-0002-6547-0172\", \"0000-0003-4808-6619\", \"0000-0003-3933-0229\", \"0000-0002-0971-9375\", \"0000-0003-0694-1154\", 
\"0000-0003-1585-1134\", \"0000-0002-4659-5391\", \"0000-0002-2916-2893\", \"0000-0001-6783-2037\", \"0000-0001-6461-2573\", \"0000-0003-4501-3756\", \"0000-0001-5549-6822\", \"0000-0002-8940-3177\", \"0000-0003-4326-9336\", \"0000-0001-8096-4333\", \"0000-0001-8978-4830\", \"0000-0002-5946-1595\", \"0000-0002-6680-1703\", \"0000-0002-8593-9257\", \"0000-0002-7653-4899\", \"0000-0003-1904-4188\", \"0000-0002-5196-4905\", \"0000-0001-8808-4867\", \"0000-0001-6921-0426\", \"0000-0003-1815-1993\", \"0000-0002-7843-8497\", \"0000-0003-1675-2840\", \"0000-0001-8644-2114\", \"0000-0003-0907-9870\", \"0000-0001-7784-0583\", \"0000-0001-7550-5802\", \"0000-0001-8986-2528\", \"0000-0002-5265-6074\", \"0000-0001-9102-8639\", \"0000-0002-0696-8560\", \"0000-0001-6979-4273\", \"0000-0002-7179-6953\", \"0000-0002-3334-9386\", \"0000-0001-6714-009X\", \"0000-0001-7193-5039\", \"0000-0002-5241-1026\", \"0000-0001-7608-9433\"], \"y\": [219.0, 174.0, 169.0, 152.0, 114.0, 114.0, 111.0, 104.0, 104.0, 90.0, 83.0, 83.0, 81.0, 81.0, 80.0, 80.0, 80.0, 76.0, 73.0, 72.0, 71.0, 70.0, 69.0, 69.0, 68.0, 68.0, 68.0, 68.0, 67.0, 67.0, 66.0, 66.0, 65.0, 64.0, 61.0, 61.0, 61.0, 59.0, 57.0, 57.0, 57.0, 57.0, 57.0, 57.0, 57.0, 56.0, 55.0, 55.0, 55.0, 55.0, 51.0, 50.0, 50.0, 50.0, 49.0, 49.0, 48.0, 48.0, 48.0, 48.0, 47.0, 47.0, 46.0, 46.0, 46.0, 45.0, 45.0, 45.0, 45.0, 44.0, 43.0, 43.0, 43.0, 43.0, 42.0, 42.0, 42.0, 41.0, 41.0, 41.0, 40.0, 40.0, 39.0, 39.0, 39.0, 39.0, 38.0, 38.0, 38.0, 38.0, 38.0, 37.0, 37.0, 37.0, 37.0, 37.0, 36.0, 36.0, 36.0, 36.0]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", 
\"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], 
[0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], 
\"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", 
\"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 100 ORCID iDs 
with URLs\"}, \"xaxis\": {\"range\": [-0.5, 99.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('3c7ec61e-2ef7-40f9-92e8-684b6b3a4a5a');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# set_top_n is defined elsewhere in this notebook; presumably it refreshes the\n",
"# TOP_N / TOP_RANGE globals read below -- TODO confirm against its definition.\n",
"set_top_n(100)\n",
"\n",
"# Slice the frame (already sorted by n_urls, descending) once and reuse it for both axes.\n",
"top_rows = urls_by_orcid[:TOP_N]\n",
"data = [go.Bar(x=top_rows['orcid'], y=top_rows['n_urls'])]\n",
"\n",
"layout = go.Layout(\n",
"    title='Top %s ORCID iDs with URLs' % TOP_N,\n",
"    xaxis={'tickangle': 45, 'tickfont': {'size': 12}, 'range': TOP_RANGE},\n",
")\n",
"\n",
"# Render the bar chart inline in the notebook.\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# Occurrences of each URL domain, most frequent first.\n",
"# Assumes url_domains holds list-like values (one entry per URL) -- TODO confirm;\n",
"# explode gives every entry its own row, so a domain repeated within one profile\n",
"# is counted once per occurrence. The resulting 'orcid' column holds the counts.\n",
"top_urls = (\n",
"    df[['orcid', 'url_domains']]\n",
"    .explode(column='url_domains')\n",
"    .reset_index(drop=True)\n",
"    .groupby(by='url_domains')\n",
"    .count()\n",
"    .sort_values(by='orcid', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"linkedin.com",
"researchgate.net",
"google.com",
"cnpq.br",
"academia.edu",
"twitter.com",
"facebook.com",
"publons.com",
"wordpress.com",
"mendeley.com",
"instagram.com",
"github.io",
"google.com.ua",
"blogspot.com",
"github.com",
"google.es",
"helsinki.fi",
"unirioja.es",
"youtube.com",
"wixsite.com",
"ku.dk",
"",
"scopus.com",
"weebly.com",
"us.es",
"kth.se",
"cityu.edu.hk",
"au.dk",
"kcl.ac.uk",
"man.ac.uk",
"google.com.au",
"ucl.ac.uk",
"sdu.dk",
"ugr.es",
"researcherid.com",
"mq.edu.au",
"ntu.edu.tw",
"dtu.dk",
"rug.nl",
"colciencias.gov.co",
"google.co.in",
"bris.ac.uk",
"uwa.edu.au",
"uc3m.es",
"vub.be",
"bu.edu",
"monash.edu",
"google.co.uk",
"aau.dk",
"lancs.ac.uk"
],
"y": [
78418,
67823,
44804,
24635,
21174,
19046,
15368,
10751,
9043,
6960,
6040,
5516,
5371,
5272,
5252,
5163,
4730,
4590,
4470,
4140,
3771,
3620,
3586,
3122,
3037,
2957,
2795,
2746,
2724,
2689,
2610,
2586,
2478,
2231,
2134,
2133,
2094,
2002,
1975,
1929,
1917,
1840,
1820,
1804,
1803,
1803,
1772,
1656,
1653,
1650
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top-50 URL domains"
},
"xaxis": {
"range": [
-0.5,
49.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"a8028b62-3b6e-4a53-91a8-f585b09423bf\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"a8028b62-3b6e-4a53-91a8-f585b09423bf\")) { Plotly.newPlot( \"a8028b62-3b6e-4a53-91a8-f585b09423bf\", [{\"type\": \"bar\", \"x\": [\"linkedin.com\", \"researchgate.net\", \"google.com\", \"cnpq.br\", \"academia.edu\", \"twitter.com\", \"facebook.com\", \"publons.com\", \"wordpress.com\", \"mendeley.com\", \"instagram.com\", \"github.io\", \"google.com.ua\", \"blogspot.com\", \"github.com\", \"google.es\", \"helsinki.fi\", \"unirioja.es\", \"youtube.com\", \"wixsite.com\", \"ku.dk\", \"\", \"scopus.com\", \"weebly.com\", \"us.es\", \"kth.se\", \"cityu.edu.hk\", \"au.dk\", \"kcl.ac.uk\", \"man.ac.uk\", \"google.com.au\", \"ucl.ac.uk\", \"sdu.dk\", \"ugr.es\", \"researcherid.com\", \"mq.edu.au\", \"ntu.edu.tw\", \"dtu.dk\", \"rug.nl\", \"colciencias.gov.co\", \"google.co.in\", \"bris.ac.uk\", \"uwa.edu.au\", \"uc3m.es\", \"vub.be\", \"bu.edu\", \"monash.edu\", \"google.co.uk\", \"aau.dk\", \"lancs.ac.uk\"], \"y\": [78418, 67823, 44804, 24635, 21174, 19046, 15368, 10751, 9043, 6960, 6040, 5516, 5371, 5272, 5252, 5163, 4730, 4590, 4470, 4140, 3771, 3620, 3586, 3122, 3037, 2957, 2795, 2746, 2724, 2689, 2610, 2586, 2478, 2231, 2134, 2133, 2094, 2002, 1975, 1929, 1917, 1840, 1820, 1804, 1803, 1803, 1772, 1656, 1653, 1650]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, 
\"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], 
[0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": 
{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", 
\"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top-50 URL domains\"}, \"xaxis\": {\"range\": [-0.5, 49.5], 
\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('a8028b62-3b6e-4a53-91a8-f585b09423bf');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the most frequent URL domains.\n",
"# set_top_n is defined elsewhere in the notebook; presumably it updates\n",
"# TOP_N and TOP_RANGE, which are read below -- TODO confirm.\n",
"set_top_n(50)\n",
"\n",
"# x: the index of the first TOP_N rows of top_urls (the domain names);\n",
"# y: the 'orcid' column of those same rows.\n",
"data = [go.Bar(x=top_urls[:TOP_N].index, y=top_urls[:TOP_N]['orcid'])]\n",
"\n",
"layout = go.Layout(\n",
"    title='Top-%s URL domains' % TOP_N,\n",
"    xaxis={'tickangle': 45, 'tickfont': {'size': 12}, 'range': TOP_RANGE},\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## URLs speculation"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1025713</th>\n",
" <td>0000-0003-2407-3557</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>abdul</td>\n",
" <td>aziz</td>\n",
" <td>abdul aziz was born on may 25, 1973, in brebes...</td>\n",
" <td>[abdul aziz, aziz, abdul, aziz, a., aziz, abd,...</td>\n",
" <td>NaN</td>\n",
" <td>[ekonomi islam, etika bisnis islam, ilmu ekono...</td>\n",
" <td>NaN</td>\n",
" <td>[[ilmu ekonomi, dr, universitas borobudur, jak...</td>\n",
" <td>[[assisten professor/dr, institut agama islam ...</td>\n",
" <td>72</td>\n",
" <td>[base - bielefeld academic search engine, abdu...</td>\n",
" <td>2016-09-12t04:41:24.842z</td>\n",
" <td>2021-01-26t11:58:33.039z</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>77</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, syekhnurjati.ac.id, orcid.org, bl...</td>\n",
" <td>NaN</td>\n",
" <td>59.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2743648</th>\n",
" <td>0000-0002-5710-4041</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>ryszard</td>\n",
" <td>romaniuk</td>\n",
" <td>professor of electronics and communications en...</td>\n",
" <td>[r.romaniuk, r.s.romaniuk, ryszard romaniuk, r...</td>\n",
" <td>rrom@ise.pw.edu.pl</td>\n",
" <td>[measurement systems, electronics, photonics, ...</td>\n",
" <td>[[isni, 0000000071432485], [researcherid, b-91...</td>\n",
" <td>[[faculty of electronics and information techn...</td>\n",
" <td>[[professor, institute director, politechnika ...</td>\n",
" <td>5008</td>\n",
" <td>[inspire-hep, researcherid, isni2orcid search ...</td>\n",
" <td>2013-01-20t12:09:21.600z</td>\n",
" <td>2021-03-16t19:37:31.650z</td>\n",
" <td>1221</td>\n",
" <td>25</td>\n",
" <td>0</td>\n",
" <td>1742</td>\n",
" <td>1</td>\n",
" <td>ise.pw.edu.pl</td>\n",
" <td>[ise.pw.edu.pl, elka.pw.edu.pl, cern.ch]</td>\n",
" <td>[google.pl, publons.com, scopus.com, mendeley....</td>\n",
" <td>3.0</td>\n",
" <td>114.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3011724</th>\n",
" <td>0000-0003-2450-090X</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>eduard</td>\n",
" <td>babulak</td>\n",
" <td>professor eduard babulak is accomplished inter...</td>\n",
" <td>[professor eduard babulak]</td>\n",
" <td>NaN</td>\n",
" <td>[next generation of ict and eservices, compute...</td>\n",
" <td>[[scopus author id, 6506867432], [researcherid...</td>\n",
" <td>[[information technology, doctor habilitated (...</td>\n",
" <td>[[consultant, horizon 2020 framework programme...</td>\n",
" <td>274</td>\n",
" <td>[the lens, base - bielefeld academic search en...</td>\n",
" <td>2013-04-03t08:02:30.013z</td>\n",
" <td>2021-02-28t10:07:13.231z</td>\n",
" <td>199</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>174</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[worldassessmentcouncil.org, spseke.sk, bcs.or...</td>\n",
" <td>NaN</td>\n",
" <td>114.0</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" <td>6.0</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3881064</th>\n",
" <td>0000-0002-3920-7389</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>а.</td>\n",
" <td>гусев</td>\n",
" <td>surname, name gusev alexander leonidovichdate...</td>\n",
" <td>[alexander l. gusev , alexander leonidovich gu...</td>\n",
" <td>NaN</td>\n",
" <td>[photochromic, electrochromic, storage and tra...</td>\n",
" <td>[[researcherid, f-8048-2014], [scopus author i...</td>\n",
" <td>[[chemical technology and cryogenic-vacuum tec...</td>\n",
" <td>[[general director, scientific technical centr...</td>\n",
" <td>472</td>\n",
" <td>[publons, datacite, scopus - elsevier, a.l. gu...</td>\n",
" <td>2014-05-14t00:01:28.030z</td>\n",
" <td>2021-01-16t13:44:14.134z</td>\n",
" <td>37</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>21</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[youtube.com, isjaee.com, researchgate.net, re...</td>\n",
" <td>NaN</td>\n",
" <td>111.0</td>\n",
" <td>2.0</td>\n",
" <td>16.0</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7466062</th>\n",
" <td>0000-0002-1929-6054</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>franklin américo</td>\n",
" <td>canaza choque</td>\n",
" <td>docente-investigador social. maestrando en der...</td>\n",
" <td>[franklin américo canaza-choque , franklin a. ...</td>\n",
" <td>leo_123fa@hotmail.com</td>\n",
" <td>[justicia global; democracia; derechos humanos...</td>\n",
" <td>[[researcherid, p-8613-2018], [loop profile, 8...</td>\n",
" <td>[[facultad de ciencias de la educación , maest...</td>\n",
" <td>[[investigador social, universidad católica de...</td>\n",
" <td>39</td>\n",
" <td>[researcherid, base - bielefeld academic searc...</td>\n",
" <td>2017-09-15t19:45:43.483z</td>\n",
" <td>2021-03-23t20:12:47.297z</td>\n",
" <td>30</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>34</td>\n",
" <td>1</td>\n",
" <td>hotmail.com</td>\n",
" <td>[gmail.com, gmail.com, hotmail.com, baldwin.ed...</td>\n",
" <td>[concytec.gob.pe, redalyc.org, redalyc.org, un...</td>\n",
" <td>5.0</td>\n",
" <td>61.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7517096</th>\n",
" <td>0000-0003-4948-9268</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>gustavo</td>\n",
" <td>duperré</td>\n",
" <td>gustavo norberto duperré graduated in arts and...</td>\n",
" <td>[gustavo norberto duperré, duperré, g. n., gus...</td>\n",
" <td>gustavo.duperre@usal.edu.ar</td>\n",
" <td>[sciences of antiquity, social sciences, compu...</td>\n",
" <td>[[scopus author id, 57195936346], [researcheri...</td>\n",
" <td>[[programme in history, history of art and ter...</td>\n",
" <td>[[titular professor, dirección general de cult...</td>\n",
" <td>41</td>\n",
" <td>[gustavo duperré, scopus - elsevier, publons, ...</td>\n",
" <td>2020-02-22t15:49:52.386z</td>\n",
" <td>2021-03-12t15:13:44.065z</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>34</td>\n",
" <td>0</td>\n",
" <td>usal.edu.ar</td>\n",
" <td>NaN</td>\n",
" <td>[icomos.ro, unirioja.es, unirioja.es, unc.edu....</td>\n",
" <td>NaN</td>\n",
" <td>61.0</td>\n",
" <td>2.0</td>\n",
" <td>11.0</td>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8068275</th>\n",
" <td>0000-0003-2183-8112</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>pelayo munhoz</td>\n",
" <td>olea</td>\n",
" <td>pós-doutorado em gestão ambiental pela univers...</td>\n",
" <td>[ munhoz, pelayo olea, olea, pelayo, olea, p...</td>\n",
" <td>NaN</td>\n",
" <td>[empreendedorismo, sustentabilidade, inovação]</td>\n",
" <td>[[scopus author id, 55175503300], [researcheri...</td>\n",
" <td>[[, postdoctoral in environmental sustainabili...</td>\n",
" <td>[[professor, universidade federal do rio grand...</td>\n",
" <td>1109</td>\n",
" <td>[the lens, pelayo munhoz olea, dimensions, bas...</td>\n",
" <td>2013-02-04t17:25:34.723z</td>\n",
" <td>2021-03-19t18:51:01.128z</td>\n",
" <td>798</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>582</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c...</td>\n",
" <td>NaN</td>\n",
" <td>61.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8184260</th>\n",
" <td>0000-0002-6938-9638</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>adolfo</td>\n",
" <td>catral sanabria</td>\n",
" <td>my education is in computer science, mathemati...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[loop profile, 747193]]</td>\n",
" <td>[[education, capacitación para la enseñanza en...</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" <td>[base - bielefeld academic search engine, data...</td>\n",
" <td>2019-05-07t19:27:02.210z</td>\n",
" <td>2020-12-10t23:39:15.236z</td>\n",
" <td>2022</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[researchgate.net, youtube.com, linkedin.com, ...</td>\n",
" <td>NaN</td>\n",
" <td>152.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8791256</th>\n",
" <td>0000-0002-9025-8632</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>buycannabis</td>\n",
" <td>dispensary</td>\n",
" <td>we procure and deliver premium cannabis strain...</td>\n",
" <td>[we procure and deliver premium cannabis strai...</td>\n",
" <td>NaN</td>\n",
" <td>[canabis dispensary, cannabis, cannabis commun...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" <td>[goowonderland dispensary]</td>\n",
" <td>2020-12-09t21:19:46.004z</td>\n",
" <td>2020-12-10t01:17:28.772z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[goowonderland.com, goowonderland.com, goowond...</td>\n",
" <td>NaN</td>\n",
" <td>81.0</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10174509</th>\n",
" <td>0000-0002-9965-2425</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>jaroslaw</td>\n",
" <td>spychala</td>\n",
" <td>jaroslaw spychala has received a doctoral degr...</td>\n",
" <td>[jaroslaw jozef spychala]</td>\n",
" <td>NaN</td>\n",
" <td>[medicinal and pharmaceutical chemistry, photo...</td>\n",
" <td>[[scopus author id, 7006745874]]</td>\n",
" <td>[[department of chemistry, postdoctoral associ...</td>\n",
" <td>[[assistant professor, adam mickiewicz univers...</td>\n",
" <td>29</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2014-09-18t12:34:14.242z</td>\n",
" <td>2020-02-11t14:31:25.544z</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>29</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[biowebspin.com, biowebspin.com, google.com, l...</td>\n",
" <td>NaN</td>\n",
" <td>73.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10257808</th>\n",
" <td>0000-0002-4062-3603</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>juan de dios</td>\n",
" <td>beltrán mancilla</td>\n",
" <td>juan de dios beltrán mancilla (*) filósofo aut...</td>\n",
" <td>[juan de dios beltrán mancilla, filósofo autod...</td>\n",
" <td>NaN</td>\n",
" <td>[filosofia medicina arquitectura economía dere...</td>\n",
" <td>NaN</td>\n",
" <td>[[, diplomado en practicas directivas para or...</td>\n",
" <td>[[inspector general jornada vespertina // de 2...</td>\n",
" <td>11</td>\n",
" <td>[juan de dios beltr´´án mancilla]</td>\n",
" <td>2020-04-19t21:06:33.495z</td>\n",
" <td>2021-02-10t20:13:07.698z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[yumpu.com, ijopm.org, google.com, blogspot.co...</td>\n",
" <td>NaN</td>\n",
" <td>69.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>8.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10486212</th>\n",
" <td>0000-0002-3997-5070</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>dr. parameshachari</td>\n",
" <td>b d</td>\n",
" <td>dr. parameshachari b dacm distinguished speake...</td>\n",
" <td>[dr. parameshachari b d]</td>\n",
" <td>NaN</td>\n",
" <td>[professor &amp; head |dept. of tce| gsssiet for w...</td>\n",
" <td>[[researcherid, f-7045-2018], [scopus author i...</td>\n",
" <td>[[electronics and communication engineering, p...</td>\n",
" <td>[[acm distinguished speaker (volunteer), assoc...</td>\n",
" <td>93</td>\n",
" <td>[publons, multidisciplinary digital publishing...</td>\n",
" <td>2016-08-24t11:00:30.403z</td>\n",
" <td>2021-03-23t07:16:22.582z</td>\n",
" <td>47</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>48</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[geethashishu.in, geethashishu.in, acm.org, go...</td>\n",
" <td>NaN</td>\n",
" <td>71.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10652632</th>\n",
" <td>0000-0003-2593-7134</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>aan</td>\n",
" <td>jaelani</td>\n",
" <td>all my papers can be downloaded from portal:re...</td>\n",
" <td>[jaelani, a., jaelani, aan]</td>\n",
" <td>aan_jaelani@syekhnurjati.ac.id</td>\n",
" <td>[tourism industry, islamic finance and banking...</td>\n",
" <td>[[scopus author id, 57195963463], [loop profil...</td>\n",
" <td>[[post graduate, s3/dr, universitas islam nege...</td>\n",
" <td>[[dr, institut agama islam negeri syekh nurjat...</td>\n",
" <td>79</td>\n",
" <td>[publons, aan jaelani, scopus - elsevier, dime...</td>\n",
" <td>2016-03-02t18:37:44.989z</td>\n",
" <td>2021-03-19t10:11:57.908z</td>\n",
" <td>88</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>193</td>\n",
" <td>1</td>\n",
" <td>syekhnurjati.ac.id</td>\n",
" <td>[gmail.com]</td>\n",
" <td>[microsoft.com, twitter.com, academia.edu, aca...</td>\n",
" <td>1.0</td>\n",
" <td>67.0</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"1025713 0000-0003-2407-3557 1 1 \n",
"2743648 0000-0002-5710-4041 1 1 \n",
"3011724 0000-0003-2450-090X 1 1 \n",
"3881064 0000-0002-3920-7389 1 1 \n",
"7466062 0000-0002-1929-6054 1 1 \n",
"7517096 0000-0003-4948-9268 1 1 \n",
"8068275 0000-0003-2183-8112 1 1 \n",
"8184260 0000-0002-6938-9638 1 1 \n",
"8791256 0000-0002-9025-8632 1 1 \n",
"10174509 0000-0002-9965-2425 1 1 \n",
"10257808 0000-0002-4062-3603 1 1 \n",
"10486212 0000-0002-3997-5070 1 1 \n",
"10652632 0000-0003-2593-7134 1 1 \n",
"\n",
" given_names family_name \\\n",
"1025713 abdul aziz \n",
"2743648 ryszard romaniuk \n",
"3011724 eduard babulak \n",
"3881064 а. гусев \n",
"7466062 franklin américo canaza choque \n",
"7517096 gustavo duperré \n",
"8068275 pelayo munhoz olea \n",
"8184260 adolfo catral sanabria \n",
"8791256 buycannabis dispensary \n",
"10174509 jaroslaw spychala \n",
"10257808 juan de dios beltrán mancilla \n",
"10486212 dr. parameshachari b d \n",
"10652632 aan jaelani \n",
"\n",
" biography \\\n",
"1025713 abdul aziz was born on may 25, 1973, in brebes... \n",
"2743648 professor of electronics and communications en... \n",
"3011724 professor eduard babulak is accomplished inter... \n",
"3881064 surname, name gusev alexander leonidovichdate... \n",
"7466062 docente-investigador social. maestrando en der... \n",
"7517096 gustavo norberto duperré graduated in arts and... \n",
"8068275 pós-doutorado em gestão ambiental pela univers... \n",
"8184260 my education is in computer science, mathemati... \n",
"8791256 we procure and deliver premium cannabis strain... \n",
"10174509 jaroslaw spychala has received a doctoral degr... \n",
"10257808 juan de dios beltrán mancilla (*) filósofo aut... \n",
"10486212 dr. parameshachari b dacm distinguished speake... \n",
"10652632 all my papers can be downloaded from portal:re... \n",
"\n",
" other_names \\\n",
"1025713 [abdul aziz, aziz, abdul, aziz, a., aziz, abd,... \n",
"2743648 [r.romaniuk, r.s.romaniuk, ryszard romaniuk, r... \n",
"3011724 [professor eduard babulak] \n",
"3881064 [alexander l. gusev , alexander leonidovich gu... \n",
"7466062 [franklin américo canaza-choque , franklin a. ... \n",
"7517096 [gustavo norberto duperré, duperré, g. n., gus... \n",
"8068275 [ munhoz, pelayo olea, olea, pelayo, olea, p... \n",
"8184260 NaN \n",
"8791256 [we procure and deliver premium cannabis strai... \n",
"10174509 [jaroslaw jozef spychala] \n",
"10257808 [juan de dios beltrán mancilla, filósofo autod... \n",
"10486212 [dr. parameshachari b d] \n",
"10652632 [jaelani, a., jaelani, aan] \n",
"\n",
" primary_email \\\n",
"1025713 NaN \n",
"2743648 rrom@ise.pw.edu.pl \n",
"3011724 NaN \n",
"3881064 NaN \n",
"7466062 leo_123fa@hotmail.com \n",
"7517096 gustavo.duperre@usal.edu.ar \n",
"8068275 NaN \n",
"8184260 NaN \n",
"8791256 NaN \n",
"10174509 NaN \n",
"10257808 NaN \n",
"10486212 NaN \n",
"10652632 aan_jaelani@syekhnurjati.ac.id \n",
"\n",
" keywords \\\n",
"1025713 [ekonomi islam, etika bisnis islam, ilmu ekono... \n",
"2743648 [measurement systems, electronics, photonics, ... \n",
"3011724 [next generation of ict and eservices, compute... \n",
"3881064 [photochromic, electrochromic, storage and tra... \n",
"7466062 [justicia global; democracia; derechos humanos... \n",
"7517096 [sciences of antiquity, social sciences, compu... \n",
"8068275 [empreendedorismo, sustentabilidade, inovação] \n",
"8184260 NaN \n",
"8791256 [canabis dispensary, cannabis, cannabis commun... \n",
"10174509 [medicinal and pharmaceutical chemistry, photo... \n",
"10257808 [filosofia medicina arquitectura economía dere... \n",
"10486212 [professor & head |dept. of tce| gsssiet for w... \n",
"10652632 [tourism industry, islamic finance and banking... \n",
"\n",
" external_ids \\\n",
"1025713 NaN \n",
"2743648 [[isni, 0000000071432485], [researcherid, b-91... \n",
"3011724 [[scopus author id, 6506867432], [researcherid... \n",
"3881064 [[researcherid, f-8048-2014], [scopus author i... \n",
"7466062 [[researcherid, p-8613-2018], [loop profile, 8... \n",
"7517096 [[scopus author id, 57195936346], [researcheri... \n",
"8068275 [[scopus author id, 55175503300], [researcheri... \n",
"8184260 [[loop profile, 747193]] \n",
"8791256 NaN \n",
"10174509 [[scopus author id, 7006745874]] \n",
"10257808 NaN \n",
"10486212 [[researcherid, f-7045-2018], [scopus author i... \n",
"10652632 [[scopus author id, 57195963463], [loop profil... \n",
"\n",
" education \\\n",
"1025713 [[ilmu ekonomi, dr, universitas borobudur, jak... \n",
"2743648 [[faculty of electronics and information techn... \n",
"3011724 [[information technology, doctor habilitated (... \n",
"3881064 [[chemical technology and cryogenic-vacuum tec... \n",
"7466062 [[facultad de ciencias de la educación , maest... \n",
"7517096 [[programme in history, history of art and ter... \n",
"8068275 [[, postdoctoral in environmental sustainabili... \n",
"8184260 [[education, capacitación para la enseñanza en... \n",
"8791256 NaN \n",
"10174509 [[department of chemistry, postdoctoral associ... \n",
"10257808 [[, diplomado en practicas directivas para or... \n",
"10486212 [[electronics and communication engineering, p... \n",
"10652632 [[post graduate, s3/dr, universitas islam nege... \n",
"\n",
" employment n_works \\\n",
"1025713 [[assisten professor/dr, institut agama islam ... 72 \n",
"2743648 [[professor, institute director, politechnika ... 5008 \n",
"3011724 [[consultant, horizon 2020 framework programme... 274 \n",
"3881064 [[general director, scientific technical centr... 472 \n",
"7466062 [[investigador social, universidad católica de... 39 \n",
"7517096 [[titular professor, dirección general de cult... 41 \n",
"8068275 [[professor, universidade federal do rio grand... 1109 \n",
"8184260 NaN 2023 \n",
"8791256 NaN 10 \n",
"10174509 [[assistant professor, adam mickiewicz univers... 29 \n",
"10257808 [[inspector general jornada vespertina // de 2... 11 \n",
"10486212 [[acm distinguished speaker (volunteer), assoc... 93 \n",
"10652632 [[dr, institut agama islam negeri syekh nurjat... 79 \n",
"\n",
" works_source \\\n",
"1025713 [base - bielefeld academic search engine, abdu... \n",
"2743648 [inspire-hep, researcherid, isni2orcid search ... \n",
"3011724 [the lens, base - bielefeld academic search en... \n",
"3881064 [publons, datacite, scopus - elsevier, a.l. gu... \n",
"7466062 [researcherid, base - bielefeld academic searc... \n",
"7517096 [gustavo duperré, scopus - elsevier, publons, ... \n",
"8068275 [the lens, pelayo munhoz olea, dimensions, bas... \n",
"8184260 [base - bielefeld academic search engine, data... \n",
"8791256 [goowonderland dispensary] \n",
"10174509 [scopus - elsevier] \n",
"10257808 [juan de dios beltr´´án mancilla] \n",
"10486212 [publons, multidisciplinary digital publishing... \n",
"10652632 [publons, aan jaelani, scopus - elsevier, dime... \n",
"\n",
" activation_date last_update_date n_doi n_arxiv \\\n",
"1025713 2016-09-12t04:41:24.842z 2021-01-26t11:58:33.039z 19 0 \n",
"2743648 2013-01-20t12:09:21.600z 2021-03-16t19:37:31.650z 1221 25 \n",
"3011724 2013-04-03t08:02:30.013z 2021-02-28t10:07:13.231z 199 0 \n",
"3881064 2014-05-14t00:01:28.030z 2021-01-16t13:44:14.134z 37 0 \n",
"7466062 2017-09-15t19:45:43.483z 2021-03-23t20:12:47.297z 30 0 \n",
"7517096 2020-02-22t15:49:52.386z 2021-03-12t15:13:44.065z 13 0 \n",
"8068275 2013-02-04t17:25:34.723z 2021-03-19t18:51:01.128z 798 0 \n",
"8184260 2019-05-07t19:27:02.210z 2020-12-10t23:39:15.236z 2022 0 \n",
"8791256 2020-12-09t21:19:46.004z 2020-12-10t01:17:28.772z 0 0 \n",
"10174509 2014-09-18t12:34:14.242z 2020-02-11t14:31:25.544z 15 0 \n",
"10257808 2020-04-19t21:06:33.495z 2021-02-10t20:13:07.698z 0 0 \n",
"10486212 2016-08-24t11:00:30.403z 2021-03-23t07:16:22.582z 47 0 \n",
"10652632 2016-03-02t18:37:44.989z 2021-03-19t10:11:57.908z 88 0 \n",
"\n",
" n_pmc n_other_pids label primary_email_domain \\\n",
"1025713 0 77 1 NaN \n",
"2743648 0 1742 1 ise.pw.edu.pl \n",
"3011724 1 174 0 NaN \n",
"3881064 0 21 1 NaN \n",
"7466062 0 34 1 hotmail.com \n",
"7517096 0 34 0 usal.edu.ar \n",
"8068275 1 582 1 NaN \n",
"8184260 0 16 1 NaN \n",
"8791256 0 0 0 NaN \n",
"10174509 0 29 1 NaN \n",
"10257808 0 7 0 NaN \n",
"10486212 0 48 1 NaN \n",
"10652632 0 193 1 syekhnurjati.ac.id \n",
"\n",
" other_email_domains \\\n",
"1025713 NaN \n",
"2743648 [ise.pw.edu.pl, elka.pw.edu.pl, cern.ch] \n",
"3011724 NaN \n",
"3881064 NaN \n",
"7466062 [gmail.com, gmail.com, hotmail.com, baldwin.ed... \n",
"7517096 NaN \n",
"8068275 NaN \n",
"8184260 NaN \n",
"8791256 NaN \n",
"10174509 NaN \n",
"10257808 NaN \n",
"10486212 NaN \n",
"10652632 [gmail.com] \n",
"\n",
" url_domains n_emails n_urls \\\n",
"1025713 [google.com, syekhnurjati.ac.id, orcid.org, bl... NaN 59.0 \n",
"2743648 [google.pl, publons.com, scopus.com, mendeley.... 3.0 114.0 \n",
"3011724 [worldassessmentcouncil.org, spseke.sk, bcs.or... NaN 114.0 \n",
"3881064 [youtube.com, isjaee.com, researchgate.net, re... NaN 111.0 \n",
"7466062 [concytec.gob.pe, redalyc.org, redalyc.org, un... 5.0 61.0 \n",
"7517096 [icomos.ro, unirioja.es, unirioja.es, unc.edu.... NaN 61.0 \n",
"8068275 [cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c... NaN 61.0 \n",
"8184260 [researchgate.net, youtube.com, linkedin.com, ... NaN 152.0 \n",
"8791256 [goowonderland.com, goowonderland.com, goowond... NaN 81.0 \n",
"10174509 [biowebspin.com, biowebspin.com, google.com, l... NaN 73.0 \n",
"10257808 [yumpu.com, ijopm.org, google.com, blogspot.co... NaN 69.0 \n",
"10486212 [geethashishu.in, geethashishu.in, acm.org, go... NaN 71.0 \n",
"10652632 [microsoft.com, twitter.com, academia.edu, aca... 1.0 67.0 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"1025713 NaN 4.0 3.0 1.0 \n",
"2743648 3.0 5.0 1.0 1.0 \n",
"3011724 5.0 8.0 6.0 22.0 \n",
"3881064 2.0 16.0 2.0 7.0 \n",
"7466062 4.0 2.0 1.0 1.0 \n",
"7517096 2.0 11.0 6.0 5.0 \n",
"8068275 2.0 3.0 7.0 9.0 \n",
"8184260 1.0 NaN 6.0 NaN \n",
"8791256 NaN 7.0 NaN NaN \n",
"10174509 1.0 4.0 4.0 2.0 \n",
"10257808 NaN 1.0 8.0 6.0 \n",
"10486212 3.0 6.0 5.0 10.0 \n",
"10652632 4.0 7.0 2.0 1.0 "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[(df['url_domains'].str.len() > 50) & (df['n_works'] > 0)]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>47439</th>\n",
" <td>0000-0002-5967-2835</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>oleksiy</td>\n",
" <td>goryayinov</td>\n",
" <td>NaN</td>\n",
" <td>[алексей николаевич горяинов, о.м.горяїнов, а....</td>\n",
" <td>NaN</td>\n",
" <td>[diagnostics, transport, logistics]</td>\n",
" <td>[[researcherid, i-7977-2016]]</td>\n",
" <td>[[, дистанционный курс «ctl.sc2x: supply chain...</td>\n",
" <td>[[docent, kharkiv petro vasylenko national tec...</td>\n",
" <td>274</td>\n",
" <td>[oleksiy goryayinov]</td>\n",
" <td>2014-08-03t18:06:42.925z</td>\n",
" <td>2021-03-22t13:56:48.311z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[khntusg.com.ua, khntusg.com.ua, google.com.ua...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>14.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72557</th>\n",
" <td>0000-0002-3505-2797</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nurul</td>\n",
" <td>malahayati</td>\n",
" <td>google scholar</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, q-3861-2017]]</td>\n",
" <td>[[civil and transportation engineering , maste...</td>\n",
" <td>[[senior lecturer, universitas syiah kuala, ba...</td>\n",
" <td>6</td>\n",
" <td>[nurul malahayati]</td>\n",
" <td>2017-10-01t00:46:31.324z</td>\n",
" <td>2019-08-19t15:52:47.253z</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, ristekdikti.go.id, unsyiah.ac.id,...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94081</th>\n",
" <td>0000-0003-3670-9620</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>carlos</td>\n",
" <td>barrera</td>\n",
" <td>im individual inventor, and this is my work; s...</td>\n",
" <td>[retrodynamic, novelinflow]</td>\n",
" <td>NaN</td>\n",
" <td>[imploturbocompressor, innovation, gearturbine...</td>\n",
" <td>[[loop profile, 394457]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>[carlos barrera]</td>\n",
" <td>2016-08-29t20:32:10.362z</td>\n",
" <td>2021-02-09t04:56:35.554z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[blogspot.mx, behance.net, authorstream.com, d...</td>\n",
" <td>NaN</td>\n",
" <td>24.0</td>\n",
" <td>1.0</td>\n",
" <td>8.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>261673</th>\n",
" <td>0000-0002-5441-0465</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nuria</td>\n",
" <td>hernández-león</td>\n",
" <td>NaN</td>\n",
" <td>[nuria h. león, nuria hernández león, hernánde...</td>\n",
" <td>NaN</td>\n",
" <td>[business management, research, human resource...</td>\n",
" <td>NaN</td>\n",
" <td>[[, course: social skills, university of salam...</td>\n",
" <td>[[merchandise reception and expedition trainer...</td>\n",
" <td>11</td>\n",
" <td>[nuria hernández-león]</td>\n",
" <td>2015-11-28t07:18:58.442z</td>\n",
" <td>2021-03-05t16:37:47.403z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[feriaempresamujer.com, escueladenegociosydire...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>19.0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>326211</th>\n",
" <td>0000-0002-7781-6767</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>mohd nazri</td>\n",
" <td>ismail</td>\n",
" <td>born in penang, malaysia in 1971, dr. mohd had...</td>\n",
" <td>[ndum (national defence university of malaysia)]</td>\n",
" <td>NaN</td>\n",
" <td>[sensor, iot, voice over ip, wsn, design of ne...</td>\n",
" <td>[[scopus author id, 24372977800], [researcheri...</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, universiti pertahanan nasional mal...</td>\n",
" <td>35</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2016-09-06t02:25:52.974z</td>\n",
" <td>2020-10-20t06:55:55.051z</td>\n",
" <td>24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com.my, researchgate.net, academia.edu...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>2.0</td>\n",
" <td>10.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10579801</th>\n",
" <td>0000-0001-5087-6965</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>robert</td>\n",
" <td>ohara</td>\n",
" <td>systematics, evolutionary biology, and the his...</td>\n",
" <td>[r. ohara, r.j. ohara, robert ohara, robert...</td>\n",
" <td>NaN</td>\n",
" <td>[history and philosophy of science, ancient nu...</td>\n",
" <td>[[isni, 0000000138200102], [researcherid, b-47...</td>\n",
" <td>[[biology, ph.d., harvard university, cambridg...</td>\n",
" <td>NaN</td>\n",
" <td>45</td>\n",
" <td>[robert j. ohara]</td>\n",
" <td>2014-09-21t02:45:19.620z</td>\n",
" <td>2020-07-09t06:51:09.228z</td>\n",
" <td>23</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>72</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[rjohara.net, google.com, collegiateway.org, r...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10590882</th>\n",
" <td>0000-0002-3318-9861</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>shagufta</td>\n",
" <td>perveen</td>\n",
" <td>prof. dr. shagufta perveen is a professor at k...</td>\n",
" <td>NaN</td>\n",
" <td>shagufta792000@yahoo.com</td>\n",
" <td>[shagufta perveen university of southampton, s...</td>\n",
" <td>NaN</td>\n",
" <td>[[hej research institute of chemistry, phd che...</td>\n",
" <td>[[professor, king saud university college of p...</td>\n",
" <td>66</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2015-12-21t10:34:06.771z</td>\n",
" <td>2021-02-22t14:58:30.893z</td>\n",
" <td>56</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>66</td>\n",
" <td>1</td>\n",
" <td>yahoo.com</td>\n",
" <td>[msu.edu, ksu.edu.sa]</td>\n",
" <td>[shaguftaperveen.com, researchgate.net, ksu.ed...</td>\n",
" <td>2.0</td>\n",
" <td>11.0</td>\n",
" <td>NaN</td>\n",
" <td>25.0</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10766062</th>\n",
" <td>0000-0001-8960-9004</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>susan</td>\n",
" <td>bastani</td>\n",
" <td>NaN</td>\n",
" <td>[s. bastani, سوسن باستانی]</td>\n",
" <td>sbastani@alzahra.ac.ir</td>\n",
" <td>[online and offline communities, personal netw...</td>\n",
" <td>[[scopus author id, 16642098400]]</td>\n",
" <td>[[sociology, ph.d., university of toronto, tor...</td>\n",
" <td>[[professor, alzahra university, tehran, vanak...</td>\n",
" <td>20</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2019-07-10t06:50:46.255z</td>\n",
" <td>2020-10-07t04:08:01.961z</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" <td>alzahra.ac.ir</td>\n",
" <td>[gmail.com, gmail.com]</td>\n",
" <td>[scopus.com, google.com, publons.com, zenodo.o...</td>\n",
" <td>2.0</td>\n",
" <td>11.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10807839</th>\n",
" <td>0000-0002-4379-6454</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>caroline wanjiru</td>\n",
" <td>kariuki</td>\n",
" <td>caroline holds a phd in economics from curtin ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[applied economics, applied econometrics, deve...</td>\n",
" <td>NaN</td>\n",
" <td>[[economics, doctor of philosophy , curtin uni...</td>\n",
" <td>[[director, educational development, strathmor...</td>\n",
" <td>4</td>\n",
" <td>[caroline wanjiru kariuki]</td>\n",
" <td>2020-03-18t10:18:04.007z</td>\n",
" <td>2021-02-11t14:40:38.515z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[scopus.com, mendeley.com, publons.com, resear...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10911966</th>\n",
" <td>0000-0003-2311-0600</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>myo</td>\n",
" <td>kyaw hlaing</td>\n",
" <td>NaN</td>\n",
" <td>[dr myo kyaw hlaing]</td>\n",
" <td>NaN</td>\n",
" <td>[economic geology]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, union of myanmar ministry of educa...</td>\n",
" <td>2</td>\n",
" <td>[myo kyaw hlaing]</td>\n",
" <td>2018-12-26t12:51:57.801z</td>\n",
" <td>2021-01-26t14:36:47.421z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[facebook.com, linkedin.com, instagram.com, re...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>140 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"47439 0000-0002-5967-2835 1 1 \n",
"72557 0000-0002-3505-2797 1 1 \n",
"94081 0000-0003-3670-9620 1 1 \n",
"261673 0000-0002-5441-0465 1 1 \n",
"326211 0000-0002-7781-6767 1 1 \n",
"... ... ... ... \n",
"10579801 0000-0001-5087-6965 1 1 \n",
"10590882 0000-0002-3318-9861 1 1 \n",
"10766062 0000-0001-8960-9004 1 1 \n",
"10807839 0000-0002-4379-6454 1 1 \n",
"10911966 0000-0003-2311-0600 1 1 \n",
"\n",
" given_names family_name \\\n",
"47439 oleksiy goryayinov \n",
"72557 nurul malahayati \n",
"94081 carlos barrera \n",
"261673 nuria hernández-león \n",
"326211 mohd nazri ismail \n",
"... ... ... \n",
"10579801 robert ohara \n",
"10590882 shagufta perveen \n",
"10766062 susan bastani \n",
"10807839 caroline wanjiru kariuki \n",
"10911966 myo kyaw hlaing \n",
"\n",
" biography \\\n",
"47439 NaN \n",
"72557 google scholar \n",
"94081 im individual inventor, and this is my work; s... \n",
"261673 NaN \n",
"326211 born in penang, malaysia in 1971, dr. mohd had... \n",
"... ... \n",
"10579801 systematics, evolutionary biology, and the his... \n",
"10590882 prof. dr. shagufta perveen is a professor at k... \n",
"10766062 NaN \n",
"10807839 caroline holds a phd in economics from curtin ... \n",
"10911966 NaN \n",
"\n",
" other_names \\\n",
"47439 [алексей николаевич горяинов, о.м.горяїнов, а.... \n",
"72557 NaN \n",
"94081 [retrodynamic, novelinflow] \n",
"261673 [nuria h. león, nuria hernández león, hernánde... \n",
"326211 [ndum (national defence university of malaysia)] \n",
"... ... \n",
"10579801 [r. ohara, r.j. ohara, robert ohara, robert... \n",
"10590882 NaN \n",
"10766062 [s. bastani, سوسن باستانی] \n",
"10807839 NaN \n",
"10911966 [dr myo kyaw hlaing] \n",
"\n",
" primary_email \\\n",
"47439 NaN \n",
"72557 NaN \n",
"94081 NaN \n",
"261673 NaN \n",
"326211 NaN \n",
"... ... \n",
"10579801 NaN \n",
"10590882 shagufta792000@yahoo.com \n",
"10766062 sbastani@alzahra.ac.ir \n",
"10807839 NaN \n",
"10911966 NaN \n",
"\n",
" keywords \\\n",
"47439 [diagnostics, transport, logistics] \n",
"72557 NaN \n",
"94081 [imploturbocompressor, innovation, gearturbine... \n",
"261673 [business management, research, human resource... \n",
"326211 [sensor, iot, voice over ip, wsn, design of ne... \n",
"... ... \n",
"10579801 [history and philosophy of science, ancient nu... \n",
"10590882 [shagufta perveen university of southampton, s... \n",
"10766062 [online and offline communities, personal netw... \n",
"10807839 [applied economics, applied econometrics, deve... \n",
"10911966 [economic geology] \n",
"\n",
" external_ids \\\n",
"47439 [[researcherid, i-7977-2016]] \n",
"72557 [[researcherid, q-3861-2017]] \n",
"94081 [[loop profile, 394457]] \n",
"261673 NaN \n",
"326211 [[scopus author id, 24372977800], [researcheri... \n",
"... ... \n",
"10579801 [[isni, 0000000138200102], [researcherid, b-47... \n",
"10590882 NaN \n",
"10766062 [[scopus author id, 16642098400]] \n",
"10807839 NaN \n",
"10911966 NaN \n",
"\n",
" education \\\n",
"47439 [[, дистанционный курс «ctl.sc2x: supply chain... \n",
"72557 [[civil and transportation engineering , maste... \n",
"94081 NaN \n",
"261673 [[, course: social skills, university of salam... \n",
"326211 NaN \n",
"... ... \n",
"10579801 [[biology, ph.d., harvard university, cambridg... \n",
"10590882 [[hej research institute of chemistry, phd che... \n",
"10766062 [[sociology, ph.d., university of toronto, tor... \n",
"10807839 [[economics, doctor of philosophy , curtin uni... \n",
"10911966 NaN \n",
"\n",
" employment n_works \\\n",
"47439 [[docent, kharkiv petro vasylenko national tec... 274 \n",
"72557 [[senior lecturer, universitas syiah kuala, ba... 6 \n",
"94081 NaN 1 \n",
"261673 [[merchandise reception and expedition trainer... 11 \n",
"326211 [[lecturer, universiti pertahanan nasional mal... 35 \n",
"... ... ... \n",
"10579801 NaN 45 \n",
"10590882 [[professor, king saud university college of p... 66 \n",
"10766062 [[professor, alzahra university, tehran, vanak... 20 \n",
"10807839 [[director, educational development, strathmor... 4 \n",
"10911966 [[lecturer, union of myanmar ministry of educa... 2 \n",
"\n",
" works_source activation_date \\\n",
"47439 [oleksiy goryayinov] 2014-08-03t18:06:42.925z \n",
"72557 [nurul malahayati] 2017-10-01t00:46:31.324z \n",
"94081 [carlos barrera] 2016-08-29t20:32:10.362z \n",
"261673 [nuria hernández-león] 2015-11-28t07:18:58.442z \n",
"326211 [scopus - elsevier] 2016-09-06t02:25:52.974z \n",
"... ... ... \n",
"10579801 [robert j. ohara] 2014-09-21t02:45:19.620z \n",
"10590882 [scopus - elsevier] 2015-12-21t10:34:06.771z \n",
"10766062 [scopus - elsevier] 2019-07-10t06:50:46.255z \n",
"10807839 [caroline wanjiru kariuki] 2020-03-18t10:18:04.007z \n",
"10911966 [myo kyaw hlaing] 2018-12-26t12:51:57.801z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids \\\n",
"47439 2021-03-22t13:56:48.311z 0 0 0 0 \n",
"72557 2019-08-19t15:52:47.253z 3 0 0 3 \n",
"94081 2021-02-09t04:56:35.554z 0 0 0 0 \n",
"261673 2021-03-05t16:37:47.403z 1 0 0 4 \n",
"326211 2020-10-20t06:55:55.051z 24 0 0 35 \n",
"... ... ... ... ... ... \n",
"10579801 2020-07-09t06:51:09.228z 23 0 0 72 \n",
"10590882 2021-02-22t14:58:30.893z 56 0 0 66 \n",
"10766062 2020-10-07t04:08:01.961z 19 0 0 33 \n",
"10807839 2021-02-11t14:40:38.515z 1 0 0 0 \n",
"10911966 2021-01-26t14:36:47.421z 1 0 0 2 \n",
"\n",
" label primary_email_domain other_email_domains \\\n",
"47439 1 NaN NaN \n",
"72557 1 NaN NaN \n",
"94081 1 NaN NaN \n",
"261673 1 NaN NaN \n",
"326211 1 NaN NaN \n",
"... ... ... ... \n",
"10579801 1 NaN NaN \n",
"10590882 1 yahoo.com [msu.edu, ksu.edu.sa] \n",
"10766062 1 alzahra.ac.ir [gmail.com, gmail.com] \n",
"10807839 0 NaN NaN \n",
"10911966 0 NaN NaN \n",
"\n",
" url_domains n_emails n_urls \\\n",
"47439 [khntusg.com.ua, khntusg.com.ua, google.com.ua... NaN 13.0 \n",
"72557 [google.com, ristekdikti.go.id, unsyiah.ac.id,... NaN 16.0 \n",
"94081 [blogspot.mx, behance.net, authorstream.com, d... NaN 24.0 \n",
"261673 [feriaempresamujer.com, escueladenegociosydire... NaN 16.0 \n",
"326211 [google.com.my, researchgate.net, academia.edu... NaN 16.0 \n",
"... ... ... ... \n",
"10579801 [rjohara.net, google.com, collegiateway.org, r... NaN 12.0 \n",
"10590882 [shaguftaperveen.com, researchgate.net, ksu.ed... 2.0 11.0 \n",
"10766062 [scopus.com, google.com, publons.com, zenodo.o... 2.0 11.0 \n",
"10807839 [scopus.com, mendeley.com, publons.com, resear... NaN 13.0 \n",
"10911966 [facebook.com, linkedin.com, instagram.com, re... NaN 12.0 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"47439 1.0 3.0 14.0 7.0 \n",
"72557 1.0 NaN 2.0 1.0 \n",
"94081 1.0 8.0 NaN NaN \n",
"261673 NaN 7.0 19.0 16.0 \n",
"326211 2.0 10.0 NaN 4.0 \n",
"... ... ... ... ... \n",
"10579801 3.0 5.0 1.0 NaN \n",
"10590882 NaN 25.0 3.0 7.0 \n",
"10766062 1.0 4.0 3.0 4.0 \n",
"10807839 NaN 4.0 3.0 6.0 \n",
"10911966 NaN 1.0 NaN 2.0 \n",
"\n",
"[140 rows x 30 columns]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[(df['url_domains'].str.len() > 10) & (df['n_works'] > 0) & (df['works_source'].str.len() == 1)]"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0002-5967-2835</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>oleksiy</td>\n",
" <td>goryayinov</td>\n",
" <td>NaN</td>\n",
" <td>[алексей николаевич горяинов, о.м.горяїнов, а....</td>\n",
" <td>NaN</td>\n",
" <td>[diagnostics, transport, logistics]</td>\n",
" <td>[[researcherid, i-7977-2016]]</td>\n",
" <td>[[, дистанционный курс «ctl.sc2x: supply chain...</td>\n",
" <td>[[docent, kharkiv petro vasylenko national tec...</td>\n",
" <td>274</td>\n",
" <td>oleksiy goryayinov</td>\n",
" <td>2014-08-03t18:06:42.925z</td>\n",
" <td>2021-03-22t13:56:48.311z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[khntusg.com.ua, khntusg.com.ua, google.com.ua...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>14.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0002-3505-2797</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nurul</td>\n",
" <td>malahayati</td>\n",
" <td>google scholar</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, q-3861-2017]]</td>\n",
" <td>[[civil and transportation engineering , maste...</td>\n",
" <td>[[senior lecturer, universitas syiah kuala, ba...</td>\n",
" <td>6</td>\n",
" <td>nurul malahayati</td>\n",
" <td>2017-10-01t00:46:31.324z</td>\n",
" <td>2019-08-19t15:52:47.253z</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, ristekdikti.go.id, unsyiah.ac.id,...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0003-3670-9620</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>carlos</td>\n",
" <td>barrera</td>\n",
" <td>im individual inventor, and this is my work; s...</td>\n",
" <td>[retrodynamic, novelinflow]</td>\n",
" <td>NaN</td>\n",
" <td>[imploturbocompressor, innovation, gearturbine...</td>\n",
" <td>[[loop profile, 394457]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>carlos barrera</td>\n",
" <td>2016-08-29t20:32:10.362z</td>\n",
" <td>2021-02-09t04:56:35.554z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[blogspot.mx, behance.net, authorstream.com, d...</td>\n",
" <td>NaN</td>\n",
" <td>24.0</td>\n",
" <td>1.0</td>\n",
" <td>8.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0002-5441-0465</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nuria</td>\n",
" <td>hernández-león</td>\n",
" <td>NaN</td>\n",
" <td>[nuria h. león, nuria hernández león, hernánde...</td>\n",
" <td>NaN</td>\n",
" <td>[business management, research, human resource...</td>\n",
" <td>NaN</td>\n",
" <td>[[, course: social skills, university of salam...</td>\n",
" <td>[[merchandise reception and expedition trainer...</td>\n",
" <td>11</td>\n",
" <td>nuria hernández-león</td>\n",
" <td>2015-11-28t07:18:58.442z</td>\n",
" <td>2021-03-05t16:37:47.403z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[feriaempresamujer.com, escueladenegociosydire...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>19.0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0002-7781-6767</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>mohd nazri</td>\n",
" <td>ismail</td>\n",
" <td>born in penang, malaysia in 1971, dr. mohd had...</td>\n",
" <td>[ndum (national defence university of malaysia)]</td>\n",
" <td>NaN</td>\n",
" <td>[sensor, iot, voice over ip, wsn, design of ne...</td>\n",
" <td>[[scopus author id, 24372977800], [researcheri...</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, universiti pertahanan nasional mal...</td>\n",
" <td>35</td>\n",
" <td>scopus - elsevier</td>\n",
" <td>2016-09-06t02:25:52.974z</td>\n",
" <td>2020-10-20t06:55:55.051z</td>\n",
" <td>24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com.my, researchgate.net, academia.edu...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>2.0</td>\n",
" <td>10.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>0000-0001-5087-6965</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>robert</td>\n",
" <td>ohara</td>\n",
" <td>systematics, evolutionary biology, and the his...</td>\n",
" <td>[r. ohara, r.j. ohara, robert ohara, robert...</td>\n",
" <td>NaN</td>\n",
" <td>[history and philosophy of science, ancient nu...</td>\n",
" <td>[[isni, 0000000138200102], [researcherid, b-47...</td>\n",
" <td>[[biology, ph.d., harvard university, cambridg...</td>\n",
" <td>NaN</td>\n",
" <td>45</td>\n",
" <td>robert j. ohara</td>\n",
" <td>2014-09-21t02:45:19.620z</td>\n",
" <td>2020-07-09t06:51:09.228z</td>\n",
" <td>23</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>72</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[rjohara.net, google.com, collegiateway.org, r...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>0000-0002-3318-9861</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>shagufta</td>\n",
" <td>perveen</td>\n",
" <td>prof. dr. shagufta perveen is a professor at k...</td>\n",
" <td>NaN</td>\n",
" <td>shagufta792000@yahoo.com</td>\n",
" <td>[shagufta perveen university of southampton, s...</td>\n",
" <td>NaN</td>\n",
" <td>[[hej research institute of chemistry, phd che...</td>\n",
" <td>[[professor, king saud university college of p...</td>\n",
" <td>66</td>\n",
" <td>scopus - elsevier</td>\n",
" <td>2015-12-21t10:34:06.771z</td>\n",
" <td>2021-02-22t14:58:30.893z</td>\n",
" <td>56</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>66</td>\n",
" <td>1</td>\n",
" <td>yahoo.com</td>\n",
" <td>[msu.edu, ksu.edu.sa]</td>\n",
" <td>[shaguftaperveen.com, researchgate.net, ksu.ed...</td>\n",
" <td>2.0</td>\n",
" <td>11.0</td>\n",
" <td>NaN</td>\n",
" <td>25.0</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>0000-0001-8960-9004</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>susan</td>\n",
" <td>bastani</td>\n",
" <td>NaN</td>\n",
" <td>[s. bastani, سوسن باستانی]</td>\n",
" <td>sbastani@alzahra.ac.ir</td>\n",
" <td>[online and offline communities, personal netw...</td>\n",
" <td>[[scopus author id, 16642098400]]</td>\n",
" <td>[[sociology, ph.d., university of toronto, tor...</td>\n",
" <td>[[professor, alzahra university, tehran, vanak...</td>\n",
" <td>20</td>\n",
" <td>scopus - elsevier</td>\n",
" <td>2019-07-10t06:50:46.255z</td>\n",
" <td>2020-10-07t04:08:01.961z</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" <td>alzahra.ac.ir</td>\n",
" <td>[gmail.com, gmail.com]</td>\n",
" <td>[scopus.com, google.com, publons.com, zenodo.o...</td>\n",
" <td>2.0</td>\n",
" <td>11.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>0000-0002-4379-6454</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>caroline wanjiru</td>\n",
" <td>kariuki</td>\n",
" <td>caroline holds a phd in economics from curtin ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[applied economics, applied econometrics, deve...</td>\n",
" <td>NaN</td>\n",
" <td>[[economics, doctor of philosophy , curtin uni...</td>\n",
" <td>[[director, educational development, strathmor...</td>\n",
" <td>4</td>\n",
" <td>caroline wanjiru kariuki</td>\n",
" <td>2020-03-18t10:18:04.007z</td>\n",
" <td>2021-02-11t14:40:38.515z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[scopus.com, mendeley.com, publons.com, resear...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>0000-0003-2311-0600</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>myo</td>\n",
" <td>kyaw hlaing</td>\n",
" <td>NaN</td>\n",
" <td>[dr myo kyaw hlaing]</td>\n",
" <td>NaN</td>\n",
" <td>[economic geology]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, union of myanmar ministry of educa...</td>\n",
" <td>2</td>\n",
" <td>myo kyaw hlaing</td>\n",
" <td>2018-12-26t12:51:57.801z</td>\n",
" <td>2021-01-26t14:36:47.421z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[facebook.com, linkedin.com, instagram.com, re...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>140 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0 0000-0002-5967-2835 1 1 \n",
"1 0000-0002-3505-2797 1 1 \n",
"2 0000-0003-3670-9620 1 1 \n",
"3 0000-0002-5441-0465 1 1 \n",
"4 0000-0002-7781-6767 1 1 \n",
".. ... ... ... \n",
"135 0000-0001-5087-6965 1 1 \n",
"136 0000-0002-3318-9861 1 1 \n",
"137 0000-0001-8960-9004 1 1 \n",
"138 0000-0002-4379-6454 1 1 \n",
"139 0000-0003-2311-0600 1 1 \n",
"\n",
" given_names family_name \\\n",
"0 oleksiy goryayinov \n",
"1 nurul malahayati \n",
"2 carlos barrera \n",
"3 nuria hernández-león \n",
"4 mohd nazri ismail \n",
".. ... ... \n",
"135 robert ohara \n",
"136 shagufta perveen \n",
"137 susan bastani \n",
"138 caroline wanjiru kariuki \n",
"139 myo kyaw hlaing \n",
"\n",
" biography \\\n",
"0 NaN \n",
"1 google scholar \n",
"2 im individual inventor, and this is my work; s... \n",
"3 NaN \n",
"4 born in penang, malaysia in 1971, dr. mohd had... \n",
".. ... \n",
"135 systematics, evolutionary biology, and the his... \n",
"136 prof. dr. shagufta perveen is a professor at k... \n",
"137 NaN \n",
"138 caroline holds a phd in economics from curtin ... \n",
"139 NaN \n",
"\n",
" other_names \\\n",
"0 [алексей николаевич горяинов, о.м.горяїнов, а.... \n",
"1 NaN \n",
"2 [retrodynamic, novelinflow] \n",
"3 [nuria h. león, nuria hernández león, hernánde... \n",
"4 [ndum (national defence university of malaysia)] \n",
".. ... \n",
"135 [r. ohara, r.j. ohara, robert ohara, robert... \n",
"136 NaN \n",
"137 [s. bastani, سوسن باستانی] \n",
"138 NaN \n",
"139 [dr myo kyaw hlaing] \n",
"\n",
" primary_email \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
".. ... \n",
"135 NaN \n",
"136 shagufta792000@yahoo.com \n",
"137 sbastani@alzahra.ac.ir \n",
"138 NaN \n",
"139 NaN \n",
"\n",
" keywords \\\n",
"0 [diagnostics, transport, logistics] \n",
"1 NaN \n",
"2 [imploturbocompressor, innovation, gearturbine... \n",
"3 [business management, research, human resource... \n",
"4 [sensor, iot, voice over ip, wsn, design of ne... \n",
".. ... \n",
"135 [history and philosophy of science, ancient nu... \n",
"136 [shagufta perveen university of southampton, s... \n",
"137 [online and offline communities, personal netw... \n",
"138 [applied economics, applied econometrics, deve... \n",
"139 [economic geology] \n",
"\n",
" external_ids \\\n",
"0 [[researcherid, i-7977-2016]] \n",
"1 [[researcherid, q-3861-2017]] \n",
"2 [[loop profile, 394457]] \n",
"3 NaN \n",
"4 [[scopus author id, 24372977800], [researcheri... \n",
".. ... \n",
"135 [[isni, 0000000138200102], [researcherid, b-47... \n",
"136 NaN \n",
"137 [[scopus author id, 16642098400]] \n",
"138 NaN \n",
"139 NaN \n",
"\n",
" education \\\n",
"0 [[, дистанционный курс «ctl.sc2x: supply chain... \n",
"1 [[civil and transportation engineering , maste... \n",
"2 NaN \n",
"3 [[, course: social skills, university of salam... \n",
"4 NaN \n",
".. ... \n",
"135 [[biology, ph.d., harvard university, cambridg... \n",
"136 [[hej research institute of chemistry, phd che... \n",
"137 [[sociology, ph.d., university of toronto, tor... \n",
"138 [[economics, doctor of philosophy , curtin uni... \n",
"139 NaN \n",
"\n",
" employment n_works \\\n",
"0 [[docent, kharkiv petro vasylenko national tec... 274 \n",
"1 [[senior lecturer, universitas syiah kuala, ba... 6 \n",
"2 NaN 1 \n",
"3 [[merchandise reception and expedition trainer... 11 \n",
"4 [[lecturer, universiti pertahanan nasional mal... 35 \n",
".. ... ... \n",
"135 NaN 45 \n",
"136 [[professor, king saud university college of p... 66 \n",
"137 [[professor, alzahra university, tehran, vanak... 20 \n",
"138 [[director, educational development, strathmor... 4 \n",
"139 [[lecturer, union of myanmar ministry of educa... 2 \n",
"\n",
" works_source activation_date \\\n",
"0 oleksiy goryayinov 2014-08-03t18:06:42.925z \n",
"1 nurul malahayati 2017-10-01t00:46:31.324z \n",
"2 carlos barrera 2016-08-29t20:32:10.362z \n",
"3 nuria hernández-león 2015-11-28t07:18:58.442z \n",
"4 scopus - elsevier 2016-09-06t02:25:52.974z \n",
".. ... ... \n",
"135 robert j. ohara 2014-09-21t02:45:19.620z \n",
"136 scopus - elsevier 2015-12-21t10:34:06.771z \n",
"137 scopus - elsevier 2019-07-10t06:50:46.255z \n",
"138 caroline wanjiru kariuki 2020-03-18t10:18:04.007z \n",
"139 myo kyaw hlaing 2018-12-26t12:51:57.801z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n",
"0 2021-03-22t13:56:48.311z 0 0 0 0 1 \n",
"1 2019-08-19t15:52:47.253z 3 0 0 3 1 \n",
"2 2021-02-09t04:56:35.554z 0 0 0 0 1 \n",
"3 2021-03-05t16:37:47.403z 1 0 0 4 1 \n",
"4 2020-10-20t06:55:55.051z 24 0 0 35 1 \n",
".. ... ... ... ... ... ... \n",
"135 2020-07-09t06:51:09.228z 23 0 0 72 1 \n",
"136 2021-02-22t14:58:30.893z 56 0 0 66 1 \n",
"137 2020-10-07t04:08:01.961z 19 0 0 33 1 \n",
"138 2021-02-11t14:40:38.515z 1 0 0 0 0 \n",
"139 2021-01-26t14:36:47.421z 1 0 0 2 0 \n",
"\n",
" primary_email_domain other_email_domains \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
".. ... ... \n",
"135 NaN NaN \n",
"136 yahoo.com [msu.edu, ksu.edu.sa] \n",
"137 alzahra.ac.ir [gmail.com, gmail.com] \n",
"138 NaN NaN \n",
"139 NaN NaN \n",
"\n",
" url_domains n_emails n_urls \\\n",
"0 [khntusg.com.ua, khntusg.com.ua, google.com.ua... NaN 13.0 \n",
"1 [google.com, ristekdikti.go.id, unsyiah.ac.id,... NaN 16.0 \n",
"2 [blogspot.mx, behance.net, authorstream.com, d... NaN 24.0 \n",
"3 [feriaempresamujer.com, escueladenegociosydire... NaN 16.0 \n",
"4 [google.com.my, researchgate.net, academia.edu... NaN 16.0 \n",
".. ... ... ... \n",
"135 [rjohara.net, google.com, collegiateway.org, r... NaN 12.0 \n",
"136 [shaguftaperveen.com, researchgate.net, ksu.ed... 2.0 11.0 \n",
"137 [scopus.com, google.com, publons.com, zenodo.o... 2.0 11.0 \n",
"138 [scopus.com, mendeley.com, publons.com, resear... NaN 13.0 \n",
"139 [facebook.com, linkedin.com, instagram.com, re... NaN 12.0 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"0 1.0 3.0 14.0 7.0 \n",
"1 1.0 NaN 2.0 1.0 \n",
"2 1.0 8.0 NaN NaN \n",
"3 NaN 7.0 19.0 16.0 \n",
"4 2.0 10.0 NaN 4.0 \n",
".. ... ... ... ... \n",
"135 3.0 5.0 1.0 NaN \n",
"136 NaN 25.0 3.0 7.0 \n",
"137 1.0 4.0 3.0 4.0 \n",
"138 NaN 4.0 3.0 6.0 \n",
"139 NaN 1.0 NaN 2.0 \n",
"\n",
"[140 rows x 30 columns]"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"exploded_sources = df[(df['url_domains'].str.len() > 10) & (df['n_works'] > 0) & (df['works_source'].str.len() == 1)].explode('works_source').reset_index(drop=True)\n",
"exploded_sources"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0002-5967-2835</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>oleksiy</td>\n",
" <td>goryayinov</td>\n",
" <td>NaN</td>\n",
" <td>[алексей николаевич горяинов, о.м.горяїнов, а....</td>\n",
" <td>NaN</td>\n",
" <td>[diagnostics, transport, logistics]</td>\n",
" <td>[[researcherid, i-7977-2016]]</td>\n",
" <td>[[, дистанционный курс «ctl.sc2x: supply chain...</td>\n",
" <td>[[docent, kharkiv petro vasylenko national tec...</td>\n",
" <td>274</td>\n",
" <td>oleksiy goryayinov</td>\n",
" <td>2014-08-03t18:06:42.925z</td>\n",
" <td>2021-03-22t13:56:48.311z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[khntusg.com.ua, khntusg.com.ua, google.com.ua...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>14.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0002-3505-2797</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nurul</td>\n",
" <td>malahayati</td>\n",
" <td>google scholar</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, q-3861-2017]]</td>\n",
" <td>[[civil and transportation engineering , maste...</td>\n",
" <td>[[senior lecturer, universitas syiah kuala, ba...</td>\n",
" <td>6</td>\n",
" <td>nurul malahayati</td>\n",
" <td>2017-10-01t00:46:31.324z</td>\n",
" <td>2019-08-19t15:52:47.253z</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, ristekdikti.go.id, unsyiah.ac.id,...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0003-3670-9620</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>carlos</td>\n",
" <td>barrera</td>\n",
" <td>im individual inventor, and this is my work; s...</td>\n",
" <td>[retrodynamic, novelinflow]</td>\n",
" <td>NaN</td>\n",
" <td>[imploturbocompressor, innovation, gearturbine...</td>\n",
" <td>[[loop profile, 394457]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>carlos barrera</td>\n",
" <td>2016-08-29t20:32:10.362z</td>\n",
" <td>2021-02-09t04:56:35.554z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[blogspot.mx, behance.net, authorstream.com, d...</td>\n",
" <td>NaN</td>\n",
" <td>24.0</td>\n",
" <td>1.0</td>\n",
" <td>8.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0002-5441-0465</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nuria</td>\n",
" <td>hernández-león</td>\n",
" <td>NaN</td>\n",
" <td>[nuria h. león, nuria hernández león, hernánde...</td>\n",
" <td>NaN</td>\n",
" <td>[business management, research, human resource...</td>\n",
" <td>NaN</td>\n",
" <td>[[, course: social skills, university of salam...</td>\n",
" <td>[[merchandise reception and expedition trainer...</td>\n",
" <td>11</td>\n",
" <td>nuria hernández-león</td>\n",
" <td>2015-11-28t07:18:58.442z</td>\n",
" <td>2021-03-05t16:37:47.403z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[feriaempresamujer.com, escueladenegociosydire...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>19.0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0000-0001-7010-2908</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>clara</td>\n",
" <td>sarmento</td>\n",
" <td>clara sarmento holds an aggregation in cultura...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[feminist and gender studies, tourism and busi...</td>\n",
" <td>[[ciência id, d418-d6f8-7d49]]</td>\n",
" <td>[[ao abrigo da bolsa santander ie best practic...</td>\n",
" <td>[[presidente da comissão de acreditação do nov...</td>\n",
" <td>275</td>\n",
" <td>clara sarmento</td>\n",
" <td>2013-12-12t00:33:58.190z</td>\n",
" <td>2020-10-12t14:43:00.749z</td>\n",
" <td>17</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[iscap.pt, google.pt, academia.edu, researchga...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>1.0</td>\n",
" <td>6.0</td>\n",
" <td>8.0</td>\n",
" <td>37.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>133</th>\n",
" <td>0000-0003-1020-1351</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>sheikh saifullah</td>\n",
" <td>ahmed</td>\n",
" <td>sheikh saifullah ahmed is a full-time lecturer...</td>\n",
" <td>NaN</td>\n",
" <td>saifullahahmedku@gmail.com</td>\n",
" <td>[post-truth, critical trauma analysis, postmod...</td>\n",
" <td>NaN</td>\n",
" <td>[[english discipline , ma &amp; ba in english , kh...</td>\n",
" <td>[[lecturer , international university of busin...</td>\n",
" <td>3</td>\n",
" <td>sheikh saifullah ahmed</td>\n",
" <td>2020-04-08t21:00:11.201z</td>\n",
" <td>2021-02-12t20:45:32.247z</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>gmail.com</td>\n",
" <td>NaN</td>\n",
" <td>[academia.edu, iubat.edu, google.com, research...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>0000-0001-7228-5680</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>text</td>\n",
" <td>protocol</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[engineer, textprotocol.org, palo alto, ca, u...</td>\n",
" <td>1</td>\n",
" <td>text protocol</td>\n",
" <td>2021-03-09t10:30:32.237z</td>\n",
" <td>2021-03-21t17:17:40.500z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[about.me, figma.com, github.com, gitlab.com, ...</td>\n",
" <td>NaN</td>\n",
" <td>15.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>0000-0001-5087-6965</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>robert</td>\n",
" <td>ohara</td>\n",
" <td>systematics, evolutionary biology, and the his...</td>\n",
" <td>[r. ohara, r.j. ohara, robert ohara, robert...</td>\n",
" <td>NaN</td>\n",
" <td>[history and philosophy of science, ancient nu...</td>\n",
" <td>[[isni, 0000000138200102], [researcherid, b-47...</td>\n",
" <td>[[biology, ph.d., harvard university, cambridg...</td>\n",
" <td>NaN</td>\n",
" <td>45</td>\n",
" <td>robert j. ohara</td>\n",
" <td>2014-09-21t02:45:19.620z</td>\n",
" <td>2020-07-09t06:51:09.228z</td>\n",
" <td>23</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>72</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[rjohara.net, google.com, collegiateway.org, r...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>0000-0002-4379-6454</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>caroline wanjiru</td>\n",
" <td>kariuki</td>\n",
" <td>caroline holds a phd in economics from curtin ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[applied economics, applied econometrics, deve...</td>\n",
" <td>NaN</td>\n",
" <td>[[economics, doctor of philosophy , curtin uni...</td>\n",
" <td>[[director, educational development, strathmor...</td>\n",
" <td>4</td>\n",
" <td>caroline wanjiru kariuki</td>\n",
" <td>2020-03-18t10:18:04.007z</td>\n",
" <td>2021-02-11t14:40:38.515z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[scopus.com, mendeley.com, publons.com, resear...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>0000-0003-2311-0600</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>myo</td>\n",
" <td>kyaw hlaing</td>\n",
" <td>NaN</td>\n",
" <td>[dr myo kyaw hlaing]</td>\n",
" <td>NaN</td>\n",
" <td>[economic geology]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, union of myanmar ministry of educa...</td>\n",
" <td>2</td>\n",
" <td>myo kyaw hlaing</td>\n",
" <td>2018-12-26t12:51:57.801z</td>\n",
" <td>2021-01-26t14:36:47.421z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[facebook.com, linkedin.com, instagram.com, re...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>113 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0 0000-0002-5967-2835 1 1 \n",
"1 0000-0002-3505-2797 1 1 \n",
"2 0000-0003-3670-9620 1 1 \n",
"3 0000-0002-5441-0465 1 1 \n",
"5 0000-0001-7010-2908 1 1 \n",
".. ... ... ... \n",
"133 0000-0003-1020-1351 1 1 \n",
"134 0000-0001-7228-5680 1 1 \n",
"135 0000-0001-5087-6965 1 1 \n",
"138 0000-0002-4379-6454 1 1 \n",
"139 0000-0003-2311-0600 1 1 \n",
"\n",
" given_names family_name \\\n",
"0 oleksiy goryayinov \n",
"1 nurul malahayati \n",
"2 carlos barrera \n",
"3 nuria hernández-león \n",
"5 clara sarmento \n",
".. ... ... \n",
"133 sheikh saifullah ahmed \n",
"134 text protocol \n",
"135 robert ohara \n",
"138 caroline wanjiru kariuki \n",
"139 myo kyaw hlaing \n",
"\n",
" biography \\\n",
"0 NaN \n",
"1 google scholar \n",
"2 im individual inventor, and this is my work; s... \n",
"3 NaN \n",
"5 clara sarmento holds an aggregation in cultura... \n",
".. ... \n",
"133 sheikh saifullah ahmed is a full-time lecturer... \n",
"134 NaN \n",
"135 systematics, evolutionary biology, and the his... \n",
"138 caroline holds a phd in economics from curtin ... \n",
"139 NaN \n",
"\n",
" other_names \\\n",
"0 [алексей николаевич горяинов, о.м.горяїнов, а.... \n",
"1 NaN \n",
"2 [retrodynamic, novelinflow] \n",
"3 [nuria h. león, nuria hernández león, hernánde... \n",
"5 NaN \n",
".. ... \n",
"133 NaN \n",
"134 NaN \n",
"135 [r. ohara, r.j. ohara, robert ohara, robert... \n",
"138 NaN \n",
"139 [dr myo kyaw hlaing] \n",
"\n",
" primary_email \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"5 NaN \n",
".. ... \n",
"133 saifullahahmedku@gmail.com \n",
"134 NaN \n",
"135 NaN \n",
"138 NaN \n",
"139 NaN \n",
"\n",
" keywords \\\n",
"0 [diagnostics, transport, logistics] \n",
"1 NaN \n",
"2 [imploturbocompressor, innovation, gearturbine... \n",
"3 [business management, research, human resource... \n",
"5 [feminist and gender studies, tourism and busi... \n",
".. ... \n",
"133 [post-truth, critical trauma analysis, postmod... \n",
"134 NaN \n",
"135 [history and philosophy of science, ancient nu... \n",
"138 [applied economics, applied econometrics, deve... \n",
"139 [economic geology] \n",
"\n",
" external_ids \\\n",
"0 [[researcherid, i-7977-2016]] \n",
"1 [[researcherid, q-3861-2017]] \n",
"2 [[loop profile, 394457]] \n",
"3 NaN \n",
"5 [[ciência id, d418-d6f8-7d49]] \n",
".. ... \n",
"133 NaN \n",
"134 NaN \n",
"135 [[isni, 0000000138200102], [researcherid, b-47... \n",
"138 NaN \n",
"139 NaN \n",
"\n",
" education \\\n",
"0 [[, дистанционный курс «ctl.sc2x: supply chain... \n",
"1 [[civil and transportation engineering , maste... \n",
"2 NaN \n",
"3 [[, course: social skills, university of salam... \n",
"5 [[ao abrigo da bolsa santander ie best practic... \n",
".. ... \n",
"133 [[english discipline , ma & ba in english , kh... \n",
"134 NaN \n",
"135 [[biology, ph.d., harvard university, cambridg... \n",
"138 [[economics, doctor of philosophy , curtin uni... \n",
"139 NaN \n",
"\n",
" employment n_works \\\n",
"0 [[docent, kharkiv petro vasylenko national tec... 274 \n",
"1 [[senior lecturer, universitas syiah kuala, ba... 6 \n",
"2 NaN 1 \n",
"3 [[merchandise reception and expedition trainer... 11 \n",
"5 [[presidente da comissão de acreditação do nov... 275 \n",
".. ... ... \n",
"133 [[lecturer , international university of busin... 3 \n",
"134 [[engineer, textprotocol.org, palo alto, ca, u... 1 \n",
"135 NaN 45 \n",
"138 [[director, educational development, strathmor... 4 \n",
"139 [[lecturer, union of myanmar ministry of educa... 2 \n",
"\n",
" works_source activation_date \\\n",
"0 oleksiy goryayinov 2014-08-03t18:06:42.925z \n",
"1 nurul malahayati 2017-10-01t00:46:31.324z \n",
"2 carlos barrera 2016-08-29t20:32:10.362z \n",
"3 nuria hernández-león 2015-11-28t07:18:58.442z \n",
"5 clara sarmento 2013-12-12t00:33:58.190z \n",
".. ... ... \n",
"133 sheikh saifullah ahmed 2020-04-08t21:00:11.201z \n",
"134 text protocol 2021-03-09t10:30:32.237z \n",
"135 robert j. ohara 2014-09-21t02:45:19.620z \n",
"138 caroline wanjiru kariuki 2020-03-18t10:18:04.007z \n",
"139 myo kyaw hlaing 2018-12-26t12:51:57.801z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n",
"0 2021-03-22t13:56:48.311z 0 0 0 0 1 \n",
"1 2019-08-19t15:52:47.253z 3 0 0 3 1 \n",
"2 2021-02-09t04:56:35.554z 0 0 0 0 1 \n",
"3 2021-03-05t16:37:47.403z 1 0 0 4 1 \n",
"5 2020-10-12t14:43:00.749z 17 0 0 60 1 \n",
".. ... ... ... ... ... ... \n",
"133 2021-02-12t20:45:32.247z 2 0 0 3 0 \n",
"134 2021-03-21t17:17:40.500z 0 0 0 0 0 \n",
"135 2020-07-09t06:51:09.228z 23 0 0 72 1 \n",
"138 2021-02-11t14:40:38.515z 1 0 0 0 0 \n",
"139 2021-01-26t14:36:47.421z 1 0 0 2 0 \n",
"\n",
" primary_email_domain other_email_domains \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"5 NaN NaN \n",
".. ... ... \n",
"133 gmail.com NaN \n",
"134 NaN NaN \n",
"135 NaN NaN \n",
"138 NaN NaN \n",
"139 NaN NaN \n",
"\n",
" url_domains n_emails n_urls \\\n",
"0 [khntusg.com.ua, khntusg.com.ua, google.com.ua... NaN 13.0 \n",
"1 [google.com, ristekdikti.go.id, unsyiah.ac.id,... NaN 16.0 \n",
"2 [blogspot.mx, behance.net, authorstream.com, d... NaN 24.0 \n",
"3 [feriaempresamujer.com, escueladenegociosydire... NaN 16.0 \n",
"5 [iscap.pt, google.pt, academia.edu, researchga... NaN 13.0 \n",
".. ... ... ... \n",
"133 [academia.edu, iubat.edu, google.com, research... NaN 12.0 \n",
"134 [about.me, figma.com, github.com, gitlab.com, ... NaN 15.0 \n",
"135 [rjohara.net, google.com, collegiateway.org, r... NaN 12.0 \n",
"138 [scopus.com, mendeley.com, publons.com, resear... NaN 13.0 \n",
"139 [facebook.com, linkedin.com, instagram.com, re... NaN 12.0 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"0 1.0 3.0 14.0 7.0 \n",
"1 1.0 NaN 2.0 1.0 \n",
"2 1.0 8.0 NaN NaN \n",
"3 NaN 7.0 19.0 16.0 \n",
"5 1.0 6.0 8.0 37.0 \n",
".. ... ... ... ... \n",
"133 NaN 5.0 1.0 1.0 \n",
"134 NaN NaN NaN 1.0 \n",
"135 3.0 5.0 1.0 NaN \n",
"138 NaN 4.0 3.0 6.0 \n",
"139 NaN 1.0 NaN 2.0 \n",
"\n",
"[113 rows x 30 columns]"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"exploded_sources[exploded_sources.apply(lambda x: x['works_source'].find(x['given_names']) >= 0, axis=1)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Works source"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"def remove_own_source(lst, given, family):\n",
" res = []\n",
" for ws in lst:\n",
" if ws.lower().find(given.lower()) == -1:\n",
" if pd.notna(family):\n",
" if ws.lower().find(family.lower()) == -1:\n",
" res.append(ws)\n",
" else:\n",
" res.append(ws)\n",
" return res"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"df['ext_works_source'] = df[(df.works_source.notna()) & (df.given_names.notna())]\\\n",
" .apply(lambda x: remove_own_source(x['works_source'], x['given_names'], x['family_name']), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"df['n_ext_work_source'] = df.ext_works_source.str.len()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"exploded_external_sources = df[df['ext_works_source'].str.len() > 0][['orcid','ext_works_source']]\\\n",
" .explode('ext_works_source').reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"grouped_ext_sources = exploded_external_sources.groupby('ext_works_source')\\\n",
" .count()\\\n",
" .sort_values('orcid', ascending=False)\\\n",
" .reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"crossref",
"scopus - elsevier",
"crossref metadata search",
"multidisciplinary digital publishing institute",
"europe pubmed central",
"researcherid",
"publons",
"ciênciavitae",
"base - bielefeld academic search engine",
"datacite",
"redalyc",
"mla international bibliography",
"deutsche nationalbibliothek (dnb)",
"nasa astrophysics data system",
"national information processing institute ",
"f1000",
"inspire-hep",
"university of helsinki",
"hal",
"igi global",
"airiti",
"university of copenhagen",
"universidade federal de uberlândia",
"aarhus university",
"universidad del país vasco",
"university of manchester - pure",
"kings college london",
"university of southern denmark",
"wellcome open research",
"macquarie university"
],
"y": [
1460841,
902231,
297684,
281664,
181605,
158148,
39786,
32315,
20699,
16107,
9640,
8059,
7855,
7403,
6509,
5221,
4872,
4152,
4136,
3833,
3725,
3127,
2718,
2311,
2271,
2227,
2199,
2185,
2113,
2053
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 works_source"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"1973265c-1a28-4ddb-a35d-d6e365f24978\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"1973265c-1a28-4ddb-a35d-d6e365f24978\")) { Plotly.newPlot( \"1973265c-1a28-4ddb-a35d-d6e365f24978\", [{\"type\": \"bar\", \"x\": [\"crossref\", \"scopus - elsevier\", \"crossref metadata search\", \"multidisciplinary digital publishing institute\", \"europe pubmed central\", \"researcherid\", \"publons\", \"ci\\u00eanciavitae\", \"base - bielefeld academic search engine\", \"datacite\", \"redalyc\", \"mla international bibliography\", \"deutsche nationalbibliothek (dnb)\", \"nasa astrophysics data system\", \"national information processing institute \", \"f1000\", \"inspire-hep\", \"university of helsinki\", \"hal\", \"igi global\", \"airiti\", \"university of copenhagen\", \"universidade federal de uberl\\u00e2ndia\", \"aarhus university\", \"universidad del pa\\u00eds vasco\", \"university of manchester - pure\", \"kings college london\", \"university of southern denmark\", \"wellcome open research\", \"macquarie university\"], \"y\": [1460841, 902231, 297684, 281664, 181605, 158148, 39786, 32315, 20699, 16107, 9640, 8059, 7855, 7403, 6509, 5221, 4872, 4152, 4136, 3833, 3725, 3127, 2718, 2311, 2271, 2227, 2199, 2185, 2113, 2053]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": 
\"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, 
\"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], 
\"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", 
\"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 30 works_source\"}, \"xaxis\": {\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('1973265c-1a28-4ddb-a35d-d6e365f24978');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data = [\n",
" go.Bar(\n",
" x=grouped_ext_sources[:30].ext_works_source,\n",
" y=grouped_ext_sources[:30].orcid\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top 30 works_source',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ext_works_source</th>\n",
" <th>orcid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>crossref</td>\n",
" <td>1460841</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>scopus - elsevier</td>\n",
" <td>902231</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>crossref metadata search</td>\n",
" <td>297684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>multidisciplinary digital publishing institute</td>\n",
" <td>281664</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>europe pubmed central</td>\n",
" <td>181605</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>337</th>\n",
" <td>uta - oa journal global insight</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338</th>\n",
" <td>francis crick institute</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339</th>\n",
" <td>anna</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>340</th>\n",
" <td>santos</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>341</th>\n",
" <td>universitäts- und stadtbibliothek köln</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>342 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" ext_works_source orcid\n",
"0 crossref 1460841\n",
"1 scopus - elsevier 902231\n",
"2 crossref metadata search 297684\n",
"3 multidisciplinary digital publishing institute 281664\n",
"4 europe pubmed central 181605\n",
".. ... ...\n",
"337 uta - oa journal global insight 3\n",
"338 francis crick institute 3\n",
"339 anna 3\n",
"340 santos 3\n",
"341 universitäts- und stadtbibliothek köln 3\n",
"\n",
"[342 rows x 2 columns]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"authoritative_sources = grouped_ext_sources[grouped_ext_sources['orcid'] > 2]\n",
"authoritative_sources"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"exploded_external_sources['authoritative'] = exploded_external_sources.ext_works_source\\\n",
" .isin(authoritative_sources['ext_works_source'])"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"orcid_authoritative_source = exploded_external_sources\\\n",
" .groupby('orcid')['authoritative']\\\n",
" .any()\\\n",
" .reset_index()[['orcid', 'authoritative']]"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"df = df.set_index('orcid').join(orcid_authoritative_source.set_index('orcid')).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"df.loc[df.authoritative.isna(), 'authoritative'] = False"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0001-6097-3953</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-03-02t09:29:16.528z</td>\n",
" <td>2018-03-02t09:43:07.551z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0001-6112-5550</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[v.i. yurtaev; v. yurtaev]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[professor, peoples friendship university of ...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-04-03t07:50:23.358z</td>\n",
" <td>2020-03-18t09:42:44.753z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0001-6152-2695</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2019-12-11t15:31:56.388z</td>\n",
" <td>2020-01-28t15:34:17.309z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0001-6220-5683</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[research scientist, new york university abu ...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-08-18t12:36:45.307z</td>\n",
" <td>2020-09-23t13:37:54.180z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0001-7071-8294</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcher (academic), universidad de zarago...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2014-03-10t13:22:01.966z</td>\n",
" <td>2016-06-14t22:17:54.470z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email given_names \\\n",
"0 0000-0001-6097-3953 0 0 NaN \n",
"1 0000-0001-6112-5550 1 1 NaN \n",
"2 0000-0001-6152-2695 1 1 NaN \n",
"3 0000-0001-6220-5683 1 1 NaN \n",
"4 0000-0001-7071-8294 1 1 NaN \n",
"\n",
" family_name biography other_names primary_email keywords \\\n",
"0 NaN NaN NaN NaN NaN \n",
"1 NaN NaN [v.i. yurtaev; v. yurtaev] NaN NaN \n",
"2 NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" external_ids education employment \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN [[professor, peoples friendship university of ... \n",
"2 NaN NaN NaN \n",
"3 NaN NaN [[research scientist, new york university abu ... \n",
"4 NaN NaN [[researcher (academic), universidad de zarago... \n",
"\n",
" n_works works_source activation_date last_update_date \\\n",
"0 0 NaN 2018-03-02t09:29:16.528z 2018-03-02t09:43:07.551z \n",
"1 0 NaN 2018-04-03t07:50:23.358z 2020-03-18t09:42:44.753z \n",
"2 0 NaN 2019-12-11t15:31:56.388z 2020-01-28t15:34:17.309z \n",
"3 0 NaN 2015-08-18t12:36:45.307z 2020-09-23t13:37:54.180z \n",
"4 0 NaN 2014-03-10t13:22:01.966z 2016-06-14t22:17:54.470z \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"0 0 0 0 0 0 NaN \n",
"1 0 0 0 0 0 NaN \n",
"2 0 0 0 0 0 NaN \n",
"3 0 0 0 0 0 NaN \n",
"4 0 0 0 0 0 NaN \n",
"\n",
" other_email_domains url_domains n_emails n_urls n_ids n_keywords \\\n",
"0 NaN NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" n_education n_employment ext_works_source n_ext_work_source authoritative \n",
"0 NaN NaN NaN NaN False \n",
"1 NaN 1.0 NaN NaN False \n",
"2 NaN NaN NaN NaN False \n",
"3 NaN 1.0 NaN NaN False \n",
"4 NaN 2.0 NaN NaN False "
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## External IDs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"External IDs should come from reliable sources. ORCiD registrants cannot add them freely."
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 1.308598e+06\n",
"mean 1.359082e+00\n",
"std 6.643235e-01\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 8.000000e+01\n",
"Name: n_ids, dtype: float64"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.n_ids.describe()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3896226</th>\n",
" <td>0000-0002-9554-6633</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>john a</td>\n",
" <td>williams</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[scopus author id,  55553733518], [scopus aut...</td>\n",
" <td>NaN</td>\n",
" <td>[[, aston university, birmingham, , gb, 1722, ...</td>\n",
" <td>92</td>\n",
" <td>[aston research explorer]</td>\n",
" <td>2014-11-20t09:42:10.690z</td>\n",
" <td>2021-03-17t01:00:51.203z</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>208</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[aston.ac.uk]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>80.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>[aston research explorer]</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"3896226 0000-0002-9554-6633 1 1 \n",
"\n",
" given_names family_name biography other_names primary_email keywords \\\n",
"3896226 john a williams NaN NaN NaN NaN \n",
"\n",
" external_ids education \\\n",
"3896226 [[scopus author id,  55553733518], [scopus aut... NaN \n",
"\n",
" employment n_works \\\n",
"3896226 [[, aston university, birmingham, , gb, 1722, ... 92 \n",
"\n",
" works_source activation_date \\\n",
"3896226 [aston research explorer] 2014-11-20t09:42:10.690z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n",
"3896226 2021-03-17t01:00:51.203z 80 0 0 208 1 \n",
"\n",
" primary_email_domain other_email_domains url_domains n_emails \\\n",
"3896226 NaN NaN [aston.ac.uk] NaN \n",
"\n",
" n_urls n_ids n_keywords n_education n_employment \\\n",
"3896226 1.0 80.0 NaN NaN 1.0 \n",
"\n",
" ext_works_source n_ext_work_source authoritative \n",
"3896226 [aston research explorer] 1.0 True "
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.n_ids == df.n_ids.max()]"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"ids = df[['orcid', 'external_ids']].explode('external_ids').reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"ids['provider'] = ids[ids.external_ids.notna()]['external_ids'].apply(lambda x: x[0])"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>external_ids</th>\n",
" <th>provider</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0000-0001-8315-2066</td>\n",
" <td>[researcherid, k-4630-2014]</td>\n",
" <td>researcherid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0000-0002-2638-4108</td>\n",
" <td>[scopus author id, 54394231000]</td>\n",
" <td>scopus author id</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>0000-0003-1435-6545</td>\n",
" <td>[researcherid, p-2223-2018]</td>\n",
" <td>researcherid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>0000-0003-2259-7023</td>\n",
" <td>[scopus author id, 57189297461]</td>\n",
" <td>scopus author id</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>0000-0002-7397-5824</td>\n",
" <td>[scopus author id, 8399842800]</td>\n",
" <td>scopus author id</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid external_ids provider\n",
"9 0000-0001-8315-2066 [researcherid, k-4630-2014] researcherid\n",
"29 0000-0002-2638-4108 [scopus author id, 54394231000] scopus author id\n",
"46 0000-0003-1435-6545 [researcherid, p-2223-2018] researcherid\n",
"50 0000-0003-2259-7023 [scopus author id, 57189297461] scopus author id\n",
"64 0000-0002-7397-5824 [scopus author id, 8399842800] scopus author id"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ids[ids.provider.notna()].head()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"top_ids_providers = ids.groupby('provider').count().sort_values('orcid', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"scopus author id",
"researcherid",
"loop profile",
"ciência id",
"researcher name resolver id",
"sciprofile",
"中国科学家在线",
"isni",
"gnd",
"pitt id",
"technical university of denmark cwis",
"researcher id",
"id dialnet",
"digital author id",
"scopus author id: ",
"authenticusid",
"hku researcherpage",
"uow scholars",
"cti vitae",
"scopus author id:",
"hkust profile",
"chalmers id",
"scopus id",
"iauthor",
"google scholar",
"digital author id (dai)",
"authid",
"dai",
"us epa vivo",
"scopus id",
"authenticus",
"smithsonian profiles",
"github",
"escientist",
"vivo cornell",
"researcherid:",
"id dialnet:",
"dialnet id",
"sciprofiles",
"kaken",
"une researcher id",
"researcherid: ",
"orcid",
"scienceopen",
"profile system identifier",
"orcid id",
"custom"
],
"y": [
1037239,
545399,
118645,
37042,
7954,
5164,
4811,
3089,
2999,
2679,
2483,
1452,
1169,
1126,
1077,
878,
741,
646,
582,
547,
523,
430,
256,
212,
201,
180,
175,
155,
146,
127,
83,
61,
51,
49,
46,
39,
7,
6,
5,
5,
4,
3,
2,
1,
1,
1,
1
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "IDs provided by providers"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"a52a99be-a67a-42e0-b36b-ae05461a1f5f\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"a52a99be-a67a-42e0-b36b-ae05461a1f5f\")) { Plotly.newPlot( \"a52a99be-a67a-42e0-b36b-ae05461a1f5f\", [{\"type\": \"bar\", \"x\": [\"scopus author id\", \"researcherid\", \"loop profile\", \"ci\\u00eancia id\", \"researcher name resolver id\", \"sciprofile\", \"\\u4e2d\\u56fd\\u79d1\\u5b66\\u5bb6\\u5728\\u7ebf\", \"isni\", \"gnd\", \"pitt id\", \"technical university of denmark cwis\", \"researcher id\", \"id dialnet\", \"digital author id\", \"scopus author id: \", \"authenticusid\", \"hku researcherpage\", \"uow scholars\", \"cti vitae\", \"scopus author id:\", \"hkust profile\", \"chalmers id\", \"scopus id\", \"iauthor\", \"google scholar\", \"digital author id (dai)\", \"authid\", \"dai\", \"us epa vivo\", \"scopus id\", \"authenticus\", \"smithsonian profiles\", \"github\", \"escientist\", \"vivo cornell\", \"researcherid:\", \"id dialnet:\", \"dialnet id\", \"sciprofiles\", \"kaken\", \"une researcher id\", \"researcherid: \", \"orcid\", \"scienceopen\", \"profile system identifier\", \"orcid id\", \"custom\"], \"y\": [1037239, 545399, 118645, 37042, 7954, 5164, 4811, 3089, 2999, 2679, 2483, 1452, 1169, 1126, 1077, 878, 741, 646, 582, 547, 523, 430, 256, 212, 201, 180, 175, 155, 146, 127, 83, 61, 51, 49, 46, 39, 7, 6, 5, 5, 4, 3, 2, 1, 1, 1, 1]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", 
\"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], 
[0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], 
\"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", 
\"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"IDs provided by 
providers\"}, \"xaxis\": {\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('a52a99be-a67a-42e0-b36b-ae05461a1f5f');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data = [\n",
" go.Bar(\n",
" x=top_ids_providers.index,\n",
" y=top_ids_providers['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='IDs provided by providers',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([nan, 'researcherid', 'scopus author id', 'loop profile', 'gnd',\n",
" 'ciência id', 'researcher name resolver id', 'pitt id',\n",
" 'id dialnet', 'isni', 'technical university of denmark cwis',\n",
" 'chalmers id', 'scopus author id: ', 'scopus author id:',\n",
" 'hkust profile', 'hku researcherpage', '中国科学家在线', 'uow scholars',\n",
" 'sciprofile', 'cti vitae', 'digital author id', 'researcher id',\n",
" 'authenticusid', 'authid', 'authenticus', 'scopus id',\n",
" 'digital author id (dai)', 'researcherid:', 'vivo cornell',\n",
" 'us epa vivo', 'escientist', 'github', 'iauthor', 'orcid id',\n",
" 'dai', 'scopus id', 'smithsonian profiles', 'google scholar',\n",
" 'kaken', 'dialnet id', 'researcherid: ', 'une researcher id',\n",
" 'sciprofiles', 'id dialnet:', 'scienceopen', 'orcid',\n",
" 'profile system identifier', 'custom'], dtype=object)"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.unique(ids['provider'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Keywords"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This field is problematic because users can be nasty and put multiple keywords into a single entry instead of entering them as separate keywords. Look at this:"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>n_keywords</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3751714</th>\n",
" <td>0000-0002-0673-0341</td>\n",
" <td>154.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8697926</th>\n",
" <td>0000-0003-3343-5660</td>\n",
" <td>148.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1154523</th>\n",
" <td>0000-0002-6075-3501</td>\n",
" <td>140.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6512971</th>\n",
" <td>0000-0002-7060-4112</td>\n",
" <td>140.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1515197</th>\n",
" <td>0000-0001-5287-1949</td>\n",
" <td>132.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989644</th>\n",
" <td>0000-0002-1686-1935</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989645</th>\n",
" <td>0000-0002-3800-6331</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989646</th>\n",
" <td>0000-0002-8783-5814</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989647</th>\n",
" <td>0000-0002-7584-2283</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989648</th>\n",
" <td>0000-0003-0529-3538</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10989649 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid n_keywords\n",
"3751714 0000-0002-0673-0341 154.0\n",
"8697926 0000-0003-3343-5660 148.0\n",
"1154523 0000-0002-6075-3501 140.0\n",
"6512971 0000-0002-7060-4112 140.0\n",
"1515197 0000-0001-5287-1949 132.0\n",
"... ... ...\n",
"10989644 0000-0002-1686-1935 NaN\n",
"10989645 0000-0002-3800-6331 NaN\n",
"10989646 0000-0002-8783-5814 NaN\n",
"10989647 0000-0002-7584-2283 NaN\n",
"10989648 0000-0003-0529-3538 NaN\n",
"\n",
"[10989649 rows x 2 columns]"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"keywords_by_orcid = df[['orcid', 'n_keywords']].sort_values('n_keywords', ascending=False)\n",
"keywords_by_orcid"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"0000-0002-0673-0341",
"0000-0003-3343-5660",
"0000-0002-6075-3501",
"0000-0002-7060-4112",
"0000-0001-5287-1949",
"0000-0002-9638-8091",
"0000-0002-4071-0301",
"0000-0001-9462-5666",
"0000-0002-0929-2412",
"0000-0002-0115-7195",
"0000-0002-4235-4259",
"0000-0003-0076-6287",
"0000-0001-9715-9357",
"0000-0002-1878-9762",
"0000-0001-6307-6027",
"0000-0003-2273-9888",
"0000-0002-0937-7061",
"0000-0002-1770-9660",
"0000-0001-5696-1052",
"0000-0003-2998-5520",
"0000-0003-1799-0971",
"0000-0002-0156-3580",
"0000-0002-9625-6742",
"0000-0003-1399-7156",
"0000-0001-9985-1697",
"0000-0001-6537-7683",
"0000-0002-8401-8018",
"0000-0003-4246-8579",
"0000-0001-7857-4133",
"0000-0002-7710-0355",
"0000-0001-5869-2204",
"0000-0002-8083-7382",
"0000-0001-8670-4372",
"0000-0001-7654-5013",
"0000-0002-4488-2880",
"0000-0003-4374-6374",
"0000-0001-6939-3859",
"0000-0003-2509-2549",
"0000-0002-3186-8860",
"0000-0002-0441-1507",
"0000-0001-5230-715X",
"0000-0003-0209-180X",
"0000-0001-9336-6850",
"0000-0002-0463-0048",
"0000-0001-5458-7167",
"0000-0002-9381-2264",
"0000-0002-8227-5387",
"0000-0002-3061-3364",
"0000-0002-9293-0189",
"0000-0002-3123-3021",
"0000-0003-1071-4296",
"0000-0003-3340-6413",
"0000-0003-3584-6834",
"0000-0002-8644-8396",
"0000-0002-2935-1934",
"0000-0002-1718-1632",
"0000-0002-8659-6321",
"0000-0002-8449-2211",
"0000-0003-1693-3190",
"0000-0001-5637-1124",
"0000-0001-5167-7466",
"0000-0002-3532-043X",
"0000-0001-6861-9561",
"0000-0003-4608-3844",
"0000-0003-4505-3678",
"0000-0003-4673-1063",
"0000-0001-8174-8835",
"0000-0002-6347-9464",
"0000-0002-8918-2781",
"0000-0003-4511-7942",
"0000-0003-2532-2906",
"0000-0001-9280-6017",
"0000-0002-5274-7742",
"0000-0001-9586-0780",
"0000-0003-3720-1183",
"0000-0001-5819-4555",
"0000-0002-1103-9651",
"0000-0001-8135-2304",
"0000-0002-8499-1045",
"0000-0003-2550-1859",
"0000-0002-8665-9281",
"0000-0001-7818-3212",
"0000-0003-1863-0265",
"0000-0001-8733-5230",
"0000-0003-2218-1343",
"0000-0002-5306-7781",
"0000-0001-7728-4046",
"0000-0003-4486-2684",
"0000-0002-4982-5236",
"0000-0001-5300-3932",
"0000-0003-3342-6123",
"0000-0002-8072-1152",
"0000-0002-3494-2624",
"0000-0002-0715-0461",
"0000-0002-3907-3552",
"0000-0001-5556-8275",
"0000-0002-3597-3350",
"0000-0002-2252-672X",
"0000-0001-7392-9361",
"0000-0001-8689-185X"
],
"y": [
154,
148,
140,
140,
132,
124,
115,
106,
105,
102,
100,
94,
92,
92,
88,
86,
78,
77,
75,
75,
72,
71,
70,
68,
68,
68,
67,
66,
64,
64,
63,
62,
61,
61,
61,
60,
60,
56,
55,
54,
54,
53,
53,
53,
53,
53,
52,
52,
52,
51,
51,
51,
50,
50,
50,
50,
50,
49,
49,
49,
49,
48,
48,
48,
48,
48,
47,
47,
47,
47,
46,
46,
46,
45,
45,
45,
45,
44,
44,
44,
44,
44,
44,
44,
44,
44,
43,
43,
43,
43,
43,
43,
43,
43,
42,
42,
42,
42,
42,
42
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Keywords provided by ORCiD"
},
"xaxis": {
"range": [
-0.5,
99.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"286d891e-2830-4ae3-92f6-d8c54ba98c44\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"286d891e-2830-4ae3-92f6-d8c54ba98c44\")) { Plotly.newPlot( \"286d891e-2830-4ae3-92f6-d8c54ba98c44\", [{\"type\": \"bar\", \"x\": [\"0000-0002-0673-0341\", \"0000-0003-3343-5660\", \"0000-0002-6075-3501\", \"0000-0002-7060-4112\", \"0000-0001-5287-1949\", \"0000-0002-9638-8091\", \"0000-0002-4071-0301\", \"0000-0001-9462-5666\", \"0000-0002-0929-2412\", \"0000-0002-0115-7195\", \"0000-0002-4235-4259\", \"0000-0003-0076-6287\", \"0000-0001-9715-9357\", \"0000-0002-1878-9762\", \"0000-0001-6307-6027\", \"0000-0003-2273-9888\", \"0000-0002-0937-7061\", \"0000-0002-1770-9660\", \"0000-0001-5696-1052\", \"0000-0003-2998-5520\", \"0000-0003-1799-0971\", \"0000-0002-0156-3580\", \"0000-0002-9625-6742\", \"0000-0003-1399-7156\", \"0000-0001-9985-1697\", \"0000-0001-6537-7683\", \"0000-0002-8401-8018\", \"0000-0003-4246-8579\", \"0000-0001-7857-4133\", \"0000-0002-7710-0355\", \"0000-0001-5869-2204\", \"0000-0002-8083-7382\", \"0000-0001-8670-4372\", \"0000-0001-7654-5013\", \"0000-0002-4488-2880\", \"0000-0003-4374-6374\", \"0000-0001-6939-3859\", \"0000-0003-2509-2549\", \"0000-0002-3186-8860\", \"0000-0002-0441-1507\", \"0000-0001-5230-715X\", \"0000-0003-0209-180X\", \"0000-0001-9336-6850\", \"0000-0002-0463-0048\", \"0000-0001-5458-7167\", \"0000-0002-9381-2264\", \"0000-0002-8227-5387\", \"0000-0002-3061-3364\", \"0000-0002-9293-0189\", \"0000-0002-3123-3021\", \"0000-0003-1071-4296\", \"0000-0003-3340-6413\", \"0000-0003-3584-6834\", \"0000-0002-8644-8396\", \"0000-0002-2935-1934\", \"0000-0002-1718-1632\", \"0000-0002-8659-6321\", \"0000-0002-8449-2211\", \"0000-0003-1693-3190\", \"0000-0001-5637-1124\", \"0000-0001-5167-7466\", \"0000-0002-3532-043X\", \"0000-0001-6861-9561\", 
\"0000-0003-4608-3844\", \"0000-0003-4505-3678\", \"0000-0003-4673-1063\", \"0000-0001-8174-8835\", \"0000-0002-6347-9464\", \"0000-0002-8918-2781\", \"0000-0003-4511-7942\", \"0000-0003-2532-2906\", \"0000-0001-9280-6017\", \"0000-0002-5274-7742\", \"0000-0001-9586-0780\", \"0000-0003-3720-1183\", \"0000-0001-5819-4555\", \"0000-0002-1103-9651\", \"0000-0001-8135-2304\", \"0000-0002-8499-1045\", \"0000-0003-2550-1859\", \"0000-0002-8665-9281\", \"0000-0001-7818-3212\", \"0000-0003-1863-0265\", \"0000-0001-8733-5230\", \"0000-0003-2218-1343\", \"0000-0002-5306-7781\", \"0000-0001-7728-4046\", \"0000-0003-4486-2684\", \"0000-0002-4982-5236\", \"0000-0001-5300-3932\", \"0000-0003-3342-6123\", \"0000-0002-8072-1152\", \"0000-0002-3494-2624\", \"0000-0002-0715-0461\", \"0000-0002-3907-3552\", \"0000-0001-5556-8275\", \"0000-0002-3597-3350\", \"0000-0002-2252-672X\", \"0000-0001-7392-9361\", \"0000-0001-8689-185X\"], \"y\": [154.0, 148.0, 140.0, 140.0, 132.0, 124.0, 115.0, 106.0, 105.0, 102.0, 100.0, 94.0, 92.0, 92.0, 88.0, 86.0, 78.0, 77.0, 75.0, 75.0, 72.0, 71.0, 70.0, 68.0, 68.0, 68.0, 67.0, 66.0, 64.0, 64.0, 63.0, 62.0, 61.0, 61.0, 61.0, 60.0, 60.0, 56.0, 55.0, 54.0, 54.0, 53.0, 53.0, 53.0, 53.0, 53.0, 52.0, 52.0, 52.0, 51.0, 51.0, 51.0, 50.0, 50.0, 50.0, 50.0, 50.0, 49.0, 49.0, 49.0, 49.0, 48.0, 48.0, 48.0, 48.0, 48.0, 47.0, 47.0, 47.0, 47.0, 46.0, 46.0, 46.0, 45.0, 45.0, 45.0, 45.0, 44.0, 44.0, 44.0, 44.0, 44.0, 44.0, 44.0, 44.0, 44.0, 43.0, 43.0, 43.0, 43.0, 43.0, 43.0, 43.0, 43.0, 42.0, 42.0, 42.0, 42.0, 42.0, 42.0]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", 
\"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], 
[0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], 
\"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", 
\"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Keywords provided by 
ORCiD\"}, \"xaxis\": {\"range\": [-0.5, 99.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('286d891e-2830-4ae3-92f6-d8c54ba98c44');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(100)\n",
"data = [\n",
" go.Bar(\n",
" x=keywords_by_orcid[:TOP_N]['orcid'],\n",
" y=keywords_by_orcid[:TOP_N]['n_keywords']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Keywords provided by ORCiD',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"# Keyword frequency table: one row per distinct keyword (the index), with the 'orcid'\n",
"# column holding the number of non-null orcid values seen for that keyword, sorted so\n",
"# the most frequent keyword comes first.\n",
"# NOTE(review): assumes df['keywords'] holds list-like values so that explode() yields\n",
"# one row per (orcid, keyword) pair -- confirm where df is loaded.\n",
"top_keywords = (\n",
"    df[['orcid', 'keywords']]\n",
"    .explode('keywords')\n",
"    .reset_index(drop=True)\n",
"    .groupby('keywords')\n",
"    .count()\n",
"    .sort_values('orcid', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"machine learning",
"bioinformatics",
"education",
"molecular biology",
"cancer",
"ecology",
"artificial intelligence",
"epidemiology",
"public health",
"microbiology",
"neuroscience",
"immunology",
"genetics",
"climate change",
"remote sensing",
"biochemistry",
"genomics",
"biotechnology",
"nanotechnology",
"sustainability",
"educación",
"gis",
"deep learning",
"psychology",
"computer vision",
"marketing",
"nutrition",
"innovation",
"data science",
"statistics",
"data mining",
"nanomaterials",
"image processing",
"robotics",
"management",
"optimization",
"renewable energy",
"chemistry",
"biomaterials",
"diabetes",
"gender",
"educação",
"architecture",
"catalysis",
"history",
"electrochemistry",
"evolution",
"research",
"energy",
"biodiversity"
],
"y": [
8574,
5424,
5191,
4557,
4163,
3923,
3839,
3789,
3676,
3550,
3495,
3468,
3343,
3337,
3279,
3003,
2794,
2681,
2674,
2654,
2526,
2511,
2466,
2381,
2309,
2213,
2199,
2154,
2153,
2144,
2108,
2100,
2099,
2086,
2081,
2071,
2009,
2005,
2002,
1998,
1997,
1873,
1835,
1813,
1813,
1800,
1797,
1789,
1770,
1717
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top-50 keywords occurrence"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"3695d308-d429-45eb-8e10-2ba6f3e90178\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"3695d308-d429-45eb-8e10-2ba6f3e90178\")) { Plotly.newPlot( \"3695d308-d429-45eb-8e10-2ba6f3e90178\", [{\"type\": \"bar\", \"x\": [\"machine learning\", \"bioinformatics\", \"education\", \"molecular biology\", \"cancer\", \"ecology\", \"artificial intelligence\", \"epidemiology\", \"public health\", \"microbiology\", \"neuroscience\", \"immunology\", \"genetics\", \"climate change\", \"remote sensing\", \"biochemistry\", \"genomics\", \"biotechnology\", \"nanotechnology\", \"sustainability\", \"educaci\\u00f3n\", \"gis\", \"deep learning\", \"psychology\", \"computer vision\", \"marketing\", \"nutrition\", \"innovation\", \"data science\", \"statistics\", \"data mining\", \"nanomaterials\", \"image processing\", \"robotics\", \"management\", \"optimization\", \"renewable energy\", \"chemistry\", \"biomaterials\", \"diabetes\", \"gender\", \"educa\\u00e7\\u00e3o\", \"architecture\", \"catalysis\", \"history\", \"electrochemistry\", \"evolution\", \"research\", \"energy\", \"biodiversity\"], \"y\": [8574, 5424, 5191, 4557, 4163, 3923, 3839, 3789, 3676, 3550, 3495, 3468, 3343, 3337, 3279, 3003, 2794, 2681, 2674, 2654, 2526, 2511, 2466, 2381, 2309, 2213, 2199, 2154, 2153, 2144, 2108, 2100, 2099, 2086, 2081, 2071, 2009, 2005, 2002, 1998, 1997, 1873, 1835, 1813, 1813, 1800, 1797, 1789, 1770, 1717]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": 
\"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], 
[0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, 
\"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", 
\"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": 
{\"text\": \"Top-50 keywords occurrence\"}, \"xaxis\": {\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('3695d308-d429-45eb-8e10-2ba6f3e90178');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the first TOP_N rows of top_keywords: keyword (index) on x,\n",
"# the 'orcid' column on y (presumably the number of profiles per keyword -- confirm).\n",
"set_top_n(50)\n",
"\n",
"layout = go.Layout(\n",
"    xaxis={'tickangle': 45, 'tickfont': {'size': 12}},\n",
"    title='Top-%s keywords occurrence' % TOP_N\n",
")\n",
"data = [go.Bar(x=top_keywords[:TOP_N].index, y=top_keywords[:TOP_N]['orcid'])]\n",
"\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Education"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"def extract_education(lst):\n",
"    \"\"\"Flatten education records into one text string per record.\n",
"\n",
"    Each record is read positionally: [0] degree, [1] role, [2] university.\n",
"    Any further fields (city, region, country, id, id_scheme) are ignored.\n",
"\n",
"    Returns a list of 'degree role university' strings, in input order.\n",
"    \"\"\"\n",
"    return [' '.join((rec[0], rec[1], rec[2])) for rec in lst]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Employment"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"def extract_employment(lst):\n",
"    \"\"\"Flatten employment records into one text string per record.\n",
"\n",
"    Each record is read positionally: [0] role, [1] institute.\n",
"    Any further fields (city, region, country, id, id_scheme) are ignored.\n",
"\n",
"    Returns a list of 'role institute' strings, in input order.\n",
"    \"\"\"\n",
"    return [' '.join((rec[0], rec[1])) for rec in lst]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Biography"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"# Treat empty-string biographies as missing, so that notna() means 'has some text'.\n",
"# Rows that were already missing stay NaN through index alignment on assignment.\n",
"# np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.\n",
"df['biography'] = df.loc[df.biography.notna(), 'biography'].replace('', np.nan)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 354015\n",
"unique 337007\n",
"top car title loans are a more straightforward way...\n",
"freq 343\n",
"Name: biography, dtype: object"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count, number of distinct values and most frequent value of the non-missing biographies.\n",
"df['biography'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>51306</th>\n",
" <td>0000-0002-7397-7977</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan upland]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-06t06:10:20.070z</td>\n",
" <td>2020-11-06t06:24:28.005z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51307</th>\n",
" <td>0000-0003-4931-9736</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan saratoga]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-13t01:04:19.859z</td>\n",
" <td>2020-11-13t01:15:12.546z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106024</th>\n",
" <td>0000-0001-8221-2303</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan victorville]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-05t00:38:21.096z</td>\n",
" <td>2020-11-05t00:40:40.091z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>108770</th>\n",
" <td>0000-0001-6736-072X</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-08t05:38:30.786z</td>\n",
" <td>2020-12-08t05:40:03.786z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>108771</th>\n",
" <td>0000-0002-8727-1246</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[loan agency]</td>\n",
" <td>NaN</td>\n",
" <td>[title loan on car, car title loan online, ref...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-10t08:54:56.127z</td>\n",
" <td>2020-12-10t08:57:15.791z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10875416</th>\n",
" <td>0000-0002-9640-8136</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan clovis]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-22t06:11:02.945z</td>\n",
" <td>2020-10-22t06:17:09.111z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10878239</th>\n",
" <td>0000-0002-6926-3752</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan escondido]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-03t02:00:33.684z</td>\n",
" <td>2020-12-03t02:02:07.054z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10933380</th>\n",
" <td>0000-0002-3655-4713</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan san rafael]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-18t00:39:17.492z</td>\n",
" <td>2020-11-18t00:52:19.024z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10933381</th>\n",
" <td>0000-0002-8724-1020</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan san juan capistrano]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-19t00:31:54.080z</td>\n",
" <td>2020-11-19t00:34:08.721z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10985986</th>\n",
" <td>0000-0002-4601-4569</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan mount pleasant]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-16t00:32:26.207z</td>\n",
" <td>2020-10-16t00:37:42.646z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>421 rows × 33 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"51306 0000-0002-7397-7977 1 1 \n",
"51307 0000-0003-4931-9736 1 1 \n",
"106024 0000-0001-8221-2303 1 1 \n",
"108770 0000-0001-6736-072X 1 1 \n",
"108771 0000-0002-8727-1246 1 1 \n",
"... ... ... ... \n",
"10875416 0000-0002-9640-8136 1 1 \n",
"10878239 0000-0002-6926-3752 1 1 \n",
"10933380 0000-0002-3655-4713 1 1 \n",
"10933381 0000-0002-8724-1020 1 1 \n",
"10985986 0000-0002-4601-4569 1 1 \n",
"\n",
" given_names family_name \\\n",
"51306 premium car title loans \n",
"51307 premium car title loans \n",
"106024 premium car title loans \n",
"108770 premium car title loans \n",
"108771 premium car title loans \n",
"... ... ... \n",
"10875416 premium car title loans \n",
"10878239 premium car title loans \n",
"10933380 premium car title loans \n",
"10933381 premium car title loans \n",
"10985986 premium car title loans \n",
"\n",
" biography \\\n",
"51306 car title loans are a more straightforward way... \n",
"51307 car title loans are a more straightforward way... \n",
"106024 car title loans are a more straightforward way... \n",
"108770 car title loans are a more straightforward way... \n",
"108771 car title loans are a more straightforward way... \n",
"... ... \n",
"10875416 car title loans are a more straightforward way... \n",
"10878239 car title loans are a more straightforward way... \n",
"10933380 car title loans are a more straightforward way... \n",
"10933381 car title loans are a more straightforward way... \n",
"10985986 car title loans are a more straightforward way... \n",
"\n",
" other_names primary_email \\\n",
"51306 [premium car title loans] NaN \n",
"51307 [premium car title loans] NaN \n",
"106024 [premium car title loans] NaN \n",
"108770 NaN NaN \n",
"108771 [loan agency] NaN \n",
"... ... ... \n",
"10875416 [premium car title loans] NaN \n",
"10878239 [premium car title loans] NaN \n",
"10933380 [premium car title loans] NaN \n",
"10933381 [premium car title loans] NaN \n",
"10985986 [premium car title loans] NaN \n",
"\n",
" keywords external_ids \\\n",
"51306 [car title loan upland] NaN \n",
"51307 [car title loan saratoga] NaN \n",
"106024 [car title loan victorville] NaN \n",
"108770 NaN NaN \n",
"108771 [title loan on car, car title loan online, ref... NaN \n",
"... ... ... \n",
"10875416 [car title loan clovis] NaN \n",
"10878239 [car title loan escondido] NaN \n",
"10933380 [car title loan san rafael] NaN \n",
"10933381 [car title loan san juan capistrano] NaN \n",
"10985986 [car title loan mount pleasant] NaN \n",
"\n",
" education employment n_works works_source activation_date \\\n",
"51306 NaN NaN 0 NaN 2020-11-06t06:10:20.070z \n",
"51307 NaN NaN 0 NaN 2020-11-13t01:04:19.859z \n",
"106024 NaN NaN 0 NaN 2020-11-05t00:38:21.096z \n",
"108770 NaN NaN 0 NaN 2020-12-08t05:38:30.786z \n",
"108771 NaN NaN 0 NaN 2020-12-10t08:54:56.127z \n",
"... ... ... ... ... ... \n",
"10875416 NaN NaN 0 NaN 2020-10-22t06:11:02.945z \n",
"10878239 NaN NaN 0 NaN 2020-12-03t02:00:33.684z \n",
"10933380 NaN NaN 0 NaN 2020-11-18t00:39:17.492z \n",
"10933381 NaN NaN 0 NaN 2020-11-19t00:31:54.080z \n",
"10985986 NaN NaN 0 NaN 2020-10-16t00:32:26.207z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids \\\n",
"51306 2020-11-06t06:24:28.005z 0 0 0 0 \n",
"51307 2020-11-13t01:15:12.546z 0 0 0 0 \n",
"106024 2020-11-05t00:40:40.091z 0 0 0 0 \n",
"108770 2020-12-08t05:40:03.786z 0 0 0 0 \n",
"108771 2020-12-10t08:57:15.791z 0 0 0 0 \n",
"... ... ... ... ... ... \n",
"10875416 2020-10-22t06:17:09.111z 0 0 0 0 \n",
"10878239 2020-12-03t02:02:07.054z 0 0 0 0 \n",
"10933380 2020-11-18t00:52:19.024z 0 0 0 0 \n",
"10933381 2020-11-19t00:34:08.721z 0 0 0 0 \n",
"10985986 2020-10-16t00:37:42.646z 0 0 0 0 \n",
"\n",
" label primary_email_domain other_email_domains \\\n",
"51306 0 NaN NaN \n",
"51307 0 NaN NaN \n",
"106024 0 NaN NaN \n",
"108770 0 NaN NaN \n",
"108771 0 NaN NaN \n",
"... ... ... ... \n",
"10875416 0 NaN NaN \n",
"10878239 0 NaN NaN \n",
"10933380 0 NaN NaN \n",
"10933381 0 NaN NaN \n",
"10985986 0 NaN NaN \n",
"\n",
" url_domains n_emails n_urls n_ids n_keywords \\\n",
"51306 [premiumcartitleloans.com] NaN 1.0 NaN 1.0 \n",
"51307 [premiumcartitleloans.com] NaN 1.0 NaN 1.0 \n",
"106024 [premiumcartitleloans.com] NaN 1.0 NaN 1.0 \n",
"108770 [premiumcartitleloans.com] NaN 1.0 NaN NaN \n",
"108771 [premiumcartitleloans.com] NaN 1.0 NaN 4.0 \n",
"... ... ... ... ... ... \n",
"10875416 [premiumcartitleloans.com] NaN 1.0 NaN 1.0 \n",
"10878239 [premiumcartitleloans.com] NaN 1.0 NaN 1.0 \n",
"10933380 [premiumcartitleloans.com] NaN 1.0 NaN 1.0 \n",
"10933381 [premiumcartitleloans.com] NaN 1.0 NaN 1.0 \n",
"10985986 [premiumcartitleloans.com] NaN 1.0 NaN 1.0 \n",
"\n",
" n_education n_employment ext_works_source n_ext_work_source \\\n",
"51306 NaN NaN NaN NaN \n",
"51307 NaN NaN NaN NaN \n",
"106024 NaN NaN NaN NaN \n",
"108770 NaN NaN NaN NaN \n",
"108771 NaN NaN NaN NaN \n",
"... ... ... ... ... \n",
"10875416 NaN NaN NaN NaN \n",
"10878239 NaN NaN NaN NaN \n",
"10933380 NaN NaN NaN NaN \n",
"10933381 NaN NaN NaN NaN \n",
"10985986 NaN NaN NaN NaN \n",
"\n",
" authoritative \n",
"51306 False \n",
"51307 False \n",
"106024 False \n",
"108770 False \n",
"108771 False \n",
"... ... \n",
"10875416 False \n",
"10878239 False \n",
"10933380 False \n",
"10933381 False \n",
"10985986 False \n",
"\n",
"[421 rows x 33 columns]"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Profiles whose biography contains the most frequent text reported by describe().\n",
"# Literal substring match; na=False drops missing biographies from the mask.\n",
"df[df.biography.str.contains('car title loans are a more straightforward', regex=False, na=False)]"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"def score(bio):\n",
"    \"\"\"Return the antispam score of a biography, or -1 when it cannot be scored.\n",
"\n",
"    -1 is a sentinel outside the filter's normal output; later cells look for it\n",
"    to list the unscorable biographies and then turn it into NaN.\n",
"\n",
"    Only Exception subclasses are swallowed: KeyboardInterrupt and SystemExit\n",
"    propagate, so interrupting a long apply() over the dataframe really stops it\n",
"    instead of silently recording -1 for the row being processed.\n",
"    \"\"\"\n",
"    try:\n",
"        return antispam.score(bio)\n",
"    except Exception:  # e.g. if len(bio) < 3 the filter doesn't know how to handle that\n",
"        return -1"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"# Score only the rows that have a biography; every other row gets NaN\n",
"# through index alignment when the result is assigned back.\n",
"df['spam_score'] = df.loc[df.biography.notna(), 'biography'].apply(score)"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>biography</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>25505</th>\n",
" <td>0000-0003-0505-2734</td>\n",
" <td>j</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138487</th>\n",
" <td>0000-0002-3417-7299</td>\n",
" <td>.....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139595</th>\n",
" <td>0000-0003-3794-1288</td>\n",
" <td>m.d., ph.d.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>193340</th>\n",
" <td>0000-0001-9655-4806</td>\n",
" <td>肿瘤</td>\n",
" </tr>\n",
" <tr>\n",
" <th>194990</th>\n",
" <td>0000-0002-9149-0142</td>\n",
" <td>be y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10927866</th>\n",
" <td>0000-0002-7341-5480</td>\n",
" <td>ph.d.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10976080</th>\n",
" <td>0000-0003-4041-0840</td>\n",
" <td>/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10976689</th>\n",
" <td>0000-0002-4285-8537</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>10976922</th>\n",
" <td>0000-0002-1545-8773</td>\n",
" <td>hi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10987379</th>\n",
" <td>0000-0002-6302-4224</td>\n",
" <td>.</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>348 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid biography\n",
"25505 0000-0003-0505-2734 j\n",
"138487 0000-0002-3417-7299 .....\n",
"139595 0000-0003-3794-1288 m.d., ph.d.\n",
"193340 0000-0001-9655-4806 肿瘤\n",
"194990 0000-0002-9149-0142 be y\n",
"... ... ...\n",
"10927866 0000-0002-7341-5480 ph.d.\n",
"10976080 0000-0003-4041-0840 /\n",
"10976689 0000-0002-4285-8537 \n",
"10976922 0000-0002-1545-8773 hi\n",
"10987379 0000-0002-6302-4224 .\n",
"\n",
"[348 rows x 2 columns]"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Biographies the filter could not score (sentinel value -1 returned by score()).\n",
"df.loc[df.spam_score == -1, ['orcid', 'biography']]"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"# Drop the -1 sentinel so that unscorable biographies do not distort the statistics below.\n",
"# np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.\n",
"df['spam_score'] = df['spam_score'].replace(-1, np.nan)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 3.536670e+05\n",
"mean 6.098044e-01\n",
"std 4.476618e-01\n",
"min 1.917500e-22\n",
"25% 1.858235e-02\n",
"50% 9.529688e-01\n",
"75% 9.999992e-01\n",
"max 1.000000e+00\n",
"Name: spam_score, dtype: float64"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distribution of the spam score over the biographies that could be scored.\n",
"df['spam_score'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>biography</th>\n",
" <th>spam_score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>investigador de la universidad de oviedo. depa...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>formación académica en la temática de manejo d...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>217</th>\n",
" <td>doctor en educación, maestro en gerencia de la...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>222</th>\n",
" <td>possui graduação em psicologia pela pontifícia...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>470</th>\n",
" <td>roofing contractors in seattle waroofing contr...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989593</th>\n",
" <td>jose ignacio peláez sánchez ha sido profesor e...</td>\n",
" <td>0.999966</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989603</th>\n",
" <td>mestranda em tecnologia na saúde e foi aluna o...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989605</th>\n",
" <td>the phd degree of pharmacy was received under ...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989615</th>\n",
" <td>mostafa metwaly is an assistant lecturer at th...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989617</th>\n",
" <td>jual obat aborsi di tangerang, obat penggugur ...</td>\n",
" <td>0.999999</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>120733 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" biography spam_score\n",
"29 investigador de la universidad de oviedo. depa... 1.000000\n",
"83 formación académica en la temática de manejo d... 1.000000\n",
"217 doctor en educación, maestro en gerencia de la... 1.000000\n",
"222 possui graduação em psicologia pela pontifícia... 1.000000\n",
"470 roofing contractors in seattle waroofing contr... 1.000000\n",
"... ... ...\n",
"10989593 jose ignacio peláez sánchez ha sido profesor e... 0.999966\n",
"10989603 mestranda em tecnologia na saúde e foi aluna o... 1.000000\n",
"10989605 the phd degree of pharmacy was received under ... 1.000000\n",
"10989615 mostafa metwaly is an assistant lecturer at th... 1.000000\n",
"10989617 jual obat aborsi di tangerang, obat penggugur ... 0.999999\n",
"\n",
"[120733 rows x 2 columns]"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Biographies with a spam score above 0.9999.\n",
"# NOTE(review): the stored output also lists ordinary-looking non-English academic\n",
"# biographies at 1.0, so a high score alone may not indicate spam -- confirm.\n",
"df.loc[df.spam_score > 0.9999, ['biography', 'spam_score']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## All VS All correlation"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"coloraxis": "coloraxis",
"hovertemplate": "x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>",
"name": "0",
"type": "heatmap",
"x": [
"verified_email",
"verified_primary_email",
"n_works",
"n_doi",
"n_arxiv",
"n_pmc",
"n_other_pids",
"label",
"n_emails",
"n_urls",
"n_ids",
"n_keywords",
"n_education",
"n_employment",
"n_ext_work_source",
"authoritative",
"spam_score"
],
"xaxis": "x",
"y": [
"verified_email",
"verified_primary_email",
"n_works",
"n_doi",
"n_arxiv",
"n_pmc",
"n_other_pids",
"label",
"n_emails",
"n_urls",
"n_ids",
"n_keywords",
"n_education",
"n_employment",
"n_ext_work_source",
"authoritative",
"spam_score"
],
"yaxis": "y",
"z": [
[
1,
0.9649829131836175,
0.07899833525811681,
0.07259719921935885,
0.0064613638682561435,
0.030614701011724112,
0.0606246420123506,
0.15318397733660719,
0.03267144560134065,
0.10712349577355784,
0.14475767748321952,
0.10243044622702734,
0.22284375415000315,
0.2240544946507108,
0.22551664240183317,
0.21531668352180736,
0.08155609963046857
],
[
0.9649829131836175,
1,
0.08183974046700901,
0.07518160639621203,
0.0066860590291805974,
0.031712353459948744,
0.06277678931008057,
0.1599569518292285,
0.03226243840878624,
0.11113100230411314,
0.15032740706571793,
0.10624021870034253,
0.23154024673662948,
0.23250920740301625,
0.2327233169990374,
0.22184413814952422,
0.08473806446744285
],
[
0.07899833525811681,
0.08183974046700901,
1,
0.9378726254398347,
0.3126299250047347,
0.35108563893979355,
0.8353346326813307,
0.22974076078506034,
0.03160640765461562,
0.14838588295615024,
0.37171182274445363,
0.1506365739780303,
0.13686193479055792,
0.21343320832924814,
0.4005951950706468,
0.29903923828342194,
0.10427066314698204
],
[
0.07259719921935885,
0.07518160639621203,
0.9378726254398347,
1,
0.35605399617723354,
0.3624050122938972,
0.8018196175347003,
0.2133388352039094,
0.028320630207299337,
0.12192573243272048,
0.3525468899622581,
0.12916204989780206,
0.11736450285212531,
0.18470550214116468,
0.3834831573219326,
0.2878040134817502,
0.0847788556234709
],
[
0.0064613638682561435,
0.0066860590291805974,
0.3126299250047347,
0.35605399617723354,
1,
0.0009072282179230706,
0.2420914875526222,
0.019397970952505558,
0.002099309887982074,
0.0064144255162447246,
0.009269883208277058,
0.005433864001670957,
0.008619933999683011,
0.015077339853222701,
0.023203121401780318,
0.024401000483449085,
0.001996599951350674
],
[
0.030614701011724112,
0.031712353459948744,
0.35108563893979355,
0.3624050122938972,
0.0009072282179230706,
1,
0.2570742999530523,
0.08736856703203819,
0.00898053907910667,
0.04988227847309645,
0.08759311081674451,
0.04978801517329604,
0.05364308187508679,
0.09230230828045376,
0.15718240355316795,
0.11447380021015284,
0.03140359992079803
],
[
0.0606246420123506,
0.06277678931008057,
0.8353346326813307,
0.8018196175347003,
0.2420914875526222,
0.2570742999530523,
1,
0.17528852589876096,
0.022792492767692595,
0.12058320737626094,
0.3393283270986452,
0.11149215572697663,
0.09603598655375359,
0.16336464942113507,
0.32867917711898453,
0.24303586233735427,
0.0804405843409754
],
[
0.15318397733660719,
0.1599569518292285,
0.22974076078506034,
0.2133388352039094,
0.019397970952505558,
0.08736856703203819,
0.17528852589876096,
1,
0.04476119076904941,
0.11879105144915103,
0.3531414487762022,
0.1558580888934057,
0.22323948654903716,
0.2576472824121809,
0.5421508464201483,
0.5245689815824472,
0.07370042715630325
],
[
0.03267144560134065,
0.03226243840878624,
0.03160640765461562,
0.028320630207299337,
0.002099309887982074,
0.00898053907910667,
0.022792492767692595,
0.04476119076904941,
1,
0.07143241126539773,
0.06149968615329382,
0.07528446624958736,
0.0730249552881224,
0.07128594621281013,
0.07937133873035572,
0.06230524231081846,
0.04577917740022739
],
[
0.10712349577355784,
0.11113100230411314,
0.14838588295615024,
0.12192573243272048,
0.0064144255162447246,
0.04988227847309645,
0.12058320737626094,
0.11879105144915103,
0.07143241126539773,
1,
0.2085344284826277,
0.3756141239879568,
0.20860391435209405,
0.2439338448964409,
0.2262491070521664,
0.16788803002413893,
0.2625511135518398
],
[
0.14475767748321952,
0.15032740706571793,
0.37171182274445363,
0.3525468899622581,
0.009269883208277058,
0.08759311081674451,
0.3393283270986452,
0.3531414487762022,
0.06149968615329382,
0.2085344284826277,
1,
0.23998646957005906,
0.2584672204668393,
0.3193726129742757,
0.6563247307005879,
0.5613898867367777,
0.12286230050873165
],
[
0.10243044622702734,
0.10624021870034253,
0.1506365739780303,
0.12916204989780206,
0.005433864001670957,
0.04978801517329604,
0.11149215572697663,
0.1558580888934057,
0.07528446624958736,
0.3756141239879568,
0.23998646957005906,
1,
0.28174315114239534,
0.29513823401207673,
0.2602571143552704,
0.19656175381120386,
0.23984302793241283
],
[
0.22284375415000315,
0.23154024673662948,
0.13686193479055792,
0.11736450285212531,
0.008619933999683011,
0.05364308187508679,
0.09603598655375359,
0.22323948654903716,
0.0730249552881224,
0.20860391435209405,
0.2584672204668393,
0.28174315114239534,
1,
0.5935197907835382,
0.34969846406582145,
0.2903708599677345,
0.17769129948382462
],
[
0.2240544946507108,
0.23250920740301625,
0.21343320832924814,
0.18470550214116468,
0.015077339853222701,
0.09230230828045376,
0.16336464942113507,
0.2576472824121809,
0.07128594621281013,
0.2439338448964409,
0.3193726129742757,
0.29513823401207673,
0.5935197907835382,
1,
0.4068774187637994,
0.33563874143735034,
0.1802962353340893
],
[
0.22551664240183317,
0.2327233169990374,
0.4005951950706468,
0.3834831573219326,
0.023203121401780318,
0.15718240355316795,
0.32867917711898453,
0.5421508464201483,
0.07937133873035572,
0.2262491070521664,
0.6563247307005879,
0.2602571143552704,
0.34969846406582145,
0.4068774187637994,
1,
0.9115220886860745,
0.1362636158929976
],
[
0.21531668352180736,
0.22184413814952422,
0.29903923828342194,
0.2878040134817502,
0.024401000483449085,
0.11447380021015284,
0.24303586233735427,
0.5245689815824472,
0.06230524231081846,
0.16788803002413893,
0.5613898867367777,
0.19656175381120386,
0.2903708599677345,
0.33563874143735034,
0.9115220886860745,
1,
0.09131343502574513
],
[
0.08155609963046857,
0.08473806446744285,
0.10427066314698204,
0.0847788556234709,
0.001996599951350674,
0.03140359992079803,
0.0804405843409754,
0.07370042715630325,
0.04577917740022739,
0.2625511135518398,
0.12286230050873165,
0.23984302793241283,
0.17769129948382462,
0.1802962353340893,
0.1362636158929976,
0.09131343502574513,
1
]
]
}
],
"layout": {
"coloraxis": {
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"margin": {
"t": 60
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"xaxis": {
"anchor": "y",
"constrain": "domain",
"domain": [
0,
1
],
"scaleanchor": "y"
},
"yaxis": {
"anchor": "x",
"autorange": "reversed",
"constrain": "domain",
"domain": [
0,
1
]
}
}
},
"text/html": [
"<div> <div id=\"07f6a139-9b12-486c-896d-237bc1a237fe\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"07f6a139-9b12-486c-896d-237bc1a237fe\")) { Plotly.newPlot( \"07f6a139-9b12-486c-896d-237bc1a237fe\", [{\"coloraxis\": \"coloraxis\", \"hovertemplate\": \"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\", \"name\": \"0\", \"type\": \"heatmap\", \"x\": [\"verified_email\", \"verified_primary_email\", \"n_works\", \"n_doi\", \"n_arxiv\", \"n_pmc\", \"n_other_pids\", \"label\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\", \"n_education\", \"n_employment\", \"n_ext_work_source\", \"authoritative\", \"spam_score\"], \"xaxis\": \"x\", \"y\": [\"verified_email\", \"verified_primary_email\", \"n_works\", \"n_doi\", \"n_arxiv\", \"n_pmc\", \"n_other_pids\", \"label\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\", \"n_education\", \"n_employment\", \"n_ext_work_source\", \"authoritative\", \"spam_score\"], \"yaxis\": \"y\", \"z\": [[1.0, 0.9649829131836175, 0.07899833525811681, 0.07259719921935885, 0.0064613638682561435, 0.030614701011724112, 0.0606246420123506, 0.15318397733660719, 0.03267144560134065, 0.10712349577355784, 0.14475767748321952, 0.10243044622702734, 0.22284375415000315, 0.2240544946507108, 0.22551664240183317, 0.21531668352180736, 0.08155609963046857], [0.9649829131836175, 1.0, 0.08183974046700901, 0.07518160639621203, 0.0066860590291805974, 0.031712353459948744, 0.06277678931008057, 0.1599569518292285, 0.03226243840878624, 0.11113100230411314, 0.15032740706571793, 0.10624021870034253, 0.23154024673662948, 0.23250920740301625, 0.2327233169990374, 0.22184413814952422, 0.08473806446744285], [0.07899833525811681, 0.08183974046700901, 1.0, 0.9378726254398347, 0.3126299250047347, 0.35108563893979355, 0.8353346326813307, 0.22974076078506034, 0.03160640765461562, 0.14838588295615024, 
0.37171182274445363, 0.1506365739780303, 0.13686193479055792, 0.21343320832924814, 0.4005951950706468, 0.29903923828342194, 0.10427066314698204], [0.07259719921935885, 0.07518160639621203, 0.9378726254398347, 1.0, 0.35605399617723354, 0.3624050122938972, 0.8018196175347003, 0.2133388352039094, 0.028320630207299337, 0.12192573243272048, 0.3525468899622581, 0.12916204989780206, 0.11736450285212531, 0.18470550214116468, 0.3834831573219326, 0.2878040134817502, 0.0847788556234709], [0.0064613638682561435, 0.0066860590291805974, 0.3126299250047347, 0.35605399617723354, 1.0, 0.0009072282179230706, 0.2420914875526222, 0.019397970952505558, 0.002099309887982074, 0.0064144255162447246, 0.009269883208277058, 0.005433864001670957, 0.008619933999683011, 0.015077339853222701, 0.023203121401780318, 0.024401000483449085, 0.001996599951350674], [0.030614701011724112, 0.031712353459948744, 0.35108563893979355, 0.3624050122938972, 0.0009072282179230706, 1.0, 0.2570742999530523, 0.08736856703203819, 0.00898053907910667, 0.04988227847309645, 0.08759311081674451, 0.04978801517329604, 0.05364308187508679, 0.09230230828045376, 0.15718240355316795, 0.11447380021015284, 0.03140359992079803], [0.0606246420123506, 0.06277678931008057, 0.8353346326813307, 0.8018196175347003, 0.2420914875526222, 0.2570742999530523, 1.0, 0.17528852589876096, 0.022792492767692595, 0.12058320737626094, 0.3393283270986452, 0.11149215572697663, 0.09603598655375359, 0.16336464942113507, 0.32867917711898453, 0.24303586233735427, 0.0804405843409754], [0.15318397733660719, 0.1599569518292285, 0.22974076078506034, 0.2133388352039094, 0.019397970952505558, 0.08736856703203819, 0.17528852589876096, 1.0, 0.04476119076904941, 0.11879105144915103, 0.3531414487762022, 0.1558580888934057, 0.22323948654903716, 0.2576472824121809, 0.5421508464201483, 0.5245689815824472, 0.07370042715630325], [0.03267144560134065, 0.03226243840878624, 0.03160640765461562, 0.028320630207299337, 0.002099309887982074, 0.00898053907910667, 
0.022792492767692595, 0.04476119076904941, 1.0, 0.07143241126539773, 0.06149968615329382, 0.07528446624958736, 0.0730249552881224, 0.07128594621281013, 0.07937133873035572, 0.06230524231081846, 0.04577917740022739], [0.10712349577355784, 0.11113100230411314, 0.14838588295615024, 0.12192573243272048, 0.0064144255162447246, 0.04988227847309645, 0.12058320737626094, 0.11879105144915103, 0.07143241126539773, 1.0, 0.2085344284826277, 0.3756141239879568, 0.20860391435209405, 0.2439338448964409, 0.2262491070521664, 0.16788803002413893, 0.2625511135518398], [0.14475767748321952, 0.15032740706571793, 0.37171182274445363, 0.3525468899622581, 0.009269883208277058, 0.08759311081674451, 0.3393283270986452, 0.3531414487762022, 0.06149968615329382, 0.2085344284826277, 1.0, 0.23998646957005906, 0.2584672204668393, 0.3193726129742757, 0.6563247307005879, 0.5613898867367777, 0.12286230050873165], [0.10243044622702734, 0.10624021870034253, 0.1506365739780303, 0.12916204989780206, 0.005433864001670957, 0.04978801517329604, 0.11149215572697663, 0.1558580888934057, 0.07528446624958736, 0.3756141239879568, 0.23998646957005906, 1.0, 0.28174315114239534, 0.29513823401207673, 0.2602571143552704, 0.19656175381120386, 0.23984302793241283], [0.22284375415000315, 0.23154024673662948, 0.13686193479055792, 0.11736450285212531, 0.008619933999683011, 0.05364308187508679, 0.09603598655375359, 0.22323948654903716, 0.0730249552881224, 0.20860391435209405, 0.2584672204668393, 0.28174315114239534, 1.0, 0.5935197907835382, 0.34969846406582145, 0.2903708599677345, 0.17769129948382462], [0.2240544946507108, 0.23250920740301625, 0.21343320832924814, 0.18470550214116468, 0.015077339853222701, 0.09230230828045376, 0.16336464942113507, 0.2576472824121809, 0.07128594621281013, 0.2439338448964409, 0.3193726129742757, 0.29513823401207673, 0.5935197907835382, 1.0, 0.4068774187637994, 0.33563874143735034, 0.1802962353340893], [0.22551664240183317, 0.2327233169990374, 0.4005951950706468, 0.3834831573219326, 
0.023203121401780318, 0.15718240355316795, 0.32867917711898453, 0.5421508464201483, 0.07937133873035572, 0.2262491070521664, 0.6563247307005879, 0.2602571143552704, 0.34969846406582145, 0.4068774187637994, 1.0, 0.9115220886860745, 0.1362636158929976], [0.21531668352180736, 0.22184413814952422, 0.29903923828342194, 0.2878040134817502, 0.024401000483449085, 0.11447380021015284, 0.24303586233735427, 0.5245689815824472, 0.06230524231081846, 0.16788803002413893, 0.5613898867367777, 0.19656175381120386, 0.2903708599677345, 0.33563874143735034, 0.9115220886860745, 1.0, 0.09131343502574513], [0.08155609963046857, 0.08473806446744285, 0.10427066314698204, 0.0847788556234709, 0.001996599951350674, 0.03140359992079803, 0.0804405843409754, 0.07370042715630325, 0.04577917740022739, 0.2625511135518398, 0.12286230050873165, 0.23984302793241283, 0.17769129948382462, 0.1802962353340893, 0.1362636158929976, 0.09131343502574513, 1.0]]}], {\"coloraxis\": {\"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"margin\": {\"t\": 60}, \"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": 
[{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, 
\"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, 
\"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, 
\"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"xaxis\": {\"anchor\": \"y\", \"constrain\": \"domain\", \"domain\": [0.0, 1.0], \"scaleanchor\": \"y\"}, \"yaxis\": {\"anchor\": \"x\", \"autorange\": \"reversed\", \"constrain\": \"domain\", \"domain\": [0.0, 1.0]}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('07f6a139-9b12-486c-896d-237bc1a237fe');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Correlate only numeric/boolean columns. Since pandas 2.0, DataFrame.corr()\n",
"# defaults to numeric_only=False and raises on text columns instead of\n",
"# silently dropping them; selecting dtypes explicitly works on old and new pandas.\n",
"# NOTE(review): missing values are replaced by -1 before correlating, so\n",
"# 'missing' is treated as an ordinary value -- confirm this is intended.\n",
"numeric_df = df.fillna(-1).select_dtypes(include=['number', 'bool'])\n",
"fig = px.imshow(numeric_df.corr())\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"# Pickle the selected columns of df (features plus `label`) to the processed-data folder.\n",
"df.loc[:, [\n",
"    'verified_email', 'verified_primary_email',\n",
"    'n_works', 'n_doi', 'n_arxiv', 'n_pmc', 'n_other_pids',\n",
"    'n_emails', 'n_urls', 'n_ids', 'n_keywords',\n",
"    'n_employment', 'n_education',\n",
"    'label',\n",
"]].to_pickle('../data/processed/features.pkl')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Label speculation"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" <th>spam_score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>0000-0002-0137-3066</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2017-07-25t04:34:17.338z</td>\n",
" <td>2019-11-27t17:54:45.418z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>0000-0002-0461-9711</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>[crossref]</td>\n",
" <td>2015-08-18t12:42:01.797z</td>\n",
" <td>2019-12-06t11:37:38.203z</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>0000-0002-0761-9450</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2020-05-13t17:15:28.405z</td>\n",
" <td>2020-08-11t21:00:45.694z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>0000-0002-4447-9215</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2017-07-24t09:37:50.242z</td>\n",
" <td>2019-11-15t08:31:24.820z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>0000-0003-0426-4065</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[eliza i. gilbert]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, us fish and wildlife service, albuquerque,...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2017-08-07t18:32:31.802z</td>\n",
" <td>2020-04-08t16:48:55.732z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989636</th>\n",
" <td>0000-0002-2906-0299</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>tiffany</td>\n",
" <td>mackay</td>\n",
" <td>NaN</td>\n",
" <td>[tiffany russel sia]</td>\n",
" <td>NaN</td>\n",
" <td>[prostate cancer, oxytocin, radiolabelling, ga...</td>\n",
" <td>[[researcherid, a-2121-2017]]</td>\n",
" <td>[[faculty of medicine, master in pharmaceutica...</td>\n",
" <td>[[clinical project lead, minomic international...</td>\n",
" <td>11</td>\n",
" <td>[crossref, researcherid, tiffany mackay]</td>\n",
" <td>2017-01-03t23:28:48.736z</td>\n",
" <td>2020-12-09t17:12:20.326z</td>\n",
" <td>11</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[oxytocin.com.au, linkedin.com]</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>13.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>[crossref, researcherid]</td>\n",
" <td>2.0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989637</th>\n",
" <td>0000-0001-5896-2024</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>giovanni, l</td>\n",
" <td>tiscia</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[scopus author id, 54948242800]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>70</td>\n",
" <td>[scopus - elsevier, tiscia giovanni, l, europe...</td>\n",
" <td>2016-07-27t10:09:13.585z</td>\n",
" <td>2020-12-07t22:23:05.706z</td>\n",
" <td>65</td>\n",
" <td>0</td>\n",
" <td>17</td>\n",
" <td>52</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[scopus - elsevier, europe pubmed central, cro...</td>\n",
" <td>3.0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989640</th>\n",
" <td>0000-0002-1070-2220</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>vinicios</td>\n",
" <td>santanna</td>\n",
" <td>NaN</td>\n",
" <td>[vinicios sant anna, vinicios sant anna, vinic...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[scopus author id, 57201697952]]</td>\n",
" <td>[[economics, ph.d., university of illinois at ...</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>[crossref metadata search]</td>\n",
" <td>2016-03-19t21:24:42.821z</td>\n",
" <td>2020-12-10t16:34:09.722z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[vpsantanna.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>[crossref metadata search]</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989643</th>\n",
" <td>0000-0003-2606-0936</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>luang</td>\n",
" <td>xu</td>\n",
" <td>NaN</td>\n",
" <td>[xu lu-ang, lu lu]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[post-doc, institute of biochemistry and cell...</td>\n",
" <td>2</td>\n",
" <td>[scopus - elsevier, crossref]</td>\n",
" <td>2015-10-24t03:53:23.544z</td>\n",
" <td>2020-11-19t09:23:48.896z</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>[scopus - elsevier, crossref]</td>\n",
" <td>2.0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989645</th>\n",
" <td>0000-0002-3800-6331</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>zachary</td>\n",
" <td>calamari</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[richard gilder graduate school, phd in compa...</td>\n",
" <td>[[assistant professor, baruch college, city un...</td>\n",
" <td>7</td>\n",
" <td>[crossref metadata search, zachary t. calamari...</td>\n",
" <td>2015-01-20t20:20:17.042z</td>\n",
" <td>2020-11-21t19:48:36.221z</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>[crossref metadata search, crossref]</td>\n",
" <td>2.0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2664886 rows × 34 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"17 0000-0002-0137-3066 1 1 \n",
"19 0000-0002-0461-9711 1 1 \n",
"22 0000-0002-0761-9450 1 1 \n",
"33 0000-0002-4447-9215 1 1 \n",
"44 0000-0003-0426-4065 1 1 \n",
"... ... ... ... \n",
"10989636 0000-0002-2906-0299 1 1 \n",
"10989637 0000-0001-5896-2024 1 1 \n",
"10989640 0000-0002-1070-2220 1 1 \n",
"10989643 0000-0003-2606-0936 1 1 \n",
"10989645 0000-0002-3800-6331 1 1 \n",
"\n",
" given_names family_name biography \\\n",
"17 NaN NaN NaN \n",
"19 NaN NaN NaN \n",
"22 NaN NaN NaN \n",
"33 NaN NaN NaN \n",
"44 NaN NaN NaN \n",
"... ... ... ... \n",
"10989636 tiffany mackay NaN \n",
"10989637 giovanni, l tiscia NaN \n",
"10989640 vinicios santanna NaN \n",
"10989643 luang xu NaN \n",
"10989645 zachary calamari NaN \n",
"\n",
" other_names primary_email \\\n",
"17 NaN NaN \n",
"19 NaN NaN \n",
"22 NaN NaN \n",
"33 NaN NaN \n",
"44 [eliza i. gilbert] NaN \n",
"... ... ... \n",
"10989636 [tiffany russel sia] NaN \n",
"10989637 NaN NaN \n",
"10989640 [vinicios sant anna, vinicios sant anna, vinic... NaN \n",
"10989643 [xu lu-ang, lu lu] NaN \n",
"10989645 NaN NaN \n",
"\n",
" keywords \\\n",
"17 NaN \n",
"19 NaN \n",
"22 NaN \n",
"33 NaN \n",
"44 NaN \n",
"... ... \n",
"10989636 [prostate cancer, oxytocin, radiolabelling, ga... \n",
"10989637 NaN \n",
"10989640 NaN \n",
"10989643 NaN \n",
"10989645 NaN \n",
"\n",
" external_ids \\\n",
"17 NaN \n",
"19 NaN \n",
"22 NaN \n",
"33 NaN \n",
"44 NaN \n",
"... ... \n",
"10989636 [[researcherid, a-2121-2017]] \n",
"10989637 [[scopus author id, 54948242800]] \n",
"10989640 [[scopus author id, 57201697952]] \n",
"10989643 NaN \n",
"10989645 NaN \n",
"\n",
" education \\\n",
"17 NaN \n",
"19 NaN \n",
"22 NaN \n",
"33 NaN \n",
"44 NaN \n",
"... ... \n",
"10989636 [[faculty of medicine, master in pharmaceutica... \n",
"10989637 NaN \n",
"10989640 [[economics, ph.d., university of illinois at ... \n",
"10989643 NaN \n",
"10989645 [[richard gilder graduate school, phd in compa... \n",
"\n",
" employment n_works \\\n",
"17 NaN 0 \n",
"19 NaN 2 \n",
"22 NaN 1 \n",
"33 NaN 0 \n",
"44 [[, us fish and wildlife service, albuquerque,... 0 \n",
"... ... ... \n",
"10989636 [[clinical project lead, minomic international... 11 \n",
"10989637 NaN 70 \n",
"10989640 NaN 1 \n",
"10989643 [[post-doc, institute of biochemistry and cell... 2 \n",
"10989645 [[assistant professor, baruch college, city un... 7 \n",
"\n",
" works_source \\\n",
"17 NaN \n",
"19 [crossref] \n",
"22 [crossref] \n",
"33 NaN \n",
"44 NaN \n",
"... ... \n",
"10989636 [crossref, researcherid, tiffany mackay] \n",
"10989637 [scopus - elsevier, tiscia giovanni, l, europe... \n",
"10989640 [crossref metadata search] \n",
"10989643 [scopus - elsevier, crossref] \n",
"10989645 [crossref metadata search, zachary t. calamari... \n",
"\n",
" activation_date last_update_date n_doi n_arxiv \\\n",
"17 2017-07-25t04:34:17.338z 2019-11-27t17:54:45.418z 0 0 \n",
"19 2015-08-18t12:42:01.797z 2019-12-06t11:37:38.203z 2 0 \n",
"22 2020-05-13t17:15:28.405z 2020-08-11t21:00:45.694z 1 0 \n",
"33 2017-07-24t09:37:50.242z 2019-11-15t08:31:24.820z 0 0 \n",
"44 2017-08-07t18:32:31.802z 2020-04-08t16:48:55.732z 0 0 \n",
"... ... ... ... ... \n",
"10989636 2017-01-03t23:28:48.736z 2020-12-09t17:12:20.326z 11 0 \n",
"10989637 2016-07-27t10:09:13.585z 2020-12-07t22:23:05.706z 65 0 \n",
"10989640 2016-03-19t21:24:42.821z 2020-12-10t16:34:09.722z 1 0 \n",
"10989643 2015-10-24t03:53:23.544z 2020-11-19t09:23:48.896z 2 0 \n",
"10989645 2015-01-20t20:20:17.042z 2020-11-21t19:48:36.221z 7 0 \n",
"\n",
" n_pmc n_other_pids label primary_email_domain other_email_domains \\\n",
"17 0 0 1 NaN NaN \n",
"19 0 0 1 NaN NaN \n",
"22 0 0 1 NaN NaN \n",
"33 0 0 1 NaN NaN \n",
"44 0 0 1 NaN NaN \n",
"... ... ... ... ... ... \n",
"10989636 0 0 1 NaN NaN \n",
"10989637 17 52 1 NaN NaN \n",
"10989640 0 1 1 NaN NaN \n",
"10989643 0 1 1 NaN NaN \n",
"10989645 1 0 1 NaN NaN \n",
"\n",
" url_domains n_emails n_urls n_ids \\\n",
"17 NaN NaN NaN NaN \n",
"19 NaN NaN NaN NaN \n",
"22 NaN NaN NaN NaN \n",
"33 NaN NaN NaN NaN \n",
"44 NaN NaN NaN NaN \n",
"... ... ... ... ... \n",
"10989636 [oxytocin.com.au, linkedin.com] NaN 2.0 1.0 \n",
"10989637 NaN NaN NaN 1.0 \n",
"10989640 [vpsantanna.com] NaN 1.0 1.0 \n",
"10989643 NaN NaN NaN NaN \n",
"10989645 NaN NaN NaN NaN \n",
"\n",
" n_keywords n_education n_employment \\\n",
"17 NaN NaN NaN \n",
"19 NaN NaN NaN \n",
"22 NaN NaN NaN \n",
"33 NaN NaN NaN \n",
"44 NaN NaN 1.0 \n",
"... ... ... ... \n",
"10989636 13.0 2.0 4.0 \n",
"10989637 NaN NaN NaN \n",
"10989640 NaN 2.0 NaN \n",
"10989643 NaN NaN 1.0 \n",
"10989645 NaN 2.0 2.0 \n",
"\n",
" ext_works_source \\\n",
"17 NaN \n",
"19 NaN \n",
"22 NaN \n",
"33 NaN \n",
"44 NaN \n",
"... ... \n",
"10989636 [crossref, researcherid] \n",
"10989637 [scopus - elsevier, europe pubmed central, cro... \n",
"10989640 [crossref metadata search] \n",
"10989643 [scopus - elsevier, crossref] \n",
"10989645 [crossref metadata search, crossref] \n",
"\n",
" n_ext_work_source authoritative spam_score \n",
"17 NaN False NaN \n",
"19 NaN False NaN \n",
"22 NaN False NaN \n",
"33 NaN False NaN \n",
"44 NaN False NaN \n",
"... ... ... ... \n",
"10989636 2.0 True NaN \n",
"10989637 3.0 True NaN \n",
"10989640 1.0 True NaN \n",
"10989643 2.0 True NaN \n",
"10989645 2.0 True NaN \n",
"\n",
"[2664886 rows x 34 columns]"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the rows whose `label` column equals 1\n",
"df.loc[df['label'] == 1]"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 10989649 entries, 0 to 10989648\n",
"Data columns (total 30 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 orcid string\n",
" 1 verified_email bool \n",
" 2 verified_primary_email bool \n",
" 3 given_names string\n",
" 4 family_name string\n",
" 5 biography string\n",
" 6 other_names object\n",
" 7 primary_email string\n",
" 8 keywords object\n",
" 9 external_ids object\n",
" 10 education object\n",
" 11 employment object\n",
" 12 n_works int64 \n",
" 13 works_source object\n",
" 14 activation_date string\n",
" 15 last_update_date string\n",
" 16 n_doi int64 \n",
" 17 n_arxiv int64 \n",
" 18 n_pmc int64 \n",
" 19 n_other_pids int64 \n",
" 20 label int64 \n",
" 21 primary_email_domain object\n",
" 22 other_email_domains object\n",
" 23 url_domains object\n",
" 24 n_emails UInt16\n",
" 25 n_urls UInt16\n",
" 26 n_ids UInt16\n",
" 27 n_keywords UInt16\n",
" 28 n_education UInt16\n",
" 29 n_employment UInt16\n",
"dtypes: UInt16(6), bool(2), int64(6), object(9), string(7)\n",
"memory usage: 2.0+ GB\n"
]
}
],
"source": [
"# Unused draft filter left over from exploration - presumably a candidate\n",
"# labelling rule; TODO confirm and apply it, or delete it:\n",
"# (df.n_works > 0) & (df.n_ids > 1)\n",
"# Print the column dtypes and the memory footprint of df\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"# Store n_ids with pandas' nullable unsigned 16-bit integer dtype\n",
"df['n_ids'] = df['n_ids'].astype('UInt16')"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 2016-07-27 10:09:13.585000+00:00\n",
"1 2016-07-27 10:09:13.585000+00:00\n",
"2 NaT\n",
"3 2016-07-27 10:09:13.585000+00:00\n",
"dtype: datetime64[ns, UTC]"
]
},
"execution_count": 107,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check on a tiny sample that uses the same lower-case ISO-8601 layout as\n",
"# `activation_date`: the strings must parse to UTC datetimes and pd.NA must become NaT.\n",
"pd.to_datetime(\n",
"    pd.Series(['2016-07-27t10:09:13.585z', '2016-07-27t10:09:13.585z', pd.NA, '2016-07-27t10:09:13.585z']),\n",
"    utc=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The raw timestamps use lower-case 't'/'z' (e.g. 2016-07-27t10:09:13.585z). Parsing them\n",
"# as-is went through pandas' slow per-value fallback parser on ~11M rows, so normalise\n",
"# them to the standard upper-case 'T'/'Z' first to let the vectorised ISO-8601 parser\n",
"# handle the column. utc=True pins the result to a timezone-aware UTC dtype.\n",
"pd.to_datetime(df['activation_date'].str.upper(), utc=True)"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"# Convert the int64 `label` column to a plain boolean column\n",
"df['label'] = df['label'].astype(bool)"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 10989649 entries, 0 to 10989648\n",
"Data columns (total 30 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 orcid string\n",
" 1 verified_email bool \n",
" 2 verified_primary_email bool \n",
" 3 given_names string\n",
" 4 family_name string\n",
" 5 biography string\n",
" 6 other_names object\n",
" 7 primary_email string\n",
" 8 keywords object\n",
" 9 external_ids object\n",
" 10 education object\n",
" 11 employment object\n",
" 12 n_works int64 \n",
" 13 works_source object\n",
" 14 activation_date string\n",
" 15 last_update_date string\n",
" 16 n_doi int64 \n",
" 17 n_arxiv int64 \n",
" 18 n_pmc int64 \n",
" 19 n_other_pids int64 \n",
" 20 label bool \n",
" 21 primary_email_domain object\n",
" 22 other_email_domains object\n",
" 23 url_domains object\n",
" 24 n_emails UInt16\n",
" 25 n_urls UInt16\n",
" 26 n_ids UInt16\n",
" 27 n_keywords UInt16\n",
" 28 n_education UInt16\n",
" 29 n_employment UInt16\n",
"dtypes: UInt16(6), bool(3), int64(5), object(9), string(7)\n",
"memory usage: 1.9+ GB\n"
]
}
],
"source": [
"# Print the column dtypes and the memory footprint of df\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}